From d6a033aa5bd1c6cfdb1772e8ff2d1b3098daaea6 Mon Sep 17 00:00:00 2001 From: "matthias@arch" Date: Thu, 18 May 2023 03:18:49 +0200 Subject: [PATCH] Added region table to db --- database.uxf | 217 ++++++++++++++++++++++++--------------- regina/database.py | 81 +++++++++------ regina/sql/create_db.sql | 10 +- 3 files changed, 190 insertions(+), 118 deletions(-) diff --git a/database.uxf b/database.uxf index 215a626..43681aa 100644 --- a/database.uxf +++ b/database.uxf @@ -1,13 +1,13 @@ - 9 + 12 UMLClass - 441 - 306 - 180 - 162 + 396 + 240 + 240 + 216 visitor -- @@ -27,10 +27,10 @@ bg=MAGENTA Relation - 558 - 216 - 36 - 108 + 552 + 120 + 48 + 144 lt=- m1=n @@ -41,10 +41,10 @@ m2=1 UMLClass - 702 - 306 - 171 - 153 + 744 + 240 + 228 + 204 request -- @@ -64,10 +64,10 @@ bg=CYAN Relation - 612 - 333 - 108 - 45 + 624 + 276 + 144 + 60 lt=- m1=1 @@ -78,10 +78,10 @@ m2=n UMLClass - 333 - 135 - 180 - 90 + 252 + 12 + 240 + 120 platform -- @@ -97,38 +97,19 @@ bg=MAGENTA UMLClass - 702 - 549 - 171 - 117 + 744 + 564 + 228 + 156 city -- <<PK>> - city_id: INTEGER -- -- country_id: INTEGER - name: TEXT -- region: TEXT -style=autoresize -bg=ORANGE - - - - UMLClass - - 945 - 549 - 171 - 99 - - country --- -<<PK>> - country_id: INTEGER --- -- name: TEXT UNIQUE -- code: TEXT UNIQUE +- region_id: INTEGER style=autoresize bg=ORANGE @@ -136,10 +117,10 @@ bg=ORANGE Relation - 864 - 567 - 99 - 45 + 960 + 588 + 132 + 60 lt=- m1=1 @@ -150,10 +131,10 @@ m2=n Relation - 612 - 567 - 108 - 45 + 624 + 588 + 144 + 60 lt=- m1=1 @@ -164,10 +145,10 @@ m2=n UMLClass - 441 - 549 - 180 - 117 + 396 + 564 + 240 + 156 ip_range -- @@ -184,10 +165,10 @@ bg=ORANGE Relation - 522 - 459 - 36 - 108 + 504 + 444 + 48 + 144 lt=- m1=1 @@ -198,10 +179,10 @@ m2=n UMLClass - 945 - 306 - 162 - 90 + 1068 + 240 + 216 + 120 route -- @@ -217,10 +198,10 @@ bg=CYAN UMLClass - 549 - 135 - 171 - 90 + 540 + 12 + 228 + 120 browser -- @@ -236,10 +217,10 @@ bg=MAGENTA Relation - 486 - 216 - 36 - 108 + 456 + 120 + 48 + 144 lt=- m1=n @@ -250,10 +231,10 @@ m2=1 UMLClass - 756 - 135 - 171 - 90 + 816 + 12 + 228 + 120 referer -- @@ -269,10 +250,10 @@ bg=CYAN Relation - 783 - 216 - 36 - 108 + 852 + 120 + 48 + 144 lt=- m1=n @@ -283,10 +264,10 @@ m2=1 Relation - 864 - 333 - 99 - 45 + 960 + 276 + 132 + 60 lt=- m1=n @@ -294,4 +275,70 @@ m2=1 10.0;20.0;90.0;20.0 + + UMLClass + + 1068 + 564 + 228 + 132 + + country +-- +<<PK>> +- country_id: INTEGER +-- +- name: TEXT UNIQUE +- code: TEXT UNIQUE +style=autoresize +bg=ORANGE + + + + Relation + + 936 + 708 + 48 + 120 + + lt=- +m1=1 +m2=n + + 10.0;80.0;10.0;10.0 + + + Relation + + 1080 + 684 + 48 + 144 + + lt=- +m1=1 +m2=n + + 10.0;10.0;10.0;100.0 + + + UMLClass + + 912 + 804 + 228 + 132 + + region +-- +<<PK>> +- region_id: INTEGER +-- +- name: TEXT +- country_id: INTEGER +style=autoresize +bg=ORANGE + + diff --git a/regina/database.py b/regina/database.py index 0d4ac00..a2112fc 100644 --- a/regina/database.py +++ b/regina/database.py @@ -283,29 +283,42 @@ class Database: assert(type(country_id_val) == int) return country_id_val - def get_city_id(self, name, region, country_id) -> int: + def get_region_id(self, name: str, country_id: int) -> int: + """ + get the id of region of name + if not present, insert and return id + """ name = sanitize(name) - region = sanitize(region) - if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False): - self.cur.execute(f"INSERT INTO city (name, region, country_id) VALUES ('{name}', '{region}', '{country_id}')") - cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False) + if not sql_exists(self.cur, "region", [("name", name), ("country_id", country_id)], do_sanitize=False): + self.cur.execute(f"INSERT INTO region (name, country_id) VALUES ('{name}', '{country_id}')") + regions = self(f"SELECT region_id FROM region WHERE name = '{name}' AND country_id = '{country_id}'") + if len(regions) > 0: + region_id_val = regions[0][0] + else: + warning(f"get_region_id: Could not get region_id for name='{name}'.") + return 0 + assert(type(region_id_val) == int) + return region_id_val + + def get_city_id(self, name, region_id: int, country_id:int) -> int: + name = sanitize(name) + if not sql_exists(self.cur, "city", [("name", name), ("region_id", region_id), ("country_id", country_id)], do_sanitize=False): + self.cur.execute(f"INSERT INTO city (name, region_id, country_id) VALUES ('{name}', '{region_id}', '{country_id}')") + cities = sql_select(self.cur, "city", [("name", name), ("region_id", region_id), ("country_id", country_id)], do_sanitize=False) if len(cities) > 0: city_id_val = cities[0][0] else: - warning(f"get_city_id: Could not get city_id for name='{name}', region='{region}' and country_id='{country_id}'.") + warning(f"get_city_id: Could not get city_id for name='{name}', region_id='{region_id}' and country_id='{country_id}'.") return 0 assert(type(city_id_val) == int) return city_id_val - def update_geoip_tables(self, geoip_city_csv_path: str): """ update the geoip data with the contents of the geoip_city_csv file Make sure to update the visitor.ip_range_id column for all visitors. In case something changed, they might point to a different city. - - TODO: update teh visitor.ip_range_id column to match (potentially) new city ip range """ # indices for the csv FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5 @@ -333,24 +346,26 @@ class Database: self.cur.execute(f"DELETE FROM ip_range") self.cur.execute(f"DELETE FROM city") self.cur.execute(f"DELETE FROM country") + self.cur.execute(f"DELETE FROM region") self.conn.commit() self.cur.execute(f"VACUUM") # guarantees that unkown city/country will have id 0 self.cur.execute(f"INSERT INTO country (country_id, name, code) VALUES (0, 'Unknown', 'XX') ") - self.cur.execute(f"INSERT INTO city (city_id, name, region) VALUES (0, 'Unknown', 'Unkown') ") + self.cur.execute(f"INSERT INTO region (region_id, name, country_id) VALUES (0, 'Unknown', 0) ") + self.cur.execute(f"INSERT INTO city (city_id, name, region_id, country_id) VALUES (0, 'Unknown', 0, 0) ") # for combining city ranges into a 'City in ' range # country_id for the range that was last added (for combining multiple csv rows in one ip_range) RANGE_DONE = -1 - combine_range_country_id = RANGE_DONE - combine_range_country_name = "" + combine_range_city_id = RANGE_DONE combine_range_low = RANGE_DONE combine_range_high = RANGE_DONE - def add_range(low, high, city_name, region, country_id): - city_id = self.get_city_id(city_name, region, country_id) - pdebug(f"update_ip_range_id: Adding range for city={city_name:20}, country_id={country_id:3}, low={low:16}, high={high:16}", lvl=2) + get_all = "all" in settings["data-collection"]["get_cities_for_countries"] + + def add_range(low, high, city_id): + pdebug(f"update_ip_range_id: Adding range for city={city_id:20}, low={low:16}, high={high:16}", lvl=3) self.cur.execute(f"INSERT INTO ip_range (low, high, city_id) VALUES ({low}, {high}, {city_id})") for i, row in enumerate(csv, 1): # if i % 100 == 0: @@ -365,25 +380,27 @@ class Database: # make sure country exists country_id = self.get_country_id(row[COUNTRY], row[CODE]) # only add cities for countries the user is interested in - if row[CODE] in settings["data-collection"]["get_cities_for_countries"]: - add_range(row[FROM], row[TO], row[CITY], row[REGION], country_id) + if get_all or row[CODE] in settings["data-collection"]["get_cities_for_countries"]: + region_id = self.get_region_id(row[REGION], country_id) + city_id = self.get_city_id(row[CITY], region_id, country_id) else: - # if continuing - if combine_range_country_id != RANGE_DONE: - # if continuing previous range, extend the upper range limit - if combine_range_country_id == country_id: - combine_range_high = row[TO] - else: # new range for country, append - add_range(combine_range_low, combine_range_high, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id) - combine_range_country_id = RANGE_DONE - # not elif, this has to be executed if previous else was executed - if combine_range_country_id == RANGE_DONE : # currently in new range, combine with later ranges - combine_range_country_id = country_id - combine_range_country_name = row[COUNTRY] - combine_range_low = row[FROM] + region_id = self.get_region_id(f"Region in {row[COUNTRY]}", country_id) + city_id = self.get_city_id(f"City in {row[COUNTRY]}", region_id, country_id) + # if continuing + if combine_range_city_id != RANGE_DONE: + # if continuing previous range, extend the upper range limit + if combine_range_city_id == city_id: combine_range_high = row[TO] - if combine_range_country_id >= 0: # last range , append - add_range(combine_range_low, combine_range_high, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id) + else: # new range for city, append + add_range(combine_range_low, combine_range_high, combine_range_city_id) + combine_range_city_id = RANGE_DONE + # not elif, this has to be executed if previous else was executed + if combine_range_city_id == RANGE_DONE : # currently in new range, combine with later ranges + combine_range_city_id = city_id + combine_range_low = row[FROM] + combine_range_high = row[TO] + if combine_range_city_id != RANGE_DONE: # last range , append + add_range(combine_range_low, combine_range_high, combine_range_city_id) # diff --git a/regina/sql/create_db.sql b/regina/sql/create_db.sql index 973c609..1341838 100644 --- a/regina/sql/create_db.sql +++ b/regina/sql/create_db.sql @@ -59,7 +59,15 @@ CREATE TABLE IF NOT EXISTS ip_range( CREATE TABLE IF NOT EXISTS city( city_id INTEGER PRIMARY KEY, name TEXT, - region TEXT, + region_id INTEGER, + country_id INTEGER, + FOREIGN KEY(region_id) REFERENCES region(region_id), + FOREIGN KEY(country_id) REFERENCES country(country_id) +) STRICT; + +CREATE TABLE IF NOT EXISTS region( + region_id INTEGER PRIMARY KEY, + name TEXT, country_id INTEGER, FOREIGN KEY(country_id) REFERENCES country(country_id) ) STRICT;