diff --git a/database.svg b/database.svg
new file mode 100644
index 0000000..714d817
--- /dev/null
+++ b/database.svg
@@ -0,0 +1,280 @@
+
+
+
diff --git a/database.uxf b/database.uxf
index e4e262c..215a626 100644
--- a/database.uxf
+++ b/database.uxf
@@ -1,13 +1,13 @@
- 8
+ 9
UMLClass
- 96
- 248
- 160
- 144
+ 441
+ 306
+ 180
+ 162
visitor
--
@@ -17,19 +17,20 @@
- ip_address: INTEGER
- platform_id: INTEGER
- browser_id: INTEGER
-- mobile: INTEGER
+- is_mobile: INTEGER
- is_human: INTEGER
-- range_id: INTEGER
-style=autoresize
+- ip_range_id: INTEGER
+style=autoresize
+bg=MAGENTA
Relation
- 216
- 168
- 32
- 96
+ 558
+ 216
+ 36
+ 108
lt=-
m1=n
@@ -40,10 +41,10 @@ m2=1
UMLClass
- 352
- 240
- 152
- 136
+ 702
+ 306
+ 171
+ 153
request
--
@@ -56,30 +57,31 @@ m2=1
--
- time: INTEGER
- status: INTEGER
-style=autoresize
+style=autoresize
+bg=CYAN
Relation
- 248
- 248
- 120
- 40
+ 612
+ 333
+ 108
+ 45
lt=-
m1=1
m2=n
- 10.0;20.0;130.0;20.0
+ 10.0;20.0;100.0;20.0
UMLClass
- 16
- 96
- 160
- 80
+ 333
+ 135
+ 180
+ 90
platform
--
@@ -88,16 +90,17 @@ m2=n
--
- name: TEXT UNIQUE
--
-style=autoresize
+style=autoresize
+bg=MAGENTA
UMLClass
- 328
- 488
- 152
- 104
+ 702
+ 549
+ 171
+ 117
city
--
@@ -107,95 +110,98 @@ style=autoresize
- country_id: INTEGER
- name: TEXT
- region: TEXT
-style=autoresize
+style=autoresize
+bg=ORANGE
UMLClass
- 536
- 488
- 152
- 88
+ 945
+ 549
+ 171
+ 99
country
--
<<PK>>
- country_id: INTEGER
--
-- name: TEXT
-- code: TEXT
-style=autoresize
+- name: TEXT UNIQUE
+- code: TEXT UNIQUE
+style=autoresize
+bg=ORANGE
Relation
- 472
- 504
- 80
- 40
+ 864
+ 567
+ 99
+ 45
lt=-
m1=1
m2=n
- 80.0;20.0;10.0;20.0
+ 90.0;20.0;10.0;20.0
Relation
- 264
- 504
- 80
- 40
+ 612
+ 567
+ 108
+ 45
lt=-
m1=1
m2=n
- 80.0;20.0;10.0;20.0
+ 100.0;20.0;10.0;20.0
UMLClass
- 136
- 488
- 136
- 104
+ 441
+ 549
+ 180
+ 117
ip_range
--
<<PK>>
-- range_id
+- ip_range_id
--
-- from: INTEGER
-- to: INTEGER
+- low: INTEGER UNIQUE
+- high: INTEGER UNIQUE
- city_id: INTEGER
-style=autoresize
+style=autoresize
+bg=ORANGE
Relation
- 176
- 384
- 32
- 120
+ 522
+ 459
+ 36
+ 108
lt=-
m1=1
m2=n
- 10.0;130.0;10.0;10.0
+ 10.0;100.0;10.0;10.0
UMLClass
- 576
- 264
- 144
- 80
+ 945
+ 306
+ 162
+ 90
route
--
@@ -204,16 +210,17 @@ m2=n
--
- name: TEXT UNIQUE
--
-style=autoresize
+style=autoresize
+bg=CYAN
UMLClass
- 208
- 96
- 152
- 80
+ 549
+ 135
+ 171
+ 90
browser
--
@@ -222,16 +229,17 @@ style=autoresize
--
- name: TEXT UNIQUE
--
-style=autoresize
+style=autoresize
+bg=MAGENTA
Relation
- 144
- 168
- 32
- 96
+ 486
+ 216
+ 36
+ 108
lt=-
m1=n
@@ -242,10 +250,10 @@ m2=1
UMLClass
- 392
- 96
- 152
- 80
+ 756
+ 135
+ 171
+ 90
referer
--
@@ -254,35 +262,36 @@ m2=1
--
- name: TEXT UNIQUE
--
-style=autoresize
+style=autoresize
+bg=CYAN
Relation
- 400
- 168
- 32
- 88
+ 783
+ 216
+ 36
+ 108
lt=-
m1=n
m2=1
- 10.0;90.0;10.0;10.0
+ 10.0;100.0;10.0;10.0
Relation
- 496
- 288
- 96
- 40
+ 864
+ 333
+ 99
+ 45
lt=-
m1=n
m2=1
- 10.0;20.0;100.0;20.0
+ 10.0;20.0;90.0;20.0
diff --git a/regina/database.py b/regina/database.py
index f62a1a7..0d4ac00 100644
--- a/regina/database.py
+++ b/regina/database.py
@@ -15,11 +15,11 @@ if __name__ == "__main__": # make relative imports work as described here: http
sys.path.insert(0, path.dirname(path.dirname(filepath)))
# local
-from regina.utility.sql_util import replace_null, sanitize, sql_select, sql_exists
+from regina.utility.sql_util import replace_null, sanitize, sql_select, sql_exists, sql_tablesize
from regina.utility.utility import pdebug, get_filepath, warning, pmessage, is_blacklisted, is_whitelisted
from regina.utility.globals import settings
from regina.data_collection.request import Request
-from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
+from regina.utility.globals import user_agent_platforms, user_agent_browsers, settings
"""
create reginas database as shown in the uml diagram database.uxf
@@ -32,14 +32,22 @@ class Database:
# verify that the database is created
self.cur.execute("pragma schema_version")
if self.cur.fetchone()[0] == 0: # not created
- pdebug(f"Database.__init__: Creating database at {database_path}")
+ pdebug(f"Database.__init__: Creating new database at {database_path}", lvl=1)
with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
create_db = file.read()
self.cur.executescript(create_db)
self.conn.commit()
+ else:
+ pdebug(f"Database.__init__: Opening existing database at {database_path}", lvl=1)
+
+ def __del__(self):
+ self.cur.close()
+ self.conn.commit()
+ self.conn.close()
def __call__(self, s):
"""execute a command and return fetchall()"""
+ pdebug(f"Database: execute: \"{s}\"", lvl=4)
self.cur.execute(s)
return self.cur.fetchall()
def execute(self, s):
@@ -51,133 +59,147 @@ class Database:
# VISITOR
#
def is_visitor_human(self, visitor_id: int):
+ self.execute(f"SELECT is_human FROM visitor WHERE visitor_id = {visitor_id}")
+ if self.fetchone()[0] == 1:
+ return True
+ return False
+
+ def update_is_visitor_human(self, visitor_id: int):
"""
check if they have a known platform AND browser
if settings "human_needs_success": check if at least one request did not result in an error (http status >= 400)
+
+ updates the visitor.is_human column
+ @returns True if human, else False
"""
- max_success_status = 400
- if settings["status_300_is_success"]: max_success_status = 300
+ def set_not_human(debug_str=""):
+ pdebug(f"update_is_visitor_human: visitor_id={visitor_id:5} is not human: Failed check: {debug_str}", lvl=3)
+ self.cur.execute(f"UPDATE visitor SET is_human = 0 WHERE visitor_id = {visitor_id}")
+ return False
+
self.cur.execute(f"SELECT browser_id, platform_id FROM visitor WHERE visitor_id = {visitor_id}")
- browsers_and_platforms = self.cur.fetchall()
- if len(browsers_and_platforms) != 1:
- pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many")
- return False
- browser = self.get_name("browser", browsers_and_platforms[0][0])
- if not browser in visitor_agent_browsers:
- return False
- platform = self.get_name("platform", browsers_and_platforms[0][1])
- if not platform in visitor_agent_operating_systems:
- return False
- if settings["human_needs_success"]:
+ browser_id, platform_id = self.cur.fetchall()[0]
+ browser = self.get_name("browser", browser_id)
+ if not browser in user_agent_browsers:
+ return set_not_human("browser")
+
+ platform = self.get_name("platform", platform_id)
+ if not platform in user_agent_platforms:
+ return set_not_human("platform")
+
+ max_success_status = 300
+ if settings["data-collection"]["status_300_is_success"]: max_success_status = 400
+
+ if settings["data-collection"]["human_needs_successful_request"]:
# check if at least request was successful (status < 400)
self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM request WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
- if self.cur.fetchone()[0] == 1:
- # pdebug(f"is_visitor_human: Visitor {visitor_id} is human")
- pass
- else:
- # pdebug(f"is_visitor_human: Visitor {visitor_id} only had unsuccessful requests")
- return False
+ if self.cur.fetchone()[0] == 0:
+ return set_not_human("successful request")
+ # if here, is human
+ self.cur.execute(f"UPDATE visitor SET is_human = 1 WHERE visitor_id = {visitor_id}")
return True
- def get_visitor_id(self, request: Request, insert=True) -> int | None:
- """
- get the visitor_id. Adds the visitor if not already existing
- """
+ def get_visitor_id(self, request: Request, insert=True) -> tuple[int | None, bool]:
"""
get the visitor_id:
- If settings unique_visitor_is_ip_address: Check if visitor with ip address exists
- Else: check if visitor with ip_address, browser and platform exists
+ if settings unique_visitor_is_ip_address: Check if visitor with ip address exists
+ else: check if visitor with ip_address, browser and platform exists
- If visitor does not exist and insert: insert, return id
- Else: return None
+ @return visitor_id, is_new_visitor
+ if visitor does not exist:
+ if insert: return visitor_id, True
+ else: return None, False
+ else: return visitor_id, False
"""
- if settings["hash_ip_address"]:
- ip_address = hash(request.ip_address)
- else:
- ip_address = request.ip_address
+ ip_address = request.ip_address
# if insert == True, ids will be int
browser_id: int | None = self.get_id("browser", request.get_browser(), insert=insert)
platform_id: int | None = self.get_id("platform", request.get_platform(), insert=insert)
constraints = [("ip_address", ip_address)]
- if not settings["unique_visitor_is_ip_address"]:
+ if not settings["data-collection"]["unique_visitor_is_ip_address"]:
if browser_id: constraints.append(("browser_id", browser_id))
if platform_id: constraints.append(("platform_id", platform_id))
- require_update_is_human = False
+ is_new_visitor = False
if not sql_exists(self.cur, "visitor", constraints):
- require_update_is_human = True
+ is_new_visitor = True
if not insert:
- return None
+ return None, False
is_mobile = int(request.get_mobile())
ip_range_id = 0
- if settings["get_visitor_location"]:
+ if settings["data-collection"]["get_visitor_location"]:
ip_range_id = self.get_ip_range_id(request.ip_address)
- is_human = 0 # is_visitor_human cannot be called until visitor is in db
- self.cur.execute(f"INSERT INTO visitor (ip_address, ip_range_id, platform_id, browser_id, is_mobile, is_human, ip_range_id) VALUES ('{ip_address}', '{ip_range_id}', '{platform_id}', '{browser_id}', '{is_mobile}', '{is_human}');")
+ is_human = 0 # update_is_visitor_human cannot be called until visitor is in db
+ self.cur.execute(f"INSERT INTO visitor (ip_address, ip_range_id, platform_id, browser_id, is_mobile, is_human) VALUES ('{ip_address}', '{ip_range_id}', '{platform_id}', '{browser_id}', '{is_mobile}', '{is_human}');")
visitor_id = sql_select(self.cur, "visitor", constraints)[0][0]
- # TODO: if requests are not added yet, visitor might not be recognized since it does not have a successful requets yet
- if require_update_is_human:
- is_human = self.is_visitor_human(visitor_id)
- if is_human:
- self.cur.execute(f"UPDATE visitor SET is_human = 1 WHERE visitor_id = {visitor_id}")
- return visitor_id
+ return visitor_id, is_new_visitor
+ def get_visitor_ids_for_date(self, date:str) -> list[int]:
+ return [ visitor_id[0] for visitor_id in self(f"SELECT DISTINCT visitor_id FROM request WHERE {date}") ]
+
+ def get_visitor_count(self) -> int:
+ return sql_tablesize(self.cur, "visitor")
#
# REQUEST
#
- def request_exists(self, request: Request, visitor_id: int, route_id: int):
+ def get_request_count(self) -> int:
+ return sql_tablesize(self.cur, "request")
+
+ def request_exists(self, request_timestamp: int, visitor_id: int, route_id: int):
"""
- Check if a request from same visitor was made to same location in the same day, if setting "request_is_same_on_same_day" is True
- If not, always returns False
+ Return whether a request from the same visitor was made to the same route within the timespan set by the 'ignore_duplicate_requests_within_x_seconds' option
"""
- if not settings["request_is_same_on_same_day"]: return False
- # get all requests from same visitor to same route
- self.cur.execute(f"SELECT request_id, time FROM request WHERE visitor_id = '{visitor_id}' AND = route_id = '{route_id}'")
- # check if on same day
- date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
- for request_id, date1 in self.cur.fetchall():
- date1 = dt.fromtimestamp(date1).strftime("%Y-%m-%d")
- if date0 == date1:
- pdebug(f"request_exists: Request is on same day as request {request_id}")
- return True
+ ignore_seconds = settings["data-collection"]["ignore_duplicate_requests_within_x_seconds"]
+ time_min, time_max = max(0, request_timestamp - ignore_seconds), request_timestamp + ignore_seconds
+ requests = self(f"SELECT request_id, time FROM request WHERE visitor_id = '{visitor_id}' AND route_id = '{route_id}' AND time BETWEEN {time_min} AND {time_max}")
+ if len(requests) > 0:
+ pdebug(f"request_exists: Found {len(requests)} requests within {ignore_seconds} seconds (v_id={visitor_id}, r_id={route_id}, t={request_timestamp})")
+ return True
return False
- def add_request(self, request: Request) -> (int | None):
- """returns visitor_id if new request was added, else None"""
- visitor_id = self.get_visitor_id(request)
- self.conn.commit()
- # browser_id = self.get_id("browser", request.get_browser())
- # platform_id = self.get_id("platform", request.get_platform())
+ def add_request(self, request: Request) -> tuple[int | None, bool]:
+ """
+ @returns visitor_id, is_new_visitor
+ visitor_id is None if the request already exists and was not added
+ """
+ visitor_id, is_new_visitor = self.get_visitor_id(request)
referer_id = self.get_id("referer", request.referer)
route_id = self.get_id("route", request.route)
# check if request is unique
- if self.request_exists(request, visitor_id, route_id):
- # pdebug("request exists:", request)
- return None
+ if self.request_exists(request.time_local, visitor_id, route_id):
+ pdebug("add_request: exists:", request, lvl=3)
+ return None, is_new_visitor
else:
- # pdebug("new request:", request)
+ pdebug("add_request: added", request, lvl=3)
self.cur.execute(f"INSERT INTO request (visitor_id, route_id, referer_id, time, status) VALUES ({visitor_id}, {route_id}, {referer_id}, {request.time_local}, {request.status})")
- return visitor_id
+ return visitor_id, is_new_visitor
def add_requests(self, requests: list[Request]):
- added_requests = 0
+ """
+ Add a list of requests to the database
+ Adds the visitors, if needed
+ @returns added_request_count, visitors_count, new_visitors_count
+ """
+ added_request_count = 0
# check the new visitors later
- new_visitors = []
+ visitors: set[int] = set()
+ new_visitors: set[int] = set()
for i in range(len(requests)):
- if is_blacklisted(requests[i].request_route, settings["request_route_blacklist"]): continue
- if not is_whitelisted(requests[i].request_route, settings["request_route_whitelist"]): continue
- visitor = self.add_request(requests[i])
- if visitor:
- new_visitors.append(visitor)
+ if is_blacklisted(requests[i].route, settings["data-collection"]["request_route_blacklist"]): continue
+ if not is_whitelisted(requests[i].route, settings["data-collection"]["request_route_whitelist"]): continue
+ visitor_id, is_new_visitor = self.add_request(requests[i])
+ if visitor_id:
+ added_request_count += 1
+ visitors.add(visitor_id)
+ if is_new_visitor:
+ new_visitors.add(visitor_id)
# update the is_human column for all new visitors
for visitor_id in new_visitors:
- # TODO this does not look right
- if not sql_exists(self.cur, "visitor", [("visitor_id", visitor_id)]): continue
- # pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {self.cur.fetchall()}")
- self.conn.commit()
- pmessage(f"Collection Summary: Added {len(new_visitors)} new visitors and {added_requests} new requests.")
+ self.update_is_visitor_human(visitor_id)
+
+ return added_request_count, len(visitors), len(new_visitors)
def get_id(self, table: str, name: str, insert=True) -> int | None:
@@ -192,7 +214,8 @@ class Database:
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
name = sanitize(replace_null(name))
# if non existent, add name
- if not sql_exists(self.cur, table, [("name", name)]):
+ pdebug(f"get_id(table={table},\tname={name}", lvl=4)
+ if not sql_exists(self.cur, table, [("name", name)], do_sanitize=False): # double sanitizing might lead to problems with quotes
if not insert: return None
self.cur.execute(f"INSERT INTO {table} (name) VALUES ('{name}')")
return self(f"SELECT {table}_id FROM {table} WHERE name = '{name}'")[0][0]
@@ -207,8 +230,7 @@ class Database:
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
ret = self(f"SELECT name FROM {table} WHERE {table}_id = '{id_}'")
if len(ret) == 0: return None
- # TODO check if this returns tuple or value
- return ret[0]
+ return ret[0][0]
@@ -231,7 +253,7 @@ class Database:
"""
update the ip_range_id column of visitor with visitor_id
"""
- results = self(f"SELECT ip_address FROM visitor WHERE visitor_id = {visitor_id}")
+ results = self(f"SELECT ip_address FROM visitor WHERE visitor_id = '{visitor_id}'")
if len(results) == 0: # sanity checks
warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
return
@@ -248,7 +270,9 @@ class Database:
get the id of country of name
if not present, insert and return id
"""
- if not sql_exists(self.cur, "country", [("name", name)]):
+ name = sanitize(name)
+ code = sanitize(code)
+ if not sql_exists(self.cur, "country", [("name", name)], do_sanitize=False):
self.cur.execute(f"INSERT INTO country (name, code) VALUES ('{name}', '{code}')")
countries = self(f"SELECT country_id FROM country WHERE name = '{name}'")
if len(countries) > 0:
@@ -260,9 +284,11 @@ class Database:
return country_id_val
def get_city_id(self, name, region, country_id) -> int:
- if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)]):
+ name = sanitize(name)
+ region = sanitize(region)
+ if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False):
self.cur.execute(f"INSERT INTO city (name, region, country_id) VALUES ('{name}', '{region}', '{country_id}')")
- cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)])
+ cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False)
if len(cities) > 0:
city_id_val = cities[0][0]
else:
@@ -283,19 +309,36 @@ class Database:
"""
# indices for the csv
FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5
+
+ # FROM https://stackoverflow.com/questions/845058/how-to-get-line-count-of-a-large-file-cheaply-in-python (Quentin Pradet)
+ def _count_generator(reader):
+ b = reader(1024 * 1024)
+ while b:
+ yield b
+ b = reader(1024*1024)
+ def rawgencount(filename):
+ with open(filename, "rb") as file:
+ f_gen = _count_generator(file.raw.read)
+ return sum( buf.count(b'\n') for buf in f_gen )
+
+ pmessage(f"Recreating the GeoIP database from {geoip_city_csv_path}. This might take a long time...")
+ row_count = rawgencount(geoip_city_csv_path)
+ pmessage(f"Total rows: {row_count}")
+
with open(geoip_city_csv_path, 'r') as file:
csv = reader(file, delimiter=',', quotechar='"')
+ file.seek(0)
# execute only if file could be opened
# delete all previous data
self.cur.execute(f"DELETE FROM ip_range")
self.cur.execute(f"DELETE FROM city")
self.cur.execute(f"DELETE FROM country")
+ self.conn.commit()
self.cur.execute(f"VACUUM")
# guarantees that unkown city/country will have id 0
self.cur.execute(f"INSERT INTO country (country_id, name, code) VALUES (0, 'Unknown', 'XX') ")
self.cur.execute(f"INSERT INTO city (city_id, name, region) VALUES (0, 'Unknown', 'Unkown') ")
- print(f"Recreating the geoip database from {geoip_city_csv_path}. This might take a long time...")
# for combining city ranges into a 'City in ' range
# country_id for the range that was last added (for combining multiple csv rows in one ip_range)
@@ -307,18 +350,22 @@ class Database:
def add_range(low, high, city_name, region, country_id):
city_id = self.get_city_id(city_name, region, country_id)
- pdebug(f"update_ip_range_id: Adding range for city={city_name}, country_id={country_id}, low={low}, high={high}")
+ pdebug(f"update_ip_range_id: Adding range for city={city_name:20}, country_id={country_id:3}, low={low:16}, high={high:16}", lvl=2)
self.cur.execute(f"INSERT INTO ip_range (low, high, city_id) VALUES ({low}, {high}, {city_id})")
- for row in csv:
+ for i, row in enumerate(csv, 1):
+ # if i % 100 == 0:
+ pmessage(f"Updating GeoIP database: {i:7}/{row_count} ({100.0*i/row_count:.2f}%)", end="\r")
# these might contain problematic characters (')
- row[CITY] = sanitize(row[CITY])
- row[COUNTRY] = sanitize(row[COUNTRY])
- row[REGION] = sanitize(row[REGION])
+ # row[CITY] = sanitize(row[CITY])
+ if row[COUNTRY] == "United Kingdom of Great Britain and Northern Ireland":
+ row[COUNTRY] = "United Kingdom"
+ # row[COUNTRY] = sanitize(row[COUNTRY])
+ # row[REGION] = sanitize(row[REGION])
# make sure country exists
country_id = self.get_country_id(row[COUNTRY], row[CODE])
# only add cities for countries the user is interested in
- if row[CODE] in settings["get_cities_for_countries"]:
+ if row[CODE] in settings["data-collection"]["get_cities_for_countries"]:
add_range(row[FROM], row[TO], row[CITY], row[REGION], country_id)
else:
# if continuing
@@ -343,13 +390,13 @@ class Database:
# REQUEST
#
# TIME/DATE
- def get_earliest_date(self) -> int:
+ def get_earliest_timestamp(self) -> int:
"""return the earliest time as unixepoch"""
date = self(f"SELECT MIN(time) FROM request")[0][0]
if not isinstance(date, int): return 0
else: return date
- def get_latest_date(self) -> int:
+ def get_latest_timestamp(self) -> int:
"""return the latest time as unixepoch"""
date = self(f"SELECT MAX(time) FROM request")[0][0]
if not isinstance(date, int): return 0
diff --git a/regina/sql/create_db.sql b/regina/sql/create_db.sql
index a712aac..973c609 100644
--- a/regina/sql/create_db.sql
+++ b/regina/sql/create_db.sql
@@ -1,7 +1,7 @@
-- see database.uxf
CREATE TABLE IF NOT EXISTS visitor(
visitor_id INTEGER PRIMARY KEY,
-
+ ip_address INTEGER,
ip_range_id INTEGER,
platform_id INTEGER,
browser_id INTEGER,
@@ -28,12 +28,12 @@ CREATE TABLE IF NOT EXISTS request(
request_id INTEGER PRIMARY KEY,
visitor_id INTEGER,
route_id INTEGER,
- referer INTEGER,
+ referer_id INTEGER,
time INTEGER,
status INTEGER,
FOREIGN KEY(visitor_id) REFERENCES visitor(visitor_id),
FOREIGN KEY(route_id) REFERENCES route(route_id),
- FOREIGN KEY(referer) REFERENCES referer(referer_id)
+ FOREIGN KEY(referer_id) REFERENCES referer(referer_id)
) STRICT;
CREATE TABLE IF NOT EXISTS referer(
@@ -57,7 +57,7 @@ CREATE TABLE IF NOT EXISTS ip_range(
) STRICT;
CREATE TABLE IF NOT EXISTS city(
- city INTEGER PRIMARY KEY,
+ city_id INTEGER PRIMARY KEY,
name TEXT,
region TEXT,
country_id INTEGER,
diff --git a/regina/utility/sql_util.py b/regina/utility/sql_util.py
index b399705..808da10 100644
--- a/regina/utility/sql_util.py
+++ b/regina/utility/sql_util.py
@@ -1,6 +1,40 @@
import sqlite3 as sql
"""Various utilities"""
+def get_date_constraint(at_date=None, min_date=None, max_date=None):
+ """
+ build a SQL condition string that constrains the time column of a query
+ each argument may be a date string or an int/float unix epoch timestamp; the given conditions are joined with AND
+ """
+ # dates in unix time
+ s = ""
+ if at_date is not None:
+ if isinstance(at_date, str):
+ s += f"DATE(time, 'unixepoch') = '{sanitize(at_date)}' AND "
+ elif isinstance(at_date, int|float):
+ s += f"time = {int(at_date)} AND "
+ else:
+ print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
+ if min_date is not None:
+ if isinstance(min_date, str):
+ s += f"DATE(time, 'unixepoch') >= '{sanitize(min_date)}' AND "
+ elif isinstance(min_date, int|float):
+ s += f"time >= {int(min_date)} AND "
+ else:
+ print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
+ if max_date is not None:
+ if isinstance(max_date, str):
+ s += f"DATE(time, 'unixepoch') <= '{sanitize(max_date)}' AND "
+ elif isinstance(max_date, int|float):
+ s += f"time <= {int(max_date)} AND "
+ else:
+ print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
+ if s == "":
+ print(f"WARNING: get_where_date_str: no date_str generated. Returning 'time > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
+ return "time > 0"
+ return s.removesuffix(" AND ")
+
+
def replace_null(s):
if not s:
return "None"
@@ -11,10 +45,11 @@ def sanitize(s):
return s.replace("'", r"''").strip(" ")
# .replace('"', r'\"')\
-def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str:
+def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True) -> str:
c_str = ""
for name, val in constraints:
- c_str += f"{name} = '{sanitize(val)}' {logic} "
+ if do_sanitize: val = sanitize(val)
+ c_str += f"{name} = '{val}' {logic} "
return c_str.strip(logic + " ")
def sql_get_value_str(values: list[list]) -> str:
@@ -25,12 +60,12 @@ def sql_get_value_str(values: list[list]) -> str:
c_str = c_str.strip(", ") + "), "
return c_str.strip(", ")
-def sql_exists(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND") -> bool:
- cur.execute(f"SELECT EXISTS (SELECT 1 FROM {table} WHERE {sql_get_constaint_str(constraints, logic)})")
+def sql_exists(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True) -> bool:
+ cur.execute(f"SELECT EXISTS (SELECT 1 FROM {table} WHERE {sql_get_constaint_str(constraints, logic, do_sanitize=do_sanitize)})")
return cur.fetchone()[0] == 1
-def sql_select(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND"):
- cur.execute(f"SELECT * FROM {table} WHERE {sql_get_constaint_str(constraints, logic)}")
+def sql_select(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True):
+ cur.execute(f"SELECT * FROM {table} WHERE {sql_get_constaint_str(constraints, logic, do_sanitize=do_sanitize)}")
return cur.fetchall()
def sql_insert(cur: sql.Cursor, table: str, values: list[list]):