bug fixes

This commit is contained in:
matthias@arch 2023-05-01 13:30:27 +02:00
parent 56b4fbc13e
commit 3457fff2c6
8 changed files with 101 additions and 83 deletions

View File

@ -1,13 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml version="1.0" encoding="UTF-8" standalone="no"?>
<diagram program="umlet" version="15.0.0"> <diagram program="umlet" version="15.1">
<zoom_level>13</zoom_level> <zoom_level>10</zoom_level>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>299</x> <x>70</x>
<y>221</y> <y>220</y>
<w>299</w> <w>250</w>
<h>247</h> <h>190</h>
</coordinates> </coordinates>
<panel_attributes>visitor <panel_attributes>visitor
-- --
@ -27,10 +27,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>1040</x> <x>640</x>
<y>221</y> <y>220</y>
<w>234</w> <w>180</w>
<h>130</h> <h>100</h>
</coordinates> </coordinates>
<panel_attributes>filegroup <panel_attributes>filegroup
-- --
@ -44,10 +44,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>936</x> <x>560</x>
<y>221</y> <y>220</y>
<w>130</w> <w>100</w>
<h>65</h> <h>50</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
@ -58,10 +58,10 @@ m2=1
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>702</x> <x>380</x>
<y>208</y> <y>210</y>
<w>247</w> <w>190</w>
<h>221</h> <h>170</h>
</coordinates> </coordinates>
<panel_attributes>request <panel_attributes>request
-- --
@ -80,29 +80,29 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>585</x> <x>310</x>
<y>221</y> <y>220</y>
<w>143</w> <w>90</w>
<h>65</h> <h>50</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
m2=n m2=n
</panel_attributes> </panel_attributes>
<additional_attributes>10.0;20.0;90.0;20.0</additional_attributes> <additional_attributes>10.0;20.0;70.0;20.0</additional_attributes>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>1040</x> <x>640</x>
<y>455</y> <y>400</y>
<w>234</w> <w>180</w>
<h>130</h> <h>100</h>
</coordinates> </coordinates>
<panel_attributes>file <panel_attributes>file
-- --
&lt;&lt;PK&gt;&gt; &lt;&lt;PK&gt;&gt;
- name: TEXT - filename: TEXT
-- --
- group_id: INTEGER - group_id: INTEGER
-- --
@ -112,10 +112,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>1079</x> <x>670</x>
<y>338</y> <y>310</y>
<w>52</w> <w>40</w>
<h>143</h> <h>110</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
@ -126,10 +126,10 @@ m2=1
<element> <element>
<id>UMLNote</id> <id>UMLNote</id>
<coordinates> <coordinates>
<x>845</x> <x>490</x>
<y>65</y> <y>100</y>
<w>390</w> <w>300</w>
<h>91</h> <h>70</h>
</coordinates> </coordinates>
<panel_attributes>One group contains multiple files. <panel_attributes>One group contains multiple files.
Lets you group the images from a Lets you group the images from a
@ -140,10 +140,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>1053</x> <x>650</x>
<y>143</y> <y>160</y>
<w>39</w> <w>30</w>
<h>104</h> <h>80</h>
</coordinates> </coordinates>
<panel_attributes>lt=&lt;-</panel_attributes> <panel_attributes>lt=&lt;-</panel_attributes>
<additional_attributes>10.0;60.0;10.0;10.0</additional_attributes> <additional_attributes>10.0;60.0;10.0;10.0</additional_attributes>
@ -151,10 +151,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>676</x> <x>360</x>
<y>611</y> <y>520</y>
<w>247</w> <w>190</w>
<h>169</h> <h>130</h>
</coordinates> </coordinates>
<panel_attributes>city <panel_attributes>city
-- --
@ -170,10 +170,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>1014</x> <x>620</x>
<y>611</y> <y>520</y>
<w>156</w> <w>120</w>
<h>143</h> <h>110</h>
</coordinates> </coordinates>
<panel_attributes>country <panel_attributes>country
-- --
@ -188,10 +188,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>910</x> <x>540</x>
<y>637</y> <y>540</y>
<w>130</w> <w>100</w>
<h>65</h> <h>50</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
@ -202,10 +202,10 @@ m2=n
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>572</x> <x>280</x>
<y>637</y> <y>540</y>
<w>130</w> <w>100</w>
<h>65</h> <h>50</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
@ -216,10 +216,10 @@ m2=n
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>364</x> <x>120</x>
<y>611</y> <y>520</y>
<w>221</w> <w>170</w>
<h>169</h> <h>130</h>
</coordinates> </coordinates>
<panel_attributes>ip_range <panel_attributes>ip_range
-- --
@ -235,10 +235,10 @@ style=autoresize</panel_attributes>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>429</x> <x>170</x>
<y>455</y> <y>400</y>
<w>52</w> <w>40</w>
<h>182</h> <h>140</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1

View File

@ -58,6 +58,8 @@ get_visitor_location = False
# eg for EU: AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE # eg for EU: AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE
get_cities_for_countries = get_cities_for_countries =
hash_ip_address = True
# ***************************************** VISUALIZATION ***************************************** # ***************************************** VISUALIZATION *****************************************
# these settings can be changed at any point in time as they only affect the visualization of the data # these settings can be changed at any point in time as they only affect the visualization of the data

View File

@ -4,7 +4,7 @@ from ipaddress import IPv4Address, ip_address
from time import mktime from time import mktime
from datetime import datetime as dt from datetime import datetime as dt
from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
from regina.utility.utility import pdebug, warning, pmessage from regina.utility.utility import pdebug, warning, pmessage
from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
@ -79,31 +79,39 @@ def parse_log(logfile:str) -> list[Request]:
def visitor_exists(cursor, request) -> bool: def visitor_exists(cursor, request) -> bool:
if settings["unique_visitor_is_ip_address"]: if settings["hash_ip_address"]:
return sql_exists(cursor, t_visitor, [("ip_address", request.ip_address)]) ip_address = hash(request.ip_address)
else: else:
return sql_exists(cursor, t_visitor, [("ip_address", request.ip_address), ("visitor_agent", request.visitor_agent)]) ip_address = request.ip_address
if settings["unique_visitor_is_ip_address"]:
return sql_exists(cursor, t_visitor, [("ip_address", ip_address)])
else:
return sql_exists(cursor, t_visitor, [("ip_address", ip_address), ("visitor_agent", request.visitor_agent)])
def get_visitor_id(request: Request, cursor: sql.Cursor) -> int: def get_visitor_id(request: Request, cursor: sql.Cursor) -> int:
""" """
get the visitor_id. Adds the visitor if not already existing get the visitor_id. Adds the visitor if not already existing
""" """
# if visitor exists if settings["hash_ip_address"]:
ip_address = hash(request.ip_address)
else:
ip_address = request.ip_address
if visitor_exists(cursor, request): if visitor_exists(cursor, request):
if settings["unique_visitor_is_ip_address"]: if settings["unique_visitor_is_ip_address"]:
visitor_id = sql_select(cursor, t_visitor, [("ip_address", request.ip_address)])[0][0] visitor_id = sql_select(cursor, t_visitor, [("ip_address", ip_address)])[0][0]
else: else:
visitor_id = sql_select(cursor, t_visitor, [("ip_address", request.ip_address), ("visitor_agent", request.visitor_agent)])[0][0] visitor_id = sql_select(cursor, t_visitor, [("ip_address", ip_address), ("visitor_agent", request.visitor_agent)])[0][0]
else: # new visitor else: # new visitor
# new visitor_id is number of elements # new visitor_id is number of elements
visitor_id: int = sql_tablesize(cursor, t_visitor) visitor_id = sql_max(cursor, t_visitor, "visitor_id") + 1
# pdebug("new visitor:", visitor_id, request.ip_address) # pdebug("new visitor:", visitor_id, request.ip_address)
platform, browser, mobile = get_os_browser_pairs_from_agent(request.visitor_agent) platform, browser, mobile = get_os_browser_pairs_from_agent(request.visitor_agent)
ip_range_id_val = 0 ip_range_id_val = 0
if settings["get_visitor_location"]: if settings["get_visitor_location"]:
ip_range_id_val = get_ip_range_id(cursor, request.ip_address) ip_range_id_val = get_ip_range_id(cursor, request.ip_address)
is_human = 0 # is_visitor_human cannot be called until visitor is in db int(is_visitor_human(cursor, visitor_id)) is_human = 0 # is_visitor_human cannot be called until visitor is in db int(is_visitor_human(cursor, visitor_id))
cursor.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({visitor_id}, '{request.ip_address}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{is_human}', '{ip_range_id_val}');") cursor.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({visitor_id}, '{ip_address}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{is_human}', '{ip_range_id_val}');")
return visitor_id return visitor_id
def is_visitor_human(cur: sql.Cursor, visitor_id: int): def is_visitor_human(cur: sql.Cursor, visitor_id: int):
@ -204,7 +212,7 @@ def add_requests_to_db(requests: list[Request], db_name: str):
cursor = conn.cursor() cursor = conn.cursor()
added_requests = 0 added_requests = 0
# check the new visitors later # check the new visitors later
max_visitor_id = sql_tablesize(cursor, t_visitor) max_visitor_id = sql_max(cursor, t_visitor, "visitor_id")
request_blacklist = settings["request_location_regex_blacklist"] request_blacklist = settings["request_location_regex_blacklist"]
for i in range(len(requests)): for i in range(len(requests)):
request = requests[i] request = requests[i]
@ -223,11 +231,12 @@ def add_requests_to_db(requests: list[Request], db_name: str):
pass pass
else: else:
# pdebug("new request:", request) # pdebug("new request:", request)
request_id = sql_tablesize(cursor, t_request) request_id = sql_max(cursor, t_request, "request_id") + 1
sql_insert(cursor, t_request, [[request_id, visitor_id, group_id, request.time_local, request.referer, request.status]]) sql_insert(cursor, t_request, [[request_id, visitor_id, group_id, request.time_local, request.referer, request.status]])
added_requests += 1 added_requests += 1
visitor_count = sql_tablesize(cursor, t_visitor) visitor_count = sql_tablesize(cursor, t_visitor)
for visitor_id in range(max_visitor_id, visitor_count): for visitor_id in range(max_visitor_id, visitor_count):
if not sql_exists(cursor, t_visitor, [(str(visitor_id), "visitor_id")]): continue
is_human = is_visitor_human(cursor, visitor_id) is_human = is_visitor_human(cursor, visitor_id)
cursor.execute(f"SELECT * FROM {t_visitor} WHERE visitor_id = {visitor_id}") cursor.execute(f"SELECT * FROM {t_visitor} WHERE visitor_id = {visitor_id}")
# pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {cursor.fetchall()}") # pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {cursor.fetchall()}")

View File

@ -3,7 +3,7 @@ import sqlite3 as sql
from csv import reader from csv import reader
from os import path, listdir from os import path, listdir
# local # local
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
from regina.utility.utility import pdebug from regina.utility.utility import pdebug
from regina.utility.globals import settings from regina.utility.globals import settings
@ -48,7 +48,7 @@ t_ip_range = "ip_range"
visitor_id = Entry("visitor_id", "INTEGER") visitor_id = Entry("visitor_id", "INTEGER")
request_id = Entry("request_id", "INTEGER") request_id = Entry("request_id", "INTEGER")
filegroup_id = Entry("group_id", "INTEGER") filegroup_id = Entry("group_id", "INTEGER")
ip_address_entry = Entry("ip_address", "TEXT") ip_address_entry = Entry("ip_address", "INTEGER")
filename_entry = Entry("filename", "TEXT") filename_entry = Entry("filename", "TEXT")
city_id = Entry("city_id", "INTEGER") city_id = Entry("city_id", "INTEGER")
country_id = Entry("country_id", "INTEGER") country_id = Entry("country_id", "INTEGER")
@ -120,7 +120,8 @@ def get_filegroup(filename: str, cursor: sql.Cursor) -> int:
if group_id_candidates: if group_id_candidates:
return group_id_candidates[0][0] return group_id_candidates[0][0]
else: # add new group file filename else: # add new group file filename
group_id = sql_tablesize(cursor, t_filegroup) group_id = sql_max(cursor, t_filegroup, "group_id") + 1
# pdebug("new file(group):", group_id, filename) # pdebug("new file(group):", group_id, filename)
# add group # add group
sql_insert(cursor, t_filegroup, [[group_id, filename]]) sql_insert(cursor, t_filegroup, [[group_id, filename]])
@ -138,7 +139,7 @@ def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
if sql_exists(cursor, t_filegroup, [("groupname", name)]): if sql_exists(cursor, t_filegroup, [("groupname", name)]):
group_id = sql_select(cursor, t_filegroup, [("groupname", name)])[0][0] group_id = sql_select(cursor, t_filegroup, [("groupname", name)])[0][0]
else: else:
group_id = sql_tablesize(cursor, t_filegroup) group_id = sql_max(cursor, t_filegroup, "group_id") + 1
sql_insert(cursor, t_filegroup, [(group_id, name)]) sql_insert(cursor, t_filegroup, [(group_id, name)])
# pdebug("create_filegroups: group_id", group_id) # pdebug("create_filegroups: group_id", group_id)
# create/edit file # create/edit file

View File

@ -128,7 +128,7 @@ def get_where_date_str(at_date=None, min_date=None, max_date=None):
else: else:
print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}") print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
if s == "": if s == "":
print(f"WARNING: get_where_date_str: no date_str generated. Returing 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}") print(f"WARNING: get_where_date_str: no date_str generated. Returning 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
return "date > 0" return "date > 0"
return s.removesuffix(" AND ") return s.removesuffix(" AND ")
@ -353,7 +353,6 @@ def get_city_and_country_ranking(cur:sql.Cursor, require_humans=True, regex_city
cities = cur.fetchall() cities = cur.fetchall()
cities_dict = {} cities_dict = {}
country_dict = {} country_dict = {}
# TODO: find out why regex_blacklist does not work
pdebug(f"get_city_and_country_ranking: found {len(cities)} ip_ranges") pdebug(f"get_city_and_country_ranking: found {len(cities)} ip_ranges")
validate_city_cmd = lambda _ : True validate_city_cmd = lambda _ : True

View File

@ -48,6 +48,7 @@ def help():
--update-geoip <path> path to IP-COUNTRY-REGION-CITY database in csv format --update-geoip <path> path to IP-COUNTRY-REGION-CITY database in csv format
--visualize generate the visualization website --visualize generate the visualization website
--collect fill the database from the nginx access log --collect fill the database from the nginx access log
--log-file <path> use alternate logfile
""" """
print(helpstring) print(helpstring)

View File

@ -17,6 +17,7 @@ settings = {
"unique_visitor_is_ip_address": False, "unique_visitor_is_ip_address": False,
"get_visitor_location": False, "get_visitor_location": False,
"get_cities_for_countries": [""], # list of country codes for which the ip address ranges need to be collected at city level, not country level "get_cities_for_countries": [""], # list of country codes for which the ip address ranges need to be collected at city level, not country level
"hash_ip_address": True,
# VISUALIZATION # VISUALIZATION
"get_human_percentage": False, "get_human_percentage": False,

View File

@ -2,8 +2,7 @@ import sqlite3 as sql
"""Various utilities""" """Various utilities"""
def sanitize(s): def sanitize(s):
if type(s) != str: return s if type(s) != str: return s
return s\ return s.replace("'", r"''").strip(" ")
.replace("''", "'").replace("'", r"''").strip(" ")
# .replace('"', r'\"')\ # .replace('"', r'\"')\
def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str: def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str:
@ -35,6 +34,12 @@ def sql_tablesize(cur: sql.Cursor, table: str) -> int:
cur.execute(f"SELECT Count(*) FROM {table}") cur.execute(f"SELECT Count(*) FROM {table}")
return cur.fetchone()[0] return cur.fetchone()[0]
def sql_max(cur: sql.Cursor, table: str, column: str) -> int:
    """
    Return the largest integer stored in `column` of `table`, or 0.

    `table` and `column` are interpolated directly into the SQL text, so they
    must be trusted identifiers, never user input.

    Returns 0 when the query result is not an int. That is the case for an
    empty table (MAX() yields NULL, i.e. None) and for columns whose maximum
    is a non-integer value. Callers can therefore always compute the next
    free id as `sql_max(...) + 1`.
    """
    cur.execute(f"SELECT MAX({column}) FROM {table}")
    val = cur.fetchone()[0]
    # isinstance instead of an exact type comparison; None/str/float fall back to 0
    return val if isinstance(val, int) else 0
def sql_get_count_where(cur: sql.Cursor, table, constraints) -> int: def sql_get_count_where(cur: sql.Cursor, table, constraints) -> int:
cur.execute(f"SELECT COUNT(*) FROM {table} WHERE {sql_get_constaint_str(constraints)}") cur.execute(f"SELECT COUNT(*) FROM {table} WHERE {sql_get_constaint_str(constraints)}")
return cur.fetchone()[0] return cur.fetchone()[0]