bug fixes
This commit is contained in:
parent
56b4fbc13e
commit
3457fff2c6
128
database.uxf
128
database.uxf
@ -1,13 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<diagram program="umlet" version="15.0.0">
|
||||
<zoom_level>13</zoom_level>
|
||||
<diagram program="umlet" version="15.1">
|
||||
<zoom_level>10</zoom_level>
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>299</x>
|
||||
<y>221</y>
|
||||
<w>299</w>
|
||||
<h>247</h>
|
||||
<x>70</x>
|
||||
<y>220</y>
|
||||
<w>250</w>
|
||||
<h>190</h>
|
||||
</coordinates>
|
||||
<panel_attributes>visitor
|
||||
--
|
||||
@ -27,10 +27,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>1040</x>
|
||||
<y>221</y>
|
||||
<w>234</w>
|
||||
<h>130</h>
|
||||
<x>640</x>
|
||||
<y>220</y>
|
||||
<w>180</w>
|
||||
<h>100</h>
|
||||
</coordinates>
|
||||
<panel_attributes>filegroup
|
||||
--
|
||||
@ -44,10 +44,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>936</x>
|
||||
<y>221</y>
|
||||
<w>130</w>
|
||||
<h>65</h>
|
||||
<x>560</x>
|
||||
<y>220</y>
|
||||
<w>100</w>
|
||||
<h>50</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=n
|
||||
@ -58,10 +58,10 @@ m2=1
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>702</x>
|
||||
<y>208</y>
|
||||
<w>247</w>
|
||||
<h>221</h>
|
||||
<x>380</x>
|
||||
<y>210</y>
|
||||
<w>190</w>
|
||||
<h>170</h>
|
||||
</coordinates>
|
||||
<panel_attributes>request
|
||||
--
|
||||
@ -80,29 +80,29 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>585</x>
|
||||
<y>221</y>
|
||||
<w>143</w>
|
||||
<h>65</h>
|
||||
<x>310</x>
|
||||
<y>220</y>
|
||||
<w>90</w>
|
||||
<h>50</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=1
|
||||
m2=n
|
||||
</panel_attributes>
|
||||
<additional_attributes>10.0;20.0;90.0;20.0</additional_attributes>
|
||||
<additional_attributes>10.0;20.0;70.0;20.0</additional_attributes>
|
||||
</element>
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>1040</x>
|
||||
<y>455</y>
|
||||
<w>234</w>
|
||||
<h>130</h>
|
||||
<x>640</x>
|
||||
<y>400</y>
|
||||
<w>180</w>
|
||||
<h>100</h>
|
||||
</coordinates>
|
||||
<panel_attributes>file
|
||||
--
|
||||
<<PK>>
|
||||
- name: TEXT
|
||||
- filename: TEXT
|
||||
--
|
||||
- group_id: INTEGER
|
||||
--
|
||||
@ -112,10 +112,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>1079</x>
|
||||
<y>338</y>
|
||||
<w>52</w>
|
||||
<h>143</h>
|
||||
<x>670</x>
|
||||
<y>310</y>
|
||||
<w>40</w>
|
||||
<h>110</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=n
|
||||
@ -126,10 +126,10 @@ m2=1
|
||||
<element>
|
||||
<id>UMLNote</id>
|
||||
<coordinates>
|
||||
<x>845</x>
|
||||
<y>65</y>
|
||||
<w>390</w>
|
||||
<h>91</h>
|
||||
<x>490</x>
|
||||
<y>100</y>
|
||||
<w>300</w>
|
||||
<h>70</h>
|
||||
</coordinates>
|
||||
<panel_attributes>One group contains multiple files.
|
||||
Lets you group the images from a
|
||||
@ -140,10 +140,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>1053</x>
|
||||
<y>143</y>
|
||||
<w>39</w>
|
||||
<h>104</h>
|
||||
<x>650</x>
|
||||
<y>160</y>
|
||||
<w>30</w>
|
||||
<h>80</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=<-</panel_attributes>
|
||||
<additional_attributes>10.0;60.0;10.0;10.0</additional_attributes>
|
||||
@ -151,10 +151,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>676</x>
|
||||
<y>611</y>
|
||||
<w>247</w>
|
||||
<h>169</h>
|
||||
<x>360</x>
|
||||
<y>520</y>
|
||||
<w>190</w>
|
||||
<h>130</h>
|
||||
</coordinates>
|
||||
<panel_attributes>city
|
||||
--
|
||||
@ -170,10 +170,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>1014</x>
|
||||
<y>611</y>
|
||||
<w>156</w>
|
||||
<h>143</h>
|
||||
<x>620</x>
|
||||
<y>520</y>
|
||||
<w>120</w>
|
||||
<h>110</h>
|
||||
</coordinates>
|
||||
<panel_attributes>country
|
||||
--
|
||||
@ -188,10 +188,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>910</x>
|
||||
<y>637</y>
|
||||
<w>130</w>
|
||||
<h>65</h>
|
||||
<x>540</x>
|
||||
<y>540</y>
|
||||
<w>100</w>
|
||||
<h>50</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=1
|
||||
@ -202,10 +202,10 @@ m2=n
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>572</x>
|
||||
<y>637</y>
|
||||
<w>130</w>
|
||||
<h>65</h>
|
||||
<x>280</x>
|
||||
<y>540</y>
|
||||
<w>100</w>
|
||||
<h>50</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=1
|
||||
@ -216,10 +216,10 @@ m2=n
|
||||
<element>
|
||||
<id>UMLClass</id>
|
||||
<coordinates>
|
||||
<x>364</x>
|
||||
<y>611</y>
|
||||
<w>221</w>
|
||||
<h>169</h>
|
||||
<x>120</x>
|
||||
<y>520</y>
|
||||
<w>170</w>
|
||||
<h>130</h>
|
||||
</coordinates>
|
||||
<panel_attributes>ip_range
|
||||
--
|
||||
@ -235,10 +235,10 @@ style=autoresize</panel_attributes>
|
||||
<element>
|
||||
<id>Relation</id>
|
||||
<coordinates>
|
||||
<x>429</x>
|
||||
<y>455</y>
|
||||
<w>52</w>
|
||||
<h>182</h>
|
||||
<x>170</x>
|
||||
<y>400</y>
|
||||
<w>40</w>
|
||||
<h>140</h>
|
||||
</coordinates>
|
||||
<panel_attributes>lt=-
|
||||
m1=1
|
||||
|
@ -58,6 +58,8 @@ get_visitor_location = False
|
||||
# e.g. for the EU: AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE
|
||||
get_cities_for_countries =
|
||||
|
||||
hash_ip_address = True
|
||||
|
||||
|
||||
# ***************************************** VISUALIZATION *****************************************
|
||||
# these settings can be changed at any point in time, as they only affect the visualization of the data
|
||||
|
@ -4,7 +4,7 @@ from ipaddress import IPv4Address, ip_address
|
||||
from time import mktime
|
||||
from datetime import datetime as dt
|
||||
from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id
|
||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
|
||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
||||
from regina.utility.utility import pdebug, warning, pmessage
|
||||
from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
|
||||
|
||||
@ -79,31 +79,39 @@ def parse_log(logfile:str) -> list[Request]:
|
||||
|
||||
|
||||
def _stored_ip_address(raw_ip_address):
    """
    Return the value that represents an ip address in the visitor table.

    If settings["hash_ip_address"] is falsy the address is returned unchanged.
    Otherwise a signed 64-bit integer derived from the SHA-256 digest of the
    address is returned.

    The builtin hash() must not be used here: for str it is salted per
    interpreter process, so a value written in one run would never be found
    again in the next run, and for small ints it is the identity, which would
    not hide the address at all.
    """
    if not settings["hash_ip_address"]:
        return raw_ip_address
    # local import so the block does not depend on a file-level import
    from hashlib import sha256
    digest = sha256(str(raw_ip_address).encode("utf-8")).digest()
    # 8 bytes, signed: fits into the range of an SQLite INTEGER
    return int.from_bytes(digest[:8], "big", signed=True)


def _visitor_constraints(request) -> list:
    """
    Build the (column, value) constraints that identify the visitor of a request.

    A visitor is identified by the (possibly hashed) ip address alone if
    settings["unique_visitor_is_ip_address"] is set, otherwise by the
    ip address together with the visitor agent.
    """
    constraints = [("ip_address", _stored_ip_address(request.ip_address))]
    if not settings["unique_visitor_is_ip_address"]:
        constraints.append(("visitor_agent", request.visitor_agent))
    return constraints


def visitor_exists(cursor, request) -> bool:
    """
    Return whether the visitor that sent the request is already in the visitor table.
    """
    return sql_exists(cursor, t_visitor, _visitor_constraints(request))


def get_visitor_id(request: Request, cursor: sql.Cursor) -> int:
    """
    get the visitor_id. Adds the visitor if not already existing
    """
    if visitor_exists(cursor, request):
        return sql_select(cursor, t_visitor, _visitor_constraints(request))[0][0]

    # new visitor: take the next free id. MAX + 1 instead of the table size,
    # so ids stay unique even if rows were deleted
    visitor_id = sql_max(cursor, t_visitor, "visitor_id") + 1
    # pdebug("new visitor:", visitor_id, request.ip_address)
    platform, browser, mobile = get_os_browser_pairs_from_agent(request.visitor_agent)
    ip_range_id_val = 0
    if settings["get_visitor_location"]:
        # the location lookup is done with the real address, not the hashed one
        ip_range_id_val = get_ip_range_id(cursor, request.ip_address)
    is_human = 0  # is_visitor_human cannot be called until visitor is in db int(is_visitor_human(cursor, visitor_id))
    stored_ip = _stored_ip_address(request.ip_address)
    # NOTE(review): the values are interpolated into the statement, so this relies
    # on visitor_agent having been sanitized before - TODO confirm against parse_log
    cursor.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({visitor_id}, '{stored_ip}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{is_human}', '{ip_range_id_val}');")
    return visitor_id
|
||||
|
||||
def is_visitor_human(cur: sql.Cursor, visitor_id: int):
|
||||
@ -204,7 +212,7 @@ def add_requests_to_db(requests: list[Request], db_name: str):
|
||||
cursor = conn.cursor()
|
||||
added_requests = 0
|
||||
# check the new visitors later
|
||||
max_visitor_id = sql_tablesize(cursor, t_visitor)
|
||||
max_visitor_id = sql_max(cursor, t_visitor, "visitor_id")
|
||||
request_blacklist = settings["request_location_regex_blacklist"]
|
||||
for i in range(len(requests)):
|
||||
request = requests[i]
|
||||
@ -223,11 +231,12 @@ def add_requests_to_db(requests: list[Request], db_name: str):
|
||||
pass
|
||||
else:
|
||||
# pdebug("new request:", request)
|
||||
request_id = sql_tablesize(cursor, t_request)
|
||||
request_id = sql_max(cursor, t_request, "request_id") + 1
|
||||
sql_insert(cursor, t_request, [[request_id, visitor_id, group_id, request.time_local, request.referer, request.status]])
|
||||
added_requests += 1
|
||||
visitor_count = sql_tablesize(cursor, t_visitor)
|
||||
for visitor_id in range(max_visitor_id, visitor_count):
|
||||
if not sql_exists(cursor, t_visitor, [(str(visitor_id), "visitor_id")]): continue
|
||||
is_human = is_visitor_human(cursor, visitor_id)
|
||||
cursor.execute(f"SELECT * FROM {t_visitor} WHERE visitor_id = {visitor_id}")
|
||||
# pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {cursor.fetchall()}")
|
||||
|
@ -3,7 +3,7 @@ import sqlite3 as sql
|
||||
from csv import reader
|
||||
from os import path, listdir
|
||||
# local
|
||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
|
||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
||||
from regina.utility.utility import pdebug
|
||||
from regina.utility.globals import settings
|
||||
|
||||
@ -48,7 +48,7 @@ t_ip_range = "ip_range"
|
||||
visitor_id = Entry("visitor_id", "INTEGER")
|
||||
request_id = Entry("request_id", "INTEGER")
|
||||
filegroup_id = Entry("group_id", "INTEGER")
|
||||
ip_address_entry = Entry("ip_address", "TEXT")
|
||||
ip_address_entry = Entry("ip_address", "INTEGER")
|
||||
filename_entry = Entry("filename", "TEXT")
|
||||
city_id = Entry("city_id", "INTEGER")
|
||||
country_id = Entry("country_id", "INTEGER")
|
||||
@ -120,7 +120,8 @@ def get_filegroup(filename: str, cursor: sql.Cursor) -> int:
|
||||
if group_id_candidates:
|
||||
return group_id_candidates[0][0]
|
||||
else: # add new group file filename
|
||||
group_id = sql_tablesize(cursor, t_filegroup)
|
||||
group_id = sql_max(cursor, t_filegroup, "group_id") + 1
|
||||
|
||||
# pdebug("new file(group):", group_id, filename)
|
||||
# add group
|
||||
sql_insert(cursor, t_filegroup, [[group_id, filename]])
|
||||
@ -138,7 +139,7 @@ def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
|
||||
if sql_exists(cursor, t_filegroup, [("groupname", name)]):
|
||||
group_id = sql_select(cursor, t_filegroup, [("groupname", name)])[0][0]
|
||||
else:
|
||||
group_id = sql_tablesize(cursor, t_filegroup)
|
||||
group_id = sql_max(cursor, t_filegroup, "group_id") + 1
|
||||
sql_insert(cursor, t_filegroup, [(group_id, name)])
|
||||
# pdebug("create_filegroups: group_id", group_id)
|
||||
# create/edit file
|
||||
|
@ -128,7 +128,7 @@ def get_where_date_str(at_date=None, min_date=None, max_date=None):
|
||||
else:
|
||||
print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
|
||||
if s == "":
|
||||
print(f"WARNING: get_where_date_str: no date_str generated. Returing 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
|
||||
print(f"WARNING: get_where_date_str: no date_str generated. Returning 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
|
||||
return "date > 0"
|
||||
return s.removesuffix(" AND ")
|
||||
|
||||
@ -353,7 +353,6 @@ def get_city_and_country_ranking(cur:sql.Cursor, require_humans=True, regex_city
|
||||
cities = cur.fetchall()
|
||||
cities_dict = {}
|
||||
country_dict = {}
|
||||
# TODO: find out why regex_blacklist does not work
|
||||
pdebug(f"get_city_and_country_ranking: found {len(cities)} ip_ranges")
|
||||
|
||||
validate_city_cmd = lambda _ : True
|
||||
|
@ -48,6 +48,7 @@ def help():
|
||||
--update-geoip <path> path to IP-COUNTRY-REGION-CITY database in csv format
|
||||
--visualize generate the visualization website
|
||||
--collect fill the database from the nginx access log
|
||||
--log-file <path> use alternate logfile
|
||||
"""
|
||||
print(helpstring)
|
||||
|
||||
|
@ -17,6 +17,7 @@ settings = {
|
||||
"unique_visitor_is_ip_address": False,
|
||||
"get_visitor_location": False,
|
||||
"get_cities_for_countries": [""], # list if country codes for which the ip address ranges need to be collected at city level, not country level
|
||||
"hash_ip_address": True,
|
||||
|
||||
# VISUALIZATION
|
||||
"get_human_percentage": False,
|
||||
|
@ -2,8 +2,7 @@ import sqlite3 as sql
|
||||
"""Various utilities"""
|
||||
def sanitize(s):
    """
    Escape a value for use inside a single-quoted SQL string literal.

    Strings get every single quote doubled and leading/trailing spaces removed.
    Values that are not strings are returned unchanged.
    """
    if not isinstance(s, str):
        return s
    return s.replace("'", "''").strip(" ")
|
||||
|
||||
def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str:
|
||||
@ -35,6 +34,12 @@ def sql_tablesize(cur: sql.Cursor, table: str) -> int:
|
||||
cur.execute(f"SELECT Count(*) FROM {table}")
|
||||
return cur.fetchone()[0]
|
||||
|
||||
def sql_max(cur: sql.Cursor, table: str, column: str) -> int:
    """
    Return the largest integer value of `column` in `table`.

    Returns 0 if the query does not yield an integer, which is the case for an
    empty table (MAX() is NULL there).
    `table` and `column` are inserted into the statement as identifiers and
    must therefore not come from untrusted input.
    """
    cur.execute(f"SELECT MAX({column}) FROM {table}")
    val = cur.fetchone()[0]
    return val if isinstance(val, int) else 0
|
||||
|
||||
def sql_get_count_where(cur: sql.Cursor, table, constraints) -> int:
    """
    Return the number of rows in `table` that match `constraints`.

    The WHERE clause is built from `constraints` by sql_get_constaint_str.
    """
    where_clause = sql_get_constaint_str(constraints)
    query = f"SELECT COUNT(*) FROM {table} WHERE {where_clause}"
    cur.execute(query)
    row = cur.fetchone()
    return row[0]
|
||||
|
Loading…
Reference in New Issue
Block a user