Changed database structure
This commit is contained in:
parent
ecc75560e3
commit
a49f15b9f0
277
database.uxf
277
database.uxf
@ -1,13 +1,13 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
<diagram program="umlet" version="15.1">
|
<diagram program="umlet" version="15.1">
|
||||||
<zoom_level>10</zoom_level>
|
<zoom_level>8</zoom_level>
|
||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>70</x>
|
<x>96</x>
|
||||||
<y>220</y>
|
<y>248</y>
|
||||||
<w>250</w>
|
<w>160</w>
|
||||||
<h>190</h>
|
<h>144</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>visitor
|
<panel_attributes>visitor
|
||||||
--
|
--
|
||||||
@ -15,53 +15,35 @@
|
|||||||
- visitor_id: INTEGER
|
- visitor_id: INTEGER
|
||||||
--
|
--
|
||||||
- ip_address: INTEGER
|
- ip_address: INTEGER
|
||||||
- visitor agent string: TEXT
|
- platform_id: INTEGER
|
||||||
- platform: TEXT
|
- browser_id: INTEGER
|
||||||
- browser: TEXT
|
|
||||||
- mobile: INTEGER
|
- mobile: INTEGER
|
||||||
- is_human: INTEGER
|
- is_human: INTEGER
|
||||||
- range_id: INTEGER
|
- range_id: INTEGER
|
||||||
style=autoresize</panel_attributes>
|
|
||||||
<additional_attributes/>
|
|
||||||
</element>
|
|
||||||
<element>
|
|
||||||
<id>UMLClass</id>
|
|
||||||
<coordinates>
|
|
||||||
<x>640</x>
|
|
||||||
<y>220</y>
|
|
||||||
<w>180</w>
|
|
||||||
<h>100</h>
|
|
||||||
</coordinates>
|
|
||||||
<panel_attributes>filegroup
|
|
||||||
--
|
|
||||||
<<PK>>
|
|
||||||
- group_id: INTEGER
|
|
||||||
--
|
|
||||||
- group_name: TEXT
|
|
||||||
style=autoresize</panel_attributes>
|
style=autoresize</panel_attributes>
|
||||||
<additional_attributes/>
|
<additional_attributes/>
|
||||||
</element>
|
</element>
|
||||||
<element>
|
<element>
|
||||||
<id>Relation</id>
|
<id>Relation</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>560</x>
|
<x>216</x>
|
||||||
<y>220</y>
|
<y>168</y>
|
||||||
<w>100</w>
|
<w>32</w>
|
||||||
<h>50</h>
|
<h>96</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=-
|
<panel_attributes>lt=-
|
||||||
m1=n
|
m1=n
|
||||||
m2=1
|
m2=1
|
||||||
</panel_attributes>
|
</panel_attributes>
|
||||||
<additional_attributes>10.0;20.0;80.0;20.0</additional_attributes>
|
<additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
|
||||||
</element>
|
</element>
|
||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>380</x>
|
<x>352</x>
|
||||||
<y>210</y>
|
<y>240</y>
|
||||||
<w>190</w>
|
<w>152</w>
|
||||||
<h>170</h>
|
<h>136</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>request
|
<panel_attributes>request
|
||||||
--
|
--
|
||||||
@ -69,10 +51,10 @@ m2=1
|
|||||||
- request_id: INTEGER
|
- request_id: INTEGER
|
||||||
--
|
--
|
||||||
- visitor_id: INTEGER
|
- visitor_id: INTEGER
|
||||||
- group_id: INTEGER
|
- route_id: INTEGER
|
||||||
|
- referer_id: INTEGER
|
||||||
--
|
--
|
||||||
- date: TEXT
|
- time: INTEGER
|
||||||
- referer: TEXT
|
|
||||||
- status: INTEGER
|
- status: INTEGER
|
||||||
style=autoresize</panel_attributes>
|
style=autoresize</panel_attributes>
|
||||||
<additional_attributes/>
|
<additional_attributes/>
|
||||||
@ -80,81 +62,42 @@ style=autoresize</panel_attributes>
|
|||||||
<element>
|
<element>
|
||||||
<id>Relation</id>
|
<id>Relation</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>310</x>
|
<x>248</x>
|
||||||
<y>220</y>
|
<y>248</y>
|
||||||
<w>90</w>
|
<w>120</w>
|
||||||
<h>50</h>
|
<h>40</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=-
|
<panel_attributes>lt=-
|
||||||
m1=1
|
m1=1
|
||||||
m2=n
|
m2=n
|
||||||
</panel_attributes>
|
</panel_attributes>
|
||||||
<additional_attributes>10.0;20.0;70.0;20.0</additional_attributes>
|
<additional_attributes>10.0;20.0;130.0;20.0</additional_attributes>
|
||||||
</element>
|
</element>
|
||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>640</x>
|
<x>16</x>
|
||||||
<y>400</y>
|
<y>96</y>
|
||||||
<w>180</w>
|
<w>160</w>
|
||||||
<h>100</h>
|
|
||||||
</coordinates>
|
|
||||||
<panel_attributes>file
|
|
||||||
--
|
|
||||||
<<PK>>
|
|
||||||
- filename: TEXT
|
|
||||||
--
|
|
||||||
- group_id: INTEGER
|
|
||||||
--
|
|
||||||
style=autoresize</panel_attributes>
|
|
||||||
<additional_attributes/>
|
|
||||||
</element>
|
|
||||||
<element>
|
|
||||||
<id>Relation</id>
|
|
||||||
<coordinates>
|
|
||||||
<x>670</x>
|
|
||||||
<y>310</y>
|
|
||||||
<w>40</w>
|
|
||||||
<h>110</h>
|
|
||||||
</coordinates>
|
|
||||||
<panel_attributes>lt=-
|
|
||||||
m1=n
|
|
||||||
m2=1
|
|
||||||
</panel_attributes>
|
|
||||||
<additional_attributes>10.0;90.0;10.0;10.0</additional_attributes>
|
|
||||||
</element>
|
|
||||||
<element>
|
|
||||||
<id>UMLNote</id>
|
|
||||||
<coordinates>
|
|
||||||
<x>490</x>
|
|
||||||
<y>100</y>
|
|
||||||
<w>300</w>
|
|
||||||
<h>70</h>
|
|
||||||
</coordinates>
|
|
||||||
<panel_attributes>One group contains multiple files.
|
|
||||||
Lets you group the images from a
|
|
||||||
html with the html itself
|
|
||||||
style=autoresize</panel_attributes>
|
|
||||||
<additional_attributes/>
|
|
||||||
</element>
|
|
||||||
<element>
|
|
||||||
<id>Relation</id>
|
|
||||||
<coordinates>
|
|
||||||
<x>650</x>
|
|
||||||
<y>160</y>
|
|
||||||
<w>30</w>
|
|
||||||
<h>80</h>
|
<h>80</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=<-</panel_attributes>
|
<panel_attributes>platform
|
||||||
<additional_attributes>10.0;60.0;10.0;10.0</additional_attributes>
|
--
|
||||||
|
<<PK>>
|
||||||
|
- platform_id: INTEGER
|
||||||
|
--
|
||||||
|
- name: TEXT UNIQUE
|
||||||
|
--
|
||||||
|
style=autoresize</panel_attributes>
|
||||||
|
<additional_attributes/>
|
||||||
</element>
|
</element>
|
||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>360</x>
|
<x>328</x>
|
||||||
<y>520</y>
|
<y>488</y>
|
||||||
<w>190</w>
|
<w>152</w>
|
||||||
<h>130</h>
|
<h>104</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>city
|
<panel_attributes>city
|
||||||
--
|
--
|
||||||
@ -170,15 +113,15 @@ style=autoresize</panel_attributes>
|
|||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>620</x>
|
<x>536</x>
|
||||||
<y>520</y>
|
<y>488</y>
|
||||||
<w>120</w>
|
<w>152</w>
|
||||||
<h>110</h>
|
<h>88</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>country
|
<panel_attributes>country
|
||||||
--
|
--
|
||||||
<<PK>>
|
<<PK>>
|
||||||
- country_id
|
- country_id: INTEGER
|
||||||
--
|
--
|
||||||
- name: TEXT
|
- name: TEXT
|
||||||
- code: TEXT
|
- code: TEXT
|
||||||
@ -188,10 +131,10 @@ style=autoresize</panel_attributes>
|
|||||||
<element>
|
<element>
|
||||||
<id>Relation</id>
|
<id>Relation</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>540</x>
|
<x>472</x>
|
||||||
<y>540</y>
|
<y>504</y>
|
||||||
<w>100</w>
|
<w>80</w>
|
||||||
<h>50</h>
|
<h>40</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=-
|
<panel_attributes>lt=-
|
||||||
m1=1
|
m1=1
|
||||||
@ -202,10 +145,10 @@ m2=n
|
|||||||
<element>
|
<element>
|
||||||
<id>Relation</id>
|
<id>Relation</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>280</x>
|
<x>264</x>
|
||||||
<y>540</y>
|
<y>504</y>
|
||||||
<w>100</w>
|
<w>80</w>
|
||||||
<h>50</h>
|
<h>40</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=-
|
<panel_attributes>lt=-
|
||||||
m1=1
|
m1=1
|
||||||
@ -216,10 +159,10 @@ m2=n
|
|||||||
<element>
|
<element>
|
||||||
<id>UMLClass</id>
|
<id>UMLClass</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>120</x>
|
<x>136</x>
|
||||||
<y>520</y>
|
<y>488</y>
|
||||||
<w>170</w>
|
<w>136</w>
|
||||||
<h>130</h>
|
<h>104</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>ip_range
|
<panel_attributes>ip_range
|
||||||
--
|
--
|
||||||
@ -235,15 +178,111 @@ style=autoresize</panel_attributes>
|
|||||||
<element>
|
<element>
|
||||||
<id>Relation</id>
|
<id>Relation</id>
|
||||||
<coordinates>
|
<coordinates>
|
||||||
<x>170</x>
|
<x>176</x>
|
||||||
<y>400</y>
|
<y>384</y>
|
||||||
<w>40</w>
|
<w>32</w>
|
||||||
<h>140</h>
|
<h>120</h>
|
||||||
</coordinates>
|
</coordinates>
|
||||||
<panel_attributes>lt=-
|
<panel_attributes>lt=-
|
||||||
m1=1
|
m1=1
|
||||||
m2=n
|
m2=n
|
||||||
</panel_attributes>
|
</panel_attributes>
|
||||||
<additional_attributes>10.0;120.0;10.0;10.0</additional_attributes>
|
<additional_attributes>10.0;130.0;10.0;10.0</additional_attributes>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>UMLClass</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>576</x>
|
||||||
|
<y>264</y>
|
||||||
|
<w>144</w>
|
||||||
|
<h>80</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>route
|
||||||
|
--
|
||||||
|
<<PK>>
|
||||||
|
- route_id: INTEGER
|
||||||
|
--
|
||||||
|
- name: TEXT UNIQUE
|
||||||
|
--
|
||||||
|
style=autoresize</panel_attributes>
|
||||||
|
<additional_attributes/>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>UMLClass</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>208</x>
|
||||||
|
<y>96</y>
|
||||||
|
<w>152</w>
|
||||||
|
<h>80</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>browser
|
||||||
|
--
|
||||||
|
<<PK>>
|
||||||
|
- browser_id: INTEGER
|
||||||
|
--
|
||||||
|
- name: TEXT UNIQUE
|
||||||
|
--
|
||||||
|
style=autoresize</panel_attributes>
|
||||||
|
<additional_attributes/>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>Relation</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>144</x>
|
||||||
|
<y>168</y>
|
||||||
|
<w>32</w>
|
||||||
|
<h>96</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>lt=-
|
||||||
|
m1=n
|
||||||
|
m2=1
|
||||||
|
</panel_attributes>
|
||||||
|
<additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>UMLClass</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>392</x>
|
||||||
|
<y>96</y>
|
||||||
|
<w>152</w>
|
||||||
|
<h>80</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>referer
|
||||||
|
--
|
||||||
|
<<PK>>
|
||||||
|
- referer_id: INTEGER
|
||||||
|
--
|
||||||
|
- name: TEXT UNIQUE
|
||||||
|
--
|
||||||
|
style=autoresize</panel_attributes>
|
||||||
|
<additional_attributes/>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>Relation</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>400</x>
|
||||||
|
<y>168</y>
|
||||||
|
<w>32</w>
|
||||||
|
<h>88</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>lt=-
|
||||||
|
m1=n
|
||||||
|
m2=1
|
||||||
|
</panel_attributes>
|
||||||
|
<additional_attributes>10.0;90.0;10.0;10.0</additional_attributes>
|
||||||
|
</element>
|
||||||
|
<element>
|
||||||
|
<id>Relation</id>
|
||||||
|
<coordinates>
|
||||||
|
<x>496</x>
|
||||||
|
<y>288</y>
|
||||||
|
<w>96</w>
|
||||||
|
<h>40</h>
|
||||||
|
</coordinates>
|
||||||
|
<panel_attributes>lt=-
|
||||||
|
m1=n
|
||||||
|
m2=1
|
||||||
|
</panel_attributes>
|
||||||
|
<additional_attributes>10.0;20.0;100.0;20.0</additional_attributes>
|
||||||
</element>
|
</element>
|
||||||
</diagram>
|
</diagram>
|
||||||
|
@ -5,8 +5,18 @@ from os import path, listdir
|
|||||||
import pkg_resources
|
import pkg_resources
|
||||||
import re
|
import re
|
||||||
from datetime import datetime as dt
|
from datetime import datetime as dt
|
||||||
|
|
||||||
|
if __name__ == "__main__": # make relative imports work as described here: https://peps.python.org/pep-0366/#proposed-change
|
||||||
|
if __package__ is None:
|
||||||
|
__package__ = "regina"
|
||||||
|
import sys
|
||||||
|
from os import path
|
||||||
|
filepath = path.realpath(path.abspath(__file__))
|
||||||
|
print(path.dirname(path.dirname(path.dirname(filepath))))
|
||||||
|
sys.path.insert(0, path.dirname(path.dirname(path.dirname(filepath))))
|
||||||
|
|
||||||
# local
|
# local
|
||||||
from .utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
from .utility.sql_util import replace_null, sanitize, sql_select, sql_exists
|
||||||
from .utility.utility import pdebug, get_filepath, warning, pmessage
|
from .utility.utility import pdebug, get_filepath, warning, pmessage
|
||||||
from .utility.globals import settings
|
from .utility.globals import settings
|
||||||
from .db_operation.request import Request
|
from .db_operation.request import Request
|
||||||
@ -16,97 +26,6 @@ from .utility.globals import visitor_agent_operating_systems, visitor_agent_brow
|
|||||||
create regina's database as shown in the uml diagram database.uxf
|
create regina's database as shown in the uml diagram database.uxf
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Entry:
|
|
||||||
"""
|
|
||||||
represents an sql entry
|
|
||||||
type_ is INTEGER, TEXT, REAL...
|
|
||||||
"""
|
|
||||||
def __init__(self, name, type_) -> None:
|
|
||||||
self.name = name
|
|
||||||
self.type_ = type_
|
|
||||||
def __repr__(self):
|
|
||||||
return f"[{self.name}] {self.type_}"
|
|
||||||
|
|
||||||
class Table:
|
|
||||||
def __init__(self, name, key: Entry, entries: list[Entry]=[], constaints: list[str]=[]):
|
|
||||||
self.name = name
|
|
||||||
self.key = key
|
|
||||||
self.entries = entries
|
|
||||||
self.constaints = constaints
|
|
||||||
def create_sql_str(self):
|
|
||||||
return f"CREATE TABLE IF NOT EXISTS {self.name}\n({self})\n"
|
|
||||||
def __repr__(self):
|
|
||||||
s = f"{self.key} PRIMARY KEY"
|
|
||||||
for entry in self.entries:
|
|
||||||
s += f", {entry}"
|
|
||||||
for c in self.constaints:
|
|
||||||
s += f", {c}"
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
t_request = "request"
|
|
||||||
t_file = "file"
|
|
||||||
t_filegroup = "filegroup"
|
|
||||||
t_visitor = "visitor"
|
|
||||||
t_city = "city"
|
|
||||||
t_country = "country"
|
|
||||||
t_ip_range = "ip_range"
|
|
||||||
|
|
||||||
visitor_id = Entry("visitor_id", "INTEGER")
|
|
||||||
request_id = Entry("request_id", "INTEGER")
|
|
||||||
filegroup_id = Entry("group_id", "INTEGER")
|
|
||||||
ip_address_entry = Entry("ip_address", "INTEGER")
|
|
||||||
filename_entry = Entry("filename", "TEXT")
|
|
||||||
city_id = Entry("city_id", "INTEGER")
|
|
||||||
country_id = Entry("country_id", "INTEGER")
|
|
||||||
ip_range_id = Entry("ip_range_id", "INTEGER")
|
|
||||||
|
|
||||||
database_tables = {
|
|
||||||
t_visitor: Table(t_visitor, visitor_id, [
|
|
||||||
Entry("ip_address", "INTEGER"),
|
|
||||||
Entry("visitor_agent", "TEXT"),
|
|
||||||
Entry("platform", "TEXT"),
|
|
||||||
Entry("browser", "TEXT"),
|
|
||||||
Entry("mobile", "INTEGER"),
|
|
||||||
Entry("is_human", "INTEGER"),
|
|
||||||
ip_range_id,
|
|
||||||
],
|
|
||||||
[f"UNIQUE({visitor_id.name})"]),
|
|
||||||
t_file: Table(t_file, filename_entry,
|
|
||||||
[filegroup_id],
|
|
||||||
[f"UNIQUE({filename_entry.name})"]),
|
|
||||||
t_filegroup: Table(t_filegroup, filegroup_id,
|
|
||||||
[Entry("groupname", "TEXT")],
|
|
||||||
[f"UNIQUE({filegroup_id.name})"]),
|
|
||||||
t_request: Table(t_request, request_id, [
|
|
||||||
visitor_id,
|
|
||||||
filegroup_id,
|
|
||||||
Entry("date", "INTEGER"),
|
|
||||||
Entry("referer", "TEXT"),
|
|
||||||
Entry("status", "INTEGER")
|
|
||||||
],
|
|
||||||
["UNIQUE(request_id)"]),
|
|
||||||
t_ip_range: Table(t_ip_range, ip_range_id, [
|
|
||||||
Entry("lower", "INTEGER"),
|
|
||||||
Entry("upper", "INTEGER"),
|
|
||||||
city_id,
|
|
||||||
],
|
|
||||||
[f"UNIQUE({ip_range_id.name})"]),
|
|
||||||
t_city: Table(t_city, city_id, [
|
|
||||||
country_id,
|
|
||||||
Entry("name", "TEXT"),
|
|
||||||
Entry("region", "TEXT"),
|
|
||||||
],
|
|
||||||
[f"UNIQUE({city_id.name})"]),
|
|
||||||
t_country: Table(t_country, country_id, [
|
|
||||||
Entry("name", "TEXT"),
|
|
||||||
Entry("code", "TEXT"),
|
|
||||||
],
|
|
||||||
[f"UNIQUE({country_id.name})"]),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Database:
|
class Database:
|
||||||
def __init__(self, database_path):
|
def __init__(self, database_path):
|
||||||
self.conn = sql.connect(database_path)
|
self.conn = sql.connect(database_path)
|
||||||
@ -118,6 +37,7 @@ class Database:
|
|||||||
with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
|
with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
|
||||||
create_db = file.read()
|
create_db = file.read()
|
||||||
self.cur.execute(create_db)
|
self.cur.execute(create_db)
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
def __call__(self, s):
|
def __call__(self, s):
|
||||||
"""execute a command and return fetchall()"""
|
"""execute a command and return fetchall()"""
|
||||||
@ -127,42 +47,27 @@ class Database:
|
|||||||
#
|
#
|
||||||
# VISITOR
|
# VISITOR
|
||||||
#
|
#
|
||||||
def visitor_exists(self, request) -> bool:
|
|
||||||
if settings["hash_ip_address"]:
|
|
||||||
ip_address = hash(request.ip_address)
|
|
||||||
else:
|
|
||||||
ip_address = request.ip_address
|
|
||||||
if settings["unique_visitor_is_ip_address"]:
|
|
||||||
return sql_exists(self.cur, t_visitor, [("ip_address", ip_address)])
|
|
||||||
else:
|
|
||||||
return sql_exists(self.cur, t_visitor, [("ip_address", ip_address), ("visitor_agent", request.visitor_agent)])
|
|
||||||
|
|
||||||
def is_visitor_human(self, visitor_id: int):
|
def is_visitor_human(self, visitor_id: int):
|
||||||
"""
|
"""
|
||||||
check if they have a known platform AND browser
|
check if they have a known platform AND browser
|
||||||
check if at least one request did not result in an error (http status >= 400)
|
if settings "human_needs_success": check if at least one request did not result in an error (http status >= 400)
|
||||||
"""
|
"""
|
||||||
max_success_status = 400
|
max_success_status = 400
|
||||||
if settings["status_300_is_success"]: max_success_status = 300
|
if settings["status_300_is_success"]: max_success_status = 300
|
||||||
self.cur.execute(f"SELECT browser, platform FROM {t_visitor} WHERE visitor_id = {visitor_id}")
|
self.cur.execute(f"SELECT browser_id, platform_id FROM visitor WHERE visitor_id = {visitor_id}")
|
||||||
browsers_and_platforms = self.cur.fetchall()
|
browsers_and_platforms = self.cur.fetchall()
|
||||||
if len(browsers_and_platforms) != 1:
|
if len(browsers_and_platforms) != 1:
|
||||||
pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many")
|
pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many")
|
||||||
return False
|
return False
|
||||||
if not browsers_and_platforms[0][0] in visitor_agent_browsers:
|
browser = self.get_name("browser", browsers_and_platforms[0][0])
|
||||||
|
if not browser in visitor_agent_browsers:
|
||||||
return False
|
return False
|
||||||
if not browsers_and_platforms[0][1] in visitor_agent_operating_systems:
|
platform = self.get_name("platform", browsers_and_platforms[0][1])
|
||||||
|
if not platform in visitor_agent_operating_systems:
|
||||||
return False
|
return False
|
||||||
# check if has browser
|
|
||||||
# self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM {t_visitor} WHERE visitor_id = {visitor_id} AND platform IS NOT NULL AND browser IS NOT NULL)")
|
|
||||||
# if no browser and platform
|
|
||||||
# exists = self.cur.fetchone()
|
|
||||||
# if exists is None or exists[0] == 0:
|
|
||||||
# return False
|
|
||||||
# if human needs successful request
|
|
||||||
if settings["human_needs_success"]:
|
if settings["human_needs_success"]:
|
||||||
# check if at least one request was successful (status < 400)
|
# check if at least one request was successful (status < 400)
|
||||||
self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM {t_request} WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
|
self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM request WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
|
||||||
if self.cur.fetchone()[0] == 1:
|
if self.cur.fetchone()[0] == 1:
|
||||||
# pdebug(f"is_visitor_human: Visitor {visitor_id} is human")
|
# pdebug(f"is_visitor_human: Visitor {visitor_id} is human")
|
||||||
pass
|
pass
|
||||||
@ -171,67 +76,85 @@ class Database:
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_visitor_id(self, request: Request) -> int:
|
def get_visitor_id(self, request: Request, insert=True) -> int | None:
|
||||||
"""
|
"""
|
||||||
get the visitor_id. Adds the visitor if not already existing
|
get the visitor_id. Adds the visitor if not already existing
|
||||||
"""
|
"""
|
||||||
|
"""
|
||||||
|
get the visitor_id:
|
||||||
|
If settings unique_visitor_is_ip_address: Check if visitor with ip address exists
|
||||||
|
Else: check if visitor with ip_address, browser and platform exists
|
||||||
|
|
||||||
|
If visitor does not exist and insert: insert, return id
|
||||||
|
Else: return None
|
||||||
|
"""
|
||||||
if settings["hash_ip_address"]:
|
if settings["hash_ip_address"]:
|
||||||
ip_address = hash(request.ip_address)
|
ip_address = hash(request.ip_address)
|
||||||
else:
|
else:
|
||||||
ip_address = request.ip_address
|
ip_address = request.ip_address
|
||||||
|
|
||||||
if self.visitor_exists(request):
|
# if insert == True, ids will be int
|
||||||
if settings["unique_visitor_is_ip_address"]:
|
browser_id: int | None = self.get_id("browser", request.get_browser(), insert=insert)
|
||||||
visitor_id = sql_select(self.cur, t_visitor, [("ip_address", ip_address)])[0][0]
|
platform_id: int | None = self.get_id("platform", request.get_platform(), insert=insert)
|
||||||
else:
|
constraints = [("ip_address", ip_address)]
|
||||||
visitor_id = sql_select(self.cur, t_visitor, [("ip_address", ip_address), ("visitor_agent", request.visitor_agent)])[0][0]
|
if not settings["unique_visitor_is_ip_address"]:
|
||||||
else: # new visitor
|
if browser_id: constraints.append(("browser_id", browser_id))
|
||||||
# new visitor_id is number of elements
|
if platform_id: constraints.append(("platform_id", platform_id))
|
||||||
visitor_id = sql_max(self.cur, t_visitor, "visitor_id") + 1
|
require_update_is_human = False
|
||||||
# pdebug("new visitor:", visitor_id, request.ip_address)
|
if not sql_exists(self.cur, "visitor", constraints):
|
||||||
platform, browser, mobile = get_os_browser_pairs_from_agent(request.visitor_agent)
|
require_update_is_human = True
|
||||||
ip_range_id_val = 0
|
if not insert:
|
||||||
|
return None
|
||||||
|
is_mobile = int(request.get_mobile())
|
||||||
|
ip_range_id = 0
|
||||||
if settings["get_visitor_location"]:
|
if settings["get_visitor_location"]:
|
||||||
ip_range_id_val = get_ip_range_id(self.cur, request.ip_address)
|
ip_range_id = self.get_ip_range_id(request.ip_address)
|
||||||
is_human = 0 # is_visitor_human cannot be called until visitor is in db int(is_visitor_human(self.cur, visitor_id))
|
is_human = 0 # is_visitor_human cannot be called until visitor is in db
|
||||||
self.cur.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({visitor_id}, '{ip_address}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{is_human}', '{ip_range_id_val}');")
|
self.cur.execute(f"INSERT INTO visitor (ip_address, ip_range_id, platform_id, browser_id, is_mobile, is_human, ip_range_id) VALUES ('{ip_address}', '{ip_range_id}', '{platform_id}', '{browser_id}', '{is_mobile}', '{is_human}');")
|
||||||
|
visitor_id = sql_select(self.cur, "visitor", constraints)[0][0]
|
||||||
|
# TODO: if requests are not added yet, the visitor might not be recognized as human since it does not have a successful request yet
|
||||||
|
if require_update_is_human:
|
||||||
|
is_human = self.is_visitor_human(visitor_id)
|
||||||
|
if is_human:
|
||||||
|
self.cur.execute(f"UPDATE visitor SET is_human = 1 WHERE visitor_id = {visitor_id}")
|
||||||
return visitor_id
|
return visitor_id
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# REQUEST
|
# REQUEST
|
||||||
#
|
#
|
||||||
def request_exists(self, request: Request, visitor_id: int, group_id: int):
|
def request_exists(self, request: Request, visitor_id: int, route_id: int):
|
||||||
# get all requests from same visitor to same location
|
"""
|
||||||
# TODO this looks wrong
|
Check if a request from the same visitor was made to the same route on the same day, if setting "request_is_same_on_same_day" is True
|
||||||
self.cur.execute(f"SELECT request_id, date FROM {t_request} WHERE visitor_id = '{visitor_id}' AND group_id = '{group_id}'")
|
If not, always returns False
|
||||||
|
"""
|
||||||
|
if not settings["request_is_same_on_same_day"]: return False
|
||||||
|
# get all requests from same visitor to same route
|
||||||
|
self.cur.execute(f"SELECT request_id, time FROM request WHERE visitor_id = '{visitor_id}' AND = route_id = '{route_id}'")
|
||||||
|
# check if on same day
|
||||||
date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
|
date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
|
||||||
for request_id, date1 in self.cur.fetchall():
|
for request_id, date1 in self.cur.fetchall():
|
||||||
if settings["request_is_same_on_same_day"]:
|
date1 = dt.fromtimestamp(date1).strftime("%Y-%m-%d")
|
||||||
date1 = dt.fromtimestamp(date1).strftime("%Y-%m-%d")
|
if date0 == date1:
|
||||||
if date0 == date1:
|
pdebug(f"request_exists: Request is on same day as request {request_id}")
|
||||||
pdebug(f"request_exists: Request is on same day as request {request_id}")
|
return True
|
||||||
return True
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def add_request(self, request: Request) -> (int | None):
|
def add_request(self, request: Request) -> (int | None):
|
||||||
"""returns visitor_id if new request was added, else None"""
|
"""returns visitor_id if new request was added, else None"""
|
||||||
# skip requests to blacklisted locations
|
|
||||||
if request_blacklist:
|
|
||||||
if re.fullmatch(request_blacklist, request.request_file):
|
|
||||||
# pdebug(f"add_requests_to_db: request on blacklist '{request.request_file}'")
|
|
||||||
return None
|
|
||||||
# pdebug("add_requests_to_db:", i, "request:", request)
|
|
||||||
visitor_id = self.get_visitor_id(request)
|
visitor_id = self.get_visitor_id(request)
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
group_id: int = self.get_filegroup(request.request_file)
|
# browser_id = self.get_id("browser", request.get_browser())
|
||||||
|
# platform_id = self.get_id("platform", request.get_platform())
|
||||||
|
referer_id = self.get_id("referer", request.referer)
|
||||||
|
route_id = self.get_id("route", request.route)
|
||||||
# check if request is unique
|
# check if request is unique
|
||||||
if self.request_exists(request, visitor_id, group_id):
|
if self.request_exists(request, visitor_id, route_id):
|
||||||
# pdebug("request exists:", request)
|
# pdebug("request exists:", request)
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
# pdebug("new request:", request)
|
# pdebug("new request:", request)
|
||||||
sql_insert(t_request, [[None, visitor_id, group_id, request.time_local, request.referer, request.status]])
|
self.cur.execute(f"INSERT INTO request (visitor_id, route_id, referer_id, time, status) VALUES ({visitor_id}, {route_id}, {referer_id}, {request.time_local}, {request.status})")
|
||||||
return visitor_id
|
return visitor_id
|
||||||
|
|
||||||
def add_requests(self, requests: list[Request]):
|
def add_requests(self, requests: list[Request]):
|
||||||
@ -246,53 +169,50 @@ class Database:
|
|||||||
|
|
||||||
# update the is_human column for all new visitors
|
# update the is_human column for all new visitors
|
||||||
for visitor_id in new_visitors:
|
for visitor_id in new_visitors:
|
||||||
if not sql_exists(self.cur, t_visitor, [(str(visitor_id), "visitor_id")]): continue
|
# TODO this does not look right
|
||||||
is_human = self.is_visitor_human(visitor_id)
|
if not sql_exists(self.cur, "visitor", [("visitor_id", visitor_id)]): continue
|
||||||
self.cur.execute(f"SELECT * FROM {t_visitor} WHERE visitor_id = {visitor_id}")
|
|
||||||
# pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {self.cur.fetchall()}")
|
# pdebug(f"add_rq_to_db: {visitor_id} is_human? {is_human}, {self.cur.fetchall()}")
|
||||||
if is_human:
|
|
||||||
self.cur.execute(f"UPDATE {t_visitor} SET is_human = 1 WHERE visitor_id = {visitor_id}")
|
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
pmessage(f"Collection Summary: Added {len(new_visitors)} new visitors and {added_requests} new requests.")
|
pmessage(f"Collection Summary: Added {len(new_visitors)} new visitors and {added_requests} new requests.")
|
||||||
|
|
||||||
#
|
|
||||||
# FILE(GROUP)
|
|
||||||
#
|
|
||||||
def get_filegroup(self, filename: str) -> int:
|
|
||||||
"""
|
|
||||||
get the filegroup
|
|
||||||
returns the group where
|
|
||||||
1) filename is the groupname
|
|
||||||
2) the filetype of filename is the groupname
|
|
||||||
3) a new group with filename as groupname
|
|
||||||
"""
|
|
||||||
# pdebug(f"get_filegroup: {filename}")
|
|
||||||
if sql_exists(self.cur, t_file, [("filename", filename)]):
|
|
||||||
return sql_select(self.cur, t_file, [("filename", filename)])[0][1]
|
|
||||||
else:
|
|
||||||
suffix = filename.split('.')[-1]
|
|
||||||
self.cur.execute(f"SELECT group_id FROM {t_filegroup} WHERE groupname = '{suffix}'")
|
|
||||||
# self.cur.execute(f"SELECT group_id FROM {t_filegroup} WHERE groupname LIKE '%.{suffix}'")
|
|
||||||
group_id_candidates = self.cur.fetchall()
|
|
||||||
# pdebug(f"get_filegroup: file={filename} candidates={group_id_candidates}")
|
|
||||||
if group_id_candidates:
|
|
||||||
return group_id_candidates[0][0]
|
|
||||||
else: # add new group file filename
|
|
||||||
group_id = sql_max(self.cur, t_filegroup, "group_id") + 1
|
|
||||||
|
|
||||||
# pdebug("new file(group):", group_id, filename)
|
def get_id(self, table: str, name: str, insert=True) -> int | None:
|
||||||
# add group
|
"""
|
||||||
sql_insert(self.cur, t_filegroup, [[group_id, filename]])
|
get the id of name in table
|
||||||
# add file
|
if name is not in table:
|
||||||
sql_insert(self.cur, t_file, [[filename, group_id]])
|
if insert: add and return id
|
||||||
return group_id
|
else: return None
|
||||||
|
supported tables: platform, browser, referer, route, city
|
||||||
|
"""
|
||||||
|
supported_tables = ["platform", "browser", "referer", "route", "city"]
|
||||||
|
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
|
||||||
|
name = sanitize(replace_null(name))
|
||||||
|
# if non existent, add name
|
||||||
|
if not sql_exists(self.cur, table, [("name", name)]):
|
||||||
|
if not insert: return None
|
||||||
|
self.cur.execute(f"INSERT INTO {table} (name) VALUES ('{name}')")
|
||||||
|
return self(f"SELECT {table}_id FROM {table} WHERE name = '{name}'")[0][0]
|
||||||
|
|
||||||
|
def get_name(self, table: str, id_: int) -> (str | None):
|
||||||
|
"""
|
||||||
|
get the name of id in table
|
||||||
|
if id is not in table, returns None
|
||||||
|
supported tables: platform, browser, referer, route, city
|
||||||
|
"""
|
||||||
|
supported_tables = ["platform", "browser", "referer", "route", "city"]
|
||||||
|
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
|
||||||
|
ret = self(f"SELECT name FROM {table} WHERE {table}_id = '{id_}'")
|
||||||
|
if len(ret) == 0: return None
|
||||||
|
# TODO: self(...) returns fetchall(), so ret[0] is presumably a whole row (tuple) rather than the bare name - verify, and use ret[0][0] if so
|
||||||
|
return ret[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# GEOIP
|
# GEOIP
|
||||||
#
|
#
|
||||||
def get_ip_range_id(self, ip_address: int):
|
def get_ip_range_id(self, ip_address: int) -> int:
|
||||||
self.cur.execute(f"SELECT {ip_range_id.name} FROM {t_ip_range} WHERE '{ip_address}' BETWEEN lower AND upper")
|
results = self(f"SELECT ip_range_id FROM ip_range WHERE '{ip_address}' BETWEEN low AND high")
|
||||||
results = self.cur.fetchall()
|
|
||||||
ip_range_id_val = 0
|
ip_range_id_val = 0
|
||||||
if len(results) == 0:
|
if len(results) == 0:
|
||||||
pass
|
pass
|
||||||
@ -302,174 +222,114 @@ class Database:
|
|||||||
ip_range_id_val = results[0][0]
|
ip_range_id_val = results[0][0]
|
||||||
return ip_range_id_val
|
return ip_range_id_val
|
||||||
|
|
||||||
|
|
||||||
def update_ip_range_id(self, visitor_id: int):
|
def update_ip_range_id(self, visitor_id: int):
|
||||||
self.cur.execute(f"SELECT ip_address FROM {t_visitor} WHERE visitor_id = {visitor_id}")
|
"""
|
||||||
results = self.cur.fetchall()
|
update the ip_range_id column of visitor with visitor_id
|
||||||
if len(results) == 0:
|
"""
|
||||||
|
results = self(f"SELECT ip_address FROM visitor WHERE visitor_id = {visitor_id}")
|
||||||
|
if len(results) == 0: # sanity checks
|
||||||
warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
|
warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
|
||||||
return
|
return
|
||||||
elif len(results) > 1:
|
elif len(results) > 1:
|
||||||
warning(f"update_ip_range_id: Found multiple ip_addresses for visitor_id={visitor_id}: results={results}")
|
warning(f"update_ip_range_id: Found multiple ip_addresses for visitor_id={visitor_id}: results={results}")
|
||||||
return
|
return
|
||||||
ip_address = results[0][0]
|
ip_address = results[0][0]
|
||||||
self.cur.execute(f"UPDATE {t_visitor} SET {ip_range_id.name} = '{get_ip_range_id(self.cur, ip_address)}' WHERE visitor_id = '{visitor_id}'")
|
self.cur.execute(f"UPDATE visitor SET ip_range_id = '{self.get_ip_range_id(ip_address)}' WHERE visitor_id = '{visitor_id}'")
|
||||||
|
|
||||||
def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
|
|
||||||
"""
|
|
||||||
TODO: make re-usable (alter groups when config changes)
|
def get_country_id(self, name, code) -> int:
|
||||||
"""
|
"""
|
||||||
# filegroup_str: 'name1: file1, file2, file3; name2: file33'
|
get the id of country of name
|
||||||
groups = filegroup_str.strip(";").split(";")
|
if not present, insert and return id
|
||||||
pdebug("create_filegroups:", groups)
|
"""
|
||||||
for group in groups:
|
if not sql_exists(self.cur, "country", [("name", name)]):
|
||||||
name, vals = group.split(":")
|
self.cur.execute(f"INSERT INTO country (name, code) VALUES ('{name}', '{code}')")
|
||||||
# create/get group
|
countries = self(f"SELECT country_id FROM country WHERE name = '{name}'")
|
||||||
if sql_exists(cursor, t_filegroup, [("groupname", name)]):
|
if len(countries) > 0:
|
||||||
group_id = sql_select(cursor, t_filegroup, [("groupname", name)])[0][0]
|
country_id_val = countries[0][0]
|
||||||
else:
|
else:
|
||||||
group_id = sql_max(cursor, t_filegroup, "group_id") + 1
|
warning(f"get_country_id: Could not get country_id for name='{name}'.")
|
||||||
sql_insert(cursor, t_filegroup, [(group_id, name)])
|
return 0
|
||||||
# pdebug("create_filegroups: group_id", group_id)
|
assert(type(country_id_val) == int)
|
||||||
# create/edit file
|
return country_id_val
|
||||||
for filename in vals.split(","):
|
|
||||||
if sql_exists(cursor, t_file, [("filename", filename)]): # if exist, update
|
|
||||||
cursor.execute(f"UPDATE {t_file} SET group_id = {group_id} WHERE filename = '{filename}'")
|
|
||||||
else:
|
|
||||||
sql_insert(cursor, t_file, [[filename, group_id]])
|
|
||||||
|
|
||||||
def get_files_from_dir_rec(p: str, files: list[str]):
|
def get_city_id(self, name, region, country_id) -> int:
|
||||||
"""recursively append all files to files"""
|
if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)]):
|
||||||
pdebug("get_files_from_dir_rec:",p)
|
self.cur.execute(f"INSERT INTO city (name, region, country_id) VALUES ('{name}', '{region}', '{country_id}')")
|
||||||
if path.isfile(p):
|
cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)])
|
||||||
files.append(p)
|
if len(cities) > 0:
|
||||||
elif path.isdir(p):
|
city_id_val = cities[0][0]
|
||||||
for p_ in listdir(p):
|
else:
|
||||||
get_files_from_dir_rec(p + "/" + p_, files)
|
warning(f"get_city_id: Could not get city_id for name='{name}', region='{region}' and country_id='{country_id}'.")
|
||||||
|
return 0
|
||||||
|
assert(type(city_id_val) == int)
|
||||||
|
return city_id_val
|
||||||
|
|
||||||
def get_auto_filegroup_str(location_and_dirs:list[tuple[str, str]], auto_group_filetypes:list[str]) -> str:
|
def update_geoip_tables(self, geoip_city_csv_path: str):
|
||||||
"""
|
"""
|
||||||
:param list of nginx locations and the corresponding directories
|
update the geoip data with the contents of the geoip_city_csv file
|
||||||
:param auto_filetype_groups list of filetypes for auto grouping
|
|
||||||
"""
|
|
||||||
files: list[str] = []
|
|
||||||
start_i = 0
|
|
||||||
if len(location_and_dirs) > 0 and len(location_and_dirs[0]) == 2:
|
|
||||||
for location, dir_ in location_and_dirs:
|
|
||||||
get_files_from_dir_rec(dir_, files)
|
|
||||||
# replace dir_ with location, eg /www/website with /
|
|
||||||
for i in range(start_i, len(files)):
|
|
||||||
files[i] = files[i].replace(dir_, location).replace("//", "/")
|
|
||||||
filegroups = ""
|
|
||||||
# create groups for each filetype
|
|
||||||
for ft in auto_group_filetypes:
|
|
||||||
filegroups += f"{ft}:"
|
|
||||||
for file in files:
|
|
||||||
if file.endswith(f".{ft}"):
|
|
||||||
filegroups += f"{file},"
|
|
||||||
filegroups = filegroups.strip(",") + ";"
|
|
||||||
pdebug("get_auto_filegroup_str: found files:", files, "filegroups_str:", filegroups)
|
|
||||||
return filegroups
|
|
||||||
|
|
||||||
def get_country_id(cur:sql.Cursor, name, code, country_tablesize):
|
Make sure to update the visitor.ip_range_id column for all visitors.
|
||||||
# countries = sql_select(cur, t_country, [("name", name)])
|
In case something changed, they might point to a different city. (won't fix)
|
||||||
cur.execute(f"SELECT {country_id.name} FROM {t_country} WHERE name = '{name}'")
|
"""
|
||||||
countries = cur.fetchall()
|
# indices for the csv
|
||||||
if len(countries) > 0:
|
FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5
|
||||||
country_id_val = countries[0][0]
|
with open(geoip_city_csv_path, 'r') as file:
|
||||||
else: # insert new country
|
csv = reader(file, delimiter=',', quotechar='"')
|
||||||
country_id_val = country_tablesize
|
# execute only if file could be opened
|
||||||
# pdebug(f"update_geoip_tables: Adding country #{country_id_val}, name={name}")
|
# delete all previous data
|
||||||
cur.execute(f"INSERT INTO {t_country} ({country_id.name}, name, code) VALUES ({country_id_val}, '{name}', '{code}')")
|
self.cur.execute(f"DELETE FROM ip_range")
|
||||||
country_tablesize += 1
|
self.cur.execute(f"DELETE FROM city")
|
||||||
return country_id_val, country_tablesize
|
self.cur.execute(f"DELETE FROM country")
|
||||||
|
self.cur.execute(f"VACUUM")
|
||||||
|
|
||||||
def get_city_id(cur: sql.Cursor, name, region, country_id, city_tablesize):
|
# guarantees that unknown city/country will have id 0
|
||||||
# cities = sql_select(cur, t_city, [("name", name)])
|
self.cur.execute(f"INSERT INTO country (country_id, name, code) VALUES (0, 'Unknown', 'XX') ")
|
||||||
cur.execute(f"SELECT {city_id.name} FROM {t_city} WHERE name = '{name}'")
|
self.cur.execute(f"INSERT INTO city (city_id, name, region) VALUES (0, 'Unknown', 'Unkown') ")
|
||||||
cities = cur.fetchall()
|
print(f"Recreating the geoip database from {geoip_city_csv_path}. This might take a long time...")
|
||||||
if len(cities) > 0:
|
|
||||||
city_id_val = cities[0][0]
|
|
||||||
else: # insert new city
|
|
||||||
city_id_val = city_tablesize
|
|
||||||
# pdebug(f"update_geoip_tables: Adding city #{city_id_val}, name={row[CITY]}, country={country_id_val}")
|
|
||||||
cur.execute(f"INSERT INTO {t_city} ({city_id.name}, name, region, country_id) VALUES ({city_id_val}, '{name}', '{region}', '{country_id}')")
|
|
||||||
city_tablesize += 1
|
|
||||||
return city_id_val, city_tablesize
|
|
||||||
|
|
||||||
def update_geoip_tables(cur: sql.Cursor, geoip_city_csv: str):
|
# for combining city ranges into a 'City in <Country>' range
|
||||||
FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5
|
# country_id for the range that was last added (for combining multiple csv rows in one ip_range)
|
||||||
ip_range_id_val = 0
|
RANGE_DONE = -1
|
||||||
with open(geoip_city_csv, 'r') as file:
|
combine_range_country_id = RANGE_DONE
|
||||||
# delete all previous data
|
combine_range_country_name = ""
|
||||||
cur.execute(f"DELETE FROM {t_ip_range}")
|
combine_range_low = RANGE_DONE
|
||||||
cur.execute(f"VACUUM")
|
combine_range_high = RANGE_DONE
|
||||||
csv = reader(file, delimiter=',', quotechar='"')
|
|
||||||
|
|
||||||
|
def add_range(low, high, city_name, region, country_id):
|
||||||
|
city_id = self.get_city_id(city_name, region, country_id)
|
||||||
|
pdebug(f"update_ip_range_id: Adding range for city={city_name}, country_id={country_id}, low={low}, high={high}")
|
||||||
|
self.cur.execute(f"INSERT INTO ip_range (low, high, city_id) VALUES ({low}, {high}, {city_id})")
|
||||||
|
for row in csv:
|
||||||
|
# these might contain problematic characters (')
|
||||||
|
row[CITY] = sanitize(row[CITY])
|
||||||
|
row[COUNTRY] = sanitize(row[COUNTRY])
|
||||||
|
row[REGION] = sanitize(row[REGION])
|
||||||
|
|
||||||
# guarantees that unknown city/country will have id 0
|
# make sure country exists
|
||||||
if not sql_exists(cur, t_country, [("name", "Unknown")]):
|
country_id = self.get_country_id(row[COUNTRY], row[CODE])
|
||||||
cur.execute(f"INSERT INTO {t_country} ({country_id.name}, name, code) VALUES (0, 'Unknown', 'XX') ")
|
# only add cities for countries the user is interested in
|
||||||
if not sql_exists(cur, t_city, [("name", "Unknown")]):
|
if row[CODE] in settings["get_cities_for_countries"]:
|
||||||
cur.execute(f"INSERT INTO {t_city} ({city_id.name}, name, region) VALUES (0, 'Unknown', 'Unkown') ")
|
add_range(row[FROM], row[TO], row[CITY], row[REGION], country_id)
|
||||||
country_tablesize = sql_tablesize(cur, t_country)
|
else:
|
||||||
city_tablesize = sql_tablesize(cur, t_city)
|
# if continuing
|
||||||
print(f"Recreating the geoip database from {geoip_city_csv}. This might take a long time...")
|
if combine_range_country_id != RANGE_DONE:
|
||||||
combine_range_country_id = 0
|
# if continuing previous range, extend the upper range limit
|
||||||
combine_range_lower = -1
|
if combine_range_country_id == country_id:
|
||||||
combine_range_upper = -1
|
combine_range_high = row[TO]
|
||||||
combine_range_country_name = ""
|
else: # new range for country, append
|
||||||
for row in csv:
|
add_range(combine_range_low, combine_range_high, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id)
|
||||||
# these might contain problematic characters (')
|
combine_range_country_id = RANGE_DONE
|
||||||
row[CITY] = sanitize(row[CITY])
|
# not elif, this has to be executed if previous else was executed
|
||||||
row[COUNTRY] = sanitize(row[COUNTRY])
|
if combine_range_country_id == RANGE_DONE : # currently in new range, combine with later ranges
|
||||||
row[REGION] = sanitize(row[REGION])
|
combine_range_country_id = country_id
|
||||||
|
combine_range_country_name = row[COUNTRY]
|
||||||
# make sure country exists
|
combine_range_low = row[FROM]
|
||||||
country_id_val, country_tablesize = get_country_id(cur, row[COUNTRY], row[CODE], country_tablesize)
|
combine_range_high = row[TO]
|
||||||
if row[CODE] in settings["get_cities_for_countries"]:
|
if combine_range_country_id >= 0: # last range , append
|
||||||
# make sure city exists
|
add_range(combine_range_low, combine_range_high, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id)
|
||||||
city_id_val, city_tablesize = get_city_id(cur, row[CITY], row[REGION], country_id_val, city_tablesize)
|
|
||||||
pdebug(f"update_ip_range_id: ip_range_id={ip_range_id_val}, Adding range for city={row[CITY]}, country={row[COUNTRY]}, lower={row[FROM]}, upper={row[TO]}")
|
|
||||||
cur.execute(f"INSERT INTO {t_ip_range} ({ip_range_id.name}, lower, upper, {city_id.name}) VALUES ({ip_range_id_val}, {row[FROM]}, {row[TO]}, {city_id_val})")
|
|
||||||
ip_range_id_val += 1
|
|
||||||
else:
|
|
||||||
if combine_range_country_id >= 0:
|
|
||||||
if combine_range_country_id == country_id_val: combine_range_upper = row[TO]
|
|
||||||
else: # new range for country, append
|
|
||||||
# get id for dummy city
|
|
||||||
pdebug(f"update_ip_range_id: ip_range_id={ip_range_id_val}, Adding combined range for country={combine_range_country_name}, lower={combine_range_lower}, upper={combine_range_upper}")
|
|
||||||
city_id_val, city_tablesize = get_city_id(cur, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id, city_tablesize)
|
|
||||||
cur.execute(f"INSERT INTO {t_ip_range} ({ip_range_id.name}, lower, upper, {city_id.name}) VALUES ({ip_range_id_val}, {combine_range_lower}, {combine_range_upper}, {city_id_val})")
|
|
||||||
ip_range_id_val += 1
|
|
||||||
combine_range_country_id = -1
|
|
||||||
if combine_range_country_id < 0 : # combine with later ranges
|
|
||||||
combine_range_country_id = country_id_val
|
|
||||||
combine_range_lower = row[FROM]
|
|
||||||
combine_range_upper = row[TO]
|
|
||||||
combine_range_country_name = row[COUNTRY]
|
|
||||||
if combine_range_country_id >= 0: # last range , append
|
|
||||||
# get id for dummy city
|
|
||||||
pdebug(f"update_ip_range_id: ip_range_id={ip_range_id_val}, Adding combined range for country={combine_range_country_name}, lower={combine_range_lower}, upper={combine_range_upper}")
|
|
||||||
city_id_val, city_tablesize = get_city_id(cur, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id, city_tablesize)
|
|
||||||
cur.execute(f"INSERT INTO {t_ip_range} ({ip_range_id.name}, lower, upper, {city_id.name}) VALUES ({ip_range_id_val}, {combine_range_lower}, {combine_range_upper}, {city_id_val})")
|
|
||||||
ip_range_id_val += 1
|
|
||||||
|
|
||||||
|
|
||||||
def create_db(db_name, filegroup_str="", location_and_dirs:list[tuple[str, str]]=[], auto_group_filetypes=[]):
|
|
||||||
"""
|
|
||||||
create the name with database_tables
|
|
||||||
"""
|
|
||||||
print(f"creating database: '{db_name}'")
|
|
||||||
conn = sql.connect(f"{db_name}")
|
|
||||||
cursor = conn.cursor()
|
|
||||||
for table in database_tables.values():
|
|
||||||
cursor.execute(table.create_sql_str())
|
|
||||||
filegroup_str = filegroup_str.strip("; ") + ";" + get_auto_filegroup_str(location_and_dirs, auto_group_filetypes)
|
|
||||||
create_filegroups(cursor, filegroup_str)
|
|
||||||
cursor.close()
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
create_db("test.db")
|
db = Database("test.db")
|
||||||
|
@ -13,7 +13,7 @@ class Request:
|
|||||||
def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", visitor_agent=""):
|
def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", visitor_agent=""):
|
||||||
self.ip_address = int(IPv4Address(sanitize(ip_address)))
|
self.ip_address = int(IPv4Address(sanitize(ip_address)))
|
||||||
self.time_local = 0
|
self.time_local = 0
|
||||||
#[20/Nov/2022:00:47:36 +0100]
|
# turn [20/Nov/2022:00:47:36 +0100] to unix time
|
||||||
m = match(r"\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+).*\]", time_local)
|
m = match(r"\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+).*\]", time_local)
|
||||||
if m:
|
if m:
|
||||||
g = m.groups()
|
g = m.groups()
|
||||||
@ -29,7 +29,7 @@ class Request:
|
|||||||
else:
|
else:
|
||||||
warning(f"Request:__init__: Could not match time: '{time_local}'")
|
warning(f"Request:__init__: Could not match time: '{time_local}'")
|
||||||
self.request_type = sanitize(request_type)
|
self.request_type = sanitize(request_type)
|
||||||
self.request_file = sanitize(request_file)
|
self.request_route = sanitize(request_file)
|
||||||
self.request_protocol = sanitize(request_protocol)
|
self.request_protocol = sanitize(request_protocol)
|
||||||
self.status = sanitize(status)
|
self.status = sanitize(status)
|
||||||
self.bytes_sent = sanitize(bytes_sent)
|
self.bytes_sent = sanitize(bytes_sent)
|
||||||
@ -37,9 +37,9 @@ class Request:
|
|||||||
self.visitor_agent = sanitize(visitor_agent)
|
self.visitor_agent = sanitize(visitor_agent)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"{self.ip_address} - {self.time_local} - {self.request_file} - {self.visitor_agent} - {self.status}"
|
return f"{self.ip_address} - {self.time_local} - {self.request_route} - {self.visitor_agent} - {self.status}"
|
||||||
|
|
||||||
def get_os(self):
|
def get_platform(self):
|
||||||
# for groups in findall(re_visitor_agent, visitor_agent):
|
# for groups in findall(re_visitor_agent, visitor_agent):
|
||||||
operating_system = ""
|
operating_system = ""
|
||||||
for os in visitor_agent_operating_systems:
|
for os in visitor_agent_operating_systems:
|
||||||
|
Loading…
Reference in New Issue
Block a user