class wrapper for the db
This commit is contained in:
parent
3457fff2c6
commit
ecc75560e3
@ -1,49 +1,12 @@
|
|||||||
import sqlite3 as sql
|
import sqlite3 as sql
|
||||||
from re import fullmatch, match
|
from re import fullmatch, match
|
||||||
from ipaddress import IPv4Address, ip_address
|
|
||||||
from time import mktime
|
|
||||||
from datetime import datetime as dt
|
|
||||||
from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id
|
from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id
|
||||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
||||||
from regina.utility.utility import pdebug, warning, pmessage
|
from regina.utility.utility import pdebug, warning, pmessage
|
||||||
from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
collect information from the access log and put it into the database
|
collect information from the access log and put it into the database
|
||||||
"""
|
"""
|
||||||
# Month abbreviations as they appear in the access-log timestamp.
# The list index + 1 is the month number passed to datetime.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


class Request:
    """
    One entry of the access log.

    ip_address is stored as the integer value of the IPv4 address,
    time_local as a unix timestamp (0 if the log timestamp could not be parsed).
    All remaining fields are stored after being passed through sanitize().
    """
    def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", visitor_agent=""):
        # NOTE(review): IPv4Address raises for anything that is not an IPv4 address,
        # including the "" default - confirm that callers never pass IPv6 or empty values
        self.ip_address = int(IPv4Address(sanitize(ip_address)))
        # fallback timestamp, kept if the time can not be parsed
        self.time_local = 0
        # expected format: [20/Nov/2022:00:47:36 +0100]
        # the utc offset is swallowed by ".*" and not used
        m = match(r"\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+).*\]", time_local)
        if m:
            g = m.groups()
            try:
                if g[1] in months:
                    # groups: day, month name, year, hour, minute, second
                    datetime_ = dt(int(g[2]), months.index(g[1])+1, int(g[0]), int(g[3]), int(g[4]), int(g[5]))
                    # mktime interprets the time tuple as local time
                    self.time_local = int(mktime(datetime_.timetuple()))
                else:
                    warning(f"Request:__init__: Unkown month: '{g[1]}'. Using timestamp {self.time_local}")
            except Exception as e:
                # best effort: an invalid date must not abort the log import
                warning(f"Request:__init__: {e}")
        else:
            warning(f"Request:__init__: Could not match time: '{time_local}'")
        self.request_type = sanitize(request_type)
        self.request_file = sanitize(request_file)
        self.request_protocol = sanitize(request_protocol)
        self.status = sanitize(status)
        self.bytes_sent = sanitize(bytes_sent)
        self.referer = sanitize(referer)
        self.visitor_agent = sanitize(visitor_agent)

    def __repr__(self):
        return f"{self.ip_address} - {self.time_local} - {self.request_file} - {self.visitor_agent} - {self.status}"
|
|
||||||
|
|
||||||
re_remote_addr = r"[0-9a-fA-F.:]+"
|
re_remote_addr = r"[0-9a-fA-F.:]+"
|
||||||
re_remote_visitor = ".*"
|
re_remote_visitor = ".*"
|
||||||
@ -54,6 +17,7 @@ re_body_bytes_sent = r'\d+'
|
|||||||
re_http_referer = r'"([^"]*)"'
|
re_http_referer = r'"([^"]*)"'
|
||||||
re_http_visitor_agent = r'"([^"]*)"'
|
re_http_visitor_agent = r'"([^"]*)"'
|
||||||
re_log_format: str = f'({re_remote_addr}) - ({re_remote_visitor}) ({re_time_local}) ({re_request}) ({re_status}) ({re_body_bytes_sent}) {re_http_referer} {re_http_visitor_agent}'
|
re_log_format: str = f'({re_remote_addr}) - ({re_remote_visitor}) ({re_time_local}) ({re_request}) ({re_status}) ({re_body_bytes_sent}) {re_http_referer} {re_http_visitor_agent}'
|
||||||
|
|
||||||
def parse_log(logfile:str) -> list[Request]:
|
def parse_log(logfile:str) -> list[Request]:
|
||||||
"""
|
"""
|
||||||
create Request objects from each line in the logfile
|
create Request objects from each line in the logfile
|
||||||
@ -77,171 +41,3 @@ def parse_log(logfile:str) -> list[Request]:
|
|||||||
status=g[4], bytes_sent=g[5], referer=g[6], visitor_agent=g[7]))
|
status=g[4], bytes_sent=g[5], referer=g[6], visitor_agent=g[7]))
|
||||||
return requests
|
return requests
|
||||||
|
|
||||||
|
|
||||||
def visitor_exists(cursor, request) -> bool:
    """
    check if the visitor that sent the request is already in the visitor table
    a visitor is identified by the ip address alone or by ip address and visitor agent,
    depending on the "unique_visitor_is_ip_address" setting
    """
    # the lookup value is hash(ip) if "hash_ip_address" is set, else the ip itself
    stored_ip = hash(request.ip_address) if settings["hash_ip_address"] else request.ip_address
    constraints = [("ip_address", stored_ip)]
    if not settings["unique_visitor_is_ip_address"]:
        constraints.append(("visitor_agent", request.visitor_agent))
    return sql_exists(cursor, t_visitor, constraints)
|
|
||||||
|
|
||||||
def get_visitor_id(request: Request, cursor: sql.Cursor) -> int:
    """
    return the visitor_id that belongs to the request
    if the visitor is not in the visitor table yet, it is inserted first
    """
    stored_ip = request.ip_address
    if settings["hash_ip_address"]:
        stored_ip = hash(request.ip_address)

    if not visitor_exists(cursor, request):
        # unknown visitor: the new id is the current maximum id + 1
        new_id = sql_max(cursor, t_visitor, "visitor_id") + 1
        platform, browser, mobile = get_os_browser_pairs_from_agent(request.visitor_agent)
        # 0 is used when the location lookup is disabled
        range_id = 0
        if settings["get_visitor_location"]:
            range_id = get_ip_range_id(cursor, request.ip_address)
        # is_visitor_human needs the visitor to be in the table, so the flag starts at 0
        human_flag = 0
        cursor.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({new_id}, '{stored_ip}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{human_flag}', '{range_id}');")
        return new_id

    # known visitor: look the id up with the same constraints visitor_exists uses
    if settings["unique_visitor_is_ip_address"]:
        constraints = [("ip_address", stored_ip)]
    else:
        constraints = [("ip_address", stored_ip), ("visitor_agent", request.visitor_agent)]
    return sql_select(cursor, t_visitor, constraints)[0][0]
|
|
||||||
|
|
||||||
def is_visitor_human(cur: sql.Cursor, visitor_id: int):
    """
    check if they have a known platform AND browser
    check if at least one request did not result in an error (http status >= 400)

    returns False if the visitor is not (or more than once) in the visitor table
    the second check is only done if the "human_needs_success" setting is set
    """
    # statuses below this value count as success:
    # 3xx only counts as success if "status_300_is_success" is set
    max_success_status = 400 if settings["status_300_is_success"] else 300
    cur.execute(f"SELECT browser, platform FROM {t_visitor} WHERE visitor_id = {visitor_id}")
    browsers_and_platforms = cur.fetchall()
    if len(browsers_and_platforms) != 1:
        pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many")
        return False
    browser, platform = browsers_and_platforms[0]
    if browser not in visitor_agent_browsers:
        return False
    if platform not in visitor_agent_operating_systems:
        return False
    # if human needs successful request
    if settings["human_needs_success"]:
        # check if at least one request was successful
        cur.execute(f"SELECT EXISTS (SELECT 1 FROM {t_request} WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
        if cur.fetchone()[0] != 1:
            # the visitor only had unsuccessful requests
            return False
    # visitor is human
    return True
|
|
||||||
|
|
||||||
def request_exists(cur: sql.Cursor, request: Request, visitor_id: int, group_id: int):
    """
    check if the same visitor already requested the same file group on the same day
    always returns False if the "request_is_same_on_same_day" setting is not set
    """
    # all stored requests from the same visitor to the same location
    cur.execute(f"SELECT request_id, date FROM {t_request} WHERE visitor_id = '{visitor_id}' AND group_id = '{group_id}'")
    # day of the new request, fromtimestamp uses the local timezone
    new_day = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
    for known_id, known_timestamp in cur.fetchall():
        if not settings["request_is_same_on_same_day"]:
            continue
        known_day = dt.fromtimestamp(known_timestamp).strftime("%Y-%m-%d")
        if known_day == new_day:
            pdebug(f"request_exists: Request is on same day as request {known_id}")
            return True
    return False
|
|
||||||
|
|
||||||
|
|
||||||
# re_visitor_agent = r"(?: ?([\w\- ]+)(?:\/([\w.]+))?(?: \(([^()]*)\))?)"
|
|
||||||
# 1: platform, 2: version, 3: details
|
|
||||||
def get_os_browser_pairs_from_agent(visitor_agent):
    """
    return (operating_system, browser, mobile) for a visitor agent string
    operating_system and browser are the first known names contained in the agent string, or "" if there is none
    mobile is True if the agent string contains "Mobi"
    """
    mobile = "Mobi" in visitor_agent
    # first match wins, the order of the global lists decides
    operating_system = next((name for name in visitor_agent_operating_systems if name in visitor_agent), "")
    browser = next((name for name in visitor_agent_browsers if name in visitor_agent), "")
    return operating_system, browser, mobile
|
|
||||||
|
|
||||||
|
|
||||||
def get_ip_range_id(cur: sql.Cursor, ip_address: int):
    """
    return the id of the ip range that contains ip_address
    returns 0 if no range or more than one range contains the address
    """
    cur.execute(f"SELECT {ip_range_id.name} FROM {t_ip_range} WHERE '{ip_address}' BETWEEN lower AND upper")
    results = cur.fetchall()
    if len(results) == 1:
        return results[0][0]
    if len(results) > 1:
        # ambiguous result: warn and fall through to the default id
        warning(f"get_ip_range_id: Found multiple ip_ranges for ip_address={ip_address}: results={results}")
    return 0
|
|
||||||
|
|
||||||
def update_ip_range_id(cur: sql.Cursor, visitor_id: int):
    """
    look up the ip range of the visitor again and store its id in the visitor table
    does nothing (except for a warning) if the visitor_id is not unique or does not exist
    """
    cur.execute(f"SELECT ip_address FROM {t_visitor} WHERE visitor_id = {visitor_id}")
    rows = cur.fetchall()
    if not rows:
        warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
        return
    if len(rows) > 1:
        warning(f"update_ip_range_id: Found multiple ip_addresses for visitor_id={visitor_id}: results={rows}")
        return
    # exactly one row: first column is the stored ip address
    range_id = get_ip_range_id(cur, rows[0][0])
    cur.execute(f"UPDATE {t_visitor} SET {ip_range_id.name} = '{range_id}' WHERE visitor_id = '{visitor_id}'")
|
|
||||||
|
|
||||||
|
|
||||||
def add_requests_to_db(requests: list[Request], db_name: str):
    """
    add the requests (and their visitors) to the database db_name
    - requests to locations that fully match the "request_location_regex_blacklist" setting are skipped
    - requests for which request_exists returns True are skipped
    - afterwards is_human is set for all visitors that were added during this call
    """
    conn = sql.connect(db_name)
    try:
        cursor = conn.cursor()
        added_requests = 0
        # remember the highest id before the import, the new visitors are checked later
        max_visitor_id = sql_max(cursor, t_visitor, "visitor_id")
        request_blacklist = settings["request_location_regex_blacklist"]
        for request in requests:
            # skip requests to blacklisted locations
            if request_blacklist and fullmatch(request_blacklist, request.request_file):
                continue
            visitor_id = get_visitor_id(request, cursor)
            conn.commit()
            group_id: int = get_filegroup(request.request_file, cursor)
            # only add the request if it is unique
            if request_exists(cursor, request, visitor_id, group_id):
                continue
            request_id = sql_max(cursor, t_request, "request_id") + 1
            sql_insert(cursor, t_request, [[request_id, visitor_id, group_id, request.time_local, request.referer, request.status]])
            added_requests += 1
        # get_visitor_id hands out ids as max + 1,
        # so the visitors added by this call are exactly the ids above the old maximum
        newest_visitor_id = sql_max(cursor, t_visitor, "visitor_id")
        for visitor_id in range(max_visitor_id + 1, newest_visitor_id + 1):
            # constraints are (column, value), like in all other sql_exists calls
            if not sql_exists(cursor, t_visitor, [("visitor_id", visitor_id)]): continue
            if is_visitor_human(cursor, visitor_id):
                cursor.execute(f"UPDATE {t_visitor} SET is_human = 1 WHERE visitor_id = {visitor_id}")
        cursor.close()
        conn.commit()
    finally:
        # do not leak the connection, neither on success nor on error
        conn.close()
    pmessage(f"Collection Summary: Added {newest_visitor_id - max_visitor_id} new visitors and {added_requests} new requests.")
|
|
||||||
|
@ -2,10 +2,15 @@
|
|||||||
import sqlite3 as sql
|
import sqlite3 as sql
|
||||||
from csv import reader
|
from csv import reader
|
||||||
from os import path, listdir
|
from os import path, listdir
|
||||||
|
import pkg_resources
|
||||||
|
import re
|
||||||
|
from datetime import datetime as dt
|
||||||
# local
|
# local
|
||||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
from .utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
||||||
from regina.utility.utility import pdebug
|
from .utility.utility import pdebug, get_filepath, warning, pmessage
|
||||||
from regina.utility.globals import settings
|
from .utility.globals import settings
|
||||||
|
from .db_operation.request import Request
|
||||||
|
from .utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
|
||||||
|
|
||||||
"""
|
"""
|
||||||
create reginas database as shown in the uml diagram database.uxf
|
create reginas database as shown in the uml diagram database.uxf
|
||||||
@ -37,6 +42,8 @@ class Table:
|
|||||||
for c in self.constaints:
|
for c in self.constaints:
|
||||||
s += f", {c}"
|
s += f", {c}"
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
t_request = "request"
|
t_request = "request"
|
||||||
t_file = "file"
|
t_file = "file"
|
||||||
t_filegroup = "filegroup"
|
t_filegroup = "filegroup"
|
||||||
@ -100,36 +107,217 @@ database_tables = {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Database:
    """
    Wrapper around the sqlite database that stores visitors, requests and file(group)s.

    Holds one connection (self.conn) and one cursor (self.cur).
    An instance can be called with a sql command, see __call__.
    """
    def __init__(self, database_path):
        """open the database at database_path, create the tables if the database is empty"""
        self.conn = sql.connect(database_path)
        self.cur = self.conn.cursor()
        # verify that the database is created
        self.cur.execute("pragma schema_version")
        if self.cur.fetchone()[0] == 0:  # not created
            pdebug(f"Database.__init__: Creating database at {database_path}")
            with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
                create_db = file.read()
            # execute() only accepts a single statement, executescript() takes the whole script
            self.cur.executescript(create_db)

    def __call__(self, s):
        """execute a command and return fetchall()"""
        self.cur.execute(s)
        return self.cur.fetchall()

    #
    # VISITOR
    #
    def _stored_ip(self, request):
        """return the ip address of the request in the form that is used in the visitor table"""
        if settings["hash_ip_address"]:
            return hash(request.ip_address)
        return request.ip_address

    def _visitor_constraints(self, request):
        """return the (column, value) constraints that identify the visitor of the request"""
        constraints = [("ip_address", self._stored_ip(request))]
        if not settings["unique_visitor_is_ip_address"]:
            constraints.append(("visitor_agent", request.visitor_agent))
        return constraints

    def visitor_exists(self, request) -> bool:
        """check if the visitor that sent the request is already in the visitor table"""
        return sql_exists(self.cur, t_visitor, self._visitor_constraints(request))

    def is_visitor_human(self, visitor_id: int):
        """
        check if they have a known platform AND browser
        check if at least one request did not result in an error (http status >= 400)

        returns False if the visitor is not (or more than once) in the visitor table
        the second check is only done if the "human_needs_success" setting is set
        """
        # statuses below this value count as success:
        # 3xx only counts as success if "status_300_is_success" is set
        max_success_status = 400 if settings["status_300_is_success"] else 300
        self.cur.execute(f"SELECT browser, platform FROM {t_visitor} WHERE visitor_id = {visitor_id}")
        browsers_and_platforms = self.cur.fetchall()
        if len(browsers_and_platforms) != 1:
            pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many")
            return False
        browser, platform = browsers_and_platforms[0]
        if browser not in visitor_agent_browsers:
            return False
        if platform not in visitor_agent_operating_systems:
            return False
        # if human needs successful request
        if settings["human_needs_success"]:
            # check if at least one request was successful
            self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM {t_request} WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
            if self.cur.fetchone()[0] != 1:
                # the visitor only had unsuccessful requests
                return False
        return True

    def get_visitor_id(self, request: "Request") -> int:
        """
        get the visitor_id. Adds the visitor if not already existing
        """
        if self.visitor_exists(request):
            return sql_select(self.cur, t_visitor, self._visitor_constraints(request))[0][0]
        # new visitor: the new visitor_id is the current maximum id + 1
        visitor_id = sql_max(self.cur, t_visitor, "visitor_id") + 1
        # platform, browser and mobile are derived from the visitor agent by the request itself
        platform = request.get_os()
        browser = request.get_browser()
        mobile = request.get_mobile()
        ip_range_id_val = 0
        if settings["get_visitor_location"]:
            ip_range_id_val = self.get_ip_range_id(request.ip_address)
        # is_visitor_human can not be called until the visitor is in the db
        is_human = 0
        self.cur.execute(f"INSERT INTO {t_visitor} (visitor_id, ip_address, visitor_agent, platform, browser, mobile, is_human, {ip_range_id.name}) VALUES ({visitor_id}, '{self._stored_ip(request)}', '{request.visitor_agent}', '{platform}', '{browser}', '{int(mobile)}', '{is_human}', '{ip_range_id_val}');")
        return visitor_id

    #
    # REQUEST
    #
    def request_exists(self, request: "Request", visitor_id: int, group_id: int):
        """
        check if the same visitor already requested the same file group on the same day
        always returns False if the "request_is_same_on_same_day" setting is not set
        """
        # get all requests from same visitor to same location
        # TODO this looks wrong
        self.cur.execute(f"SELECT request_id, date FROM {t_request} WHERE visitor_id = '{visitor_id}' AND group_id = '{group_id}'")
        date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
        for request_id, date1 in self.cur.fetchall():
            if settings["request_is_same_on_same_day"]:
                date1 = dt.fromtimestamp(date1).strftime("%Y-%m-%d")
                if date0 == date1:
                    pdebug(f"request_exists: Request is on same day as request {request_id}")
                    return True
        return False

    def add_request(self, request: "Request") -> "int | None":
        """returns visitor_id if new request was added, else None"""
        # skip requests to blacklisted locations
        request_blacklist = settings["request_location_regex_blacklist"]
        if request_blacklist and re.fullmatch(request_blacklist, request.request_file):
            return None
        visitor_id = self.get_visitor_id(request)
        self.conn.commit()
        group_id: int = self.get_filegroup(request.request_file)
        # check if request is unique
        if self.request_exists(request, visitor_id, group_id):
            return None
        # NOTE(review): request_id is passed as None, presumably so that the database assigns it - confirm that sql_insert handles None
        sql_insert(self.cur, t_request, [[None, visitor_id, group_id, request.time_local, request.referer, request.status]])
        return visitor_id

    def add_requests(self, requests: "list[Request]"):
        """
        add all requests with add_request
        afterwards update the is_human column of every visitor that got a new request
        """
        added_requests = 0
        # get_visitor_id hands out ids as max + 1,
        # so the number of new visitors is the growth of the maximum id
        max_visitor_id = sql_max(self.cur, t_visitor, "visitor_id")
        # dict instead of list: every visitor is only checked once, order is kept
        visitors_with_new_requests = {}
        for request in requests:
            visitor_id = self.add_request(request)
            # compare with None, 0 could be a valid visitor_id
            if visitor_id is None:
                continue
            added_requests += 1
            visitors_with_new_requests[visitor_id] = None

        # update the is_human column for all visitors with new requests
        for visitor_id in visitors_with_new_requests:
            # constraints are (column, value), like in all other sql_exists calls
            if not sql_exists(self.cur, t_visitor, [("visitor_id", visitor_id)]): continue
            if self.is_visitor_human(visitor_id):
                self.cur.execute(f"UPDATE {t_visitor} SET is_human = 1 WHERE visitor_id = {visitor_id}")
        self.conn.commit()
        new_visitor_count = sql_max(self.cur, t_visitor, "visitor_id") - max_visitor_id
        pmessage(f"Collection Summary: Added {new_visitor_count} new visitors and {added_requests} new requests.")

    #
    # FILE(GROUP)
    #
    def get_filegroup(self, filename: str) -> int:
        """
        get the filegroup
        returns the group where
        1) filename is the groupname
        2) the filetype of filename is the groupname
        3) new group with filename as groupname
        """
        if sql_exists(self.cur, t_file, [("filename", filename)]):
            # second column of the file row is used as the group id
            return sql_select(self.cur, t_file, [("filename", filename)])[0][1]
        # text after the last '.', the whole filename if it has no '.'
        suffix = filename.split('.')[-1]
        self.cur.execute(f"SELECT group_id FROM {t_filegroup} WHERE groupname = '{suffix}'")
        group_id_candidates = self.cur.fetchall()
        if group_id_candidates:
            return group_id_candidates[0][0]
        # add new group for filename
        group_id = sql_max(self.cur, t_filegroup, "group_id") + 1
        # add group
        sql_insert(self.cur, t_filegroup, [[group_id, filename]])
        # add file
        sql_insert(self.cur, t_file, [[filename, group_id]])
        return group_id

    #
    # GEOIP
    #
    def get_ip_range_id(self, ip_address: int):
        """
        return the id of the ip range that contains ip_address
        returns 0 if no range or more than one range contains the address
        """
        self.cur.execute(f"SELECT {ip_range_id.name} FROM {t_ip_range} WHERE '{ip_address}' BETWEEN lower AND upper")
        results = self.cur.fetchall()
        if len(results) == 1:
            return results[0][0]
        if len(results) > 1:
            warning(f"get_ip_range_id: Found multiple ip_ranges for ip_address={ip_address}: results={results}")
        return 0

    def update_ip_range_id(self, visitor_id: int):
        """
        look up the ip range of the visitor again and store its id in the visitor table
        does nothing (except for a warning) if the visitor_id is not unique or does not exist
        """
        self.cur.execute(f"SELECT ip_address FROM {t_visitor} WHERE visitor_id = {visitor_id}")
        results = self.cur.fetchall()
        if len(results) == 0:
            warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
            return
        elif len(results) > 1:
            warning(f"update_ip_range_id: Found multiple ip_addresses for visitor_id={visitor_id}: results={results}")
            return
        ip_address = results[0][0]
        self.cur.execute(f"UPDATE {t_visitor} SET {ip_range_id.name} = '{self.get_ip_range_id(ip_address)}' WHERE visitor_id = '{visitor_id}'")

|
||||||
|
|
||||||
def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
|
def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
|
||||||
|
"""
|
||||||
|
TODO: make re-usable (alter groups when config changes)
|
||||||
|
"""
|
||||||
# filegroup_str: 'name1: file1, file2, file3; name2: file33'
|
# filegroup_str: 'name1: file1, file2, file3; name2: file33'
|
||||||
groups = filegroup_str.strip(";").split(";")
|
groups = filegroup_str.strip(";").split(";")
|
||||||
pdebug("create_filegroups:", groups)
|
pdebug("create_filegroups:", groups)
|
||||||
|
62
regina/db_operation/request.py
Normal file
62
regina/db_operation/request.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
from ipaddress import IPv4Address, ip_address
|
||||||
|
from time import mktime
|
||||||
|
from re import fullmatch, match
|
||||||
|
from datetime import datetime as dt
|
||||||
|
|
||||||
|
from .utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
|
||||||
|
from .utility.utility import pdebug, warning, pmessage
|
||||||
|
from .utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
|
||||||
|
|
||||||
|
# Month abbreviations as they appear in the access-log timestamp.
# The list index + 1 is the month number passed to datetime.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


class Request:
    """
    One entry of the access log.

    ip_address is stored as the integer value of the IPv4 address,
    time_local as a unix timestamp (0 if the log timestamp could not be parsed).
    All remaining fields are stored after being passed through sanitize().
    """
    def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", visitor_agent=""):
        # NOTE(review): IPv4Address raises for anything that is not an IPv4 address,
        # including the "" default - confirm that callers never pass IPv6 or empty values
        self.ip_address = int(IPv4Address(sanitize(ip_address)))
        # fallback timestamp, kept if the time can not be parsed
        self.time_local = 0
        # expected format: [20/Nov/2022:00:47:36 +0100]
        # the utc offset is swallowed by ".*" and not used
        m = match(r"\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+).*\]", time_local)
        if m:
            g = m.groups()
            try:
                if g[1] in months:
                    # groups: day, month name, year, hour, minute, second
                    datetime_ = dt(int(g[2]), months.index(g[1])+1, int(g[0]), int(g[3]), int(g[4]), int(g[5]))
                    # mktime interprets the time tuple as local time
                    self.time_local = int(mktime(datetime_.timetuple()))
                else:
                    warning(f"Request:__init__: Unkown month: '{g[1]}'. Using timestamp {self.time_local}")
            except Exception as e:
                # best effort: an invalid date must not abort the log import
                warning(f"Request:__init__: {e}")
        else:
            warning(f"Request:__init__: Could not match time: '{time_local}'")
        self.request_type = sanitize(request_type)
        self.request_file = sanitize(request_file)
        self.request_protocol = sanitize(request_protocol)
        self.status = sanitize(status)
        self.bytes_sent = sanitize(bytes_sent)
        self.referer = sanitize(referer)
        self.visitor_agent = sanitize(visitor_agent)

    def __repr__(self):
        return f"{self.ip_address} - {self.time_local} - {self.request_file} - {self.visitor_agent} - {self.status}"

    def get_os(self):
        """return the first known operating system name contained in the visitor agent, "" if there is none"""
        for known_os in visitor_agent_operating_systems:
            if known_os in self.visitor_agent:
                return known_os
        return ""

    def get_browser(self):
        """return the first known browser name contained in the visitor agent, "" if there is none"""
        for known_browser in visitor_agent_browsers:
            if known_browser in self.visitor_agent:
                return known_browser
        return ""

    def get_mobile(self):
        """return True if the visitor agent contains "Mobi" """
        return "Mobi" in self.visitor_agent

|
||||||
|
|
||||||
|
|
@ -9,7 +9,7 @@ from datetime import datetime as dt
|
|||||||
|
|
||||||
from numpy import empty
|
from numpy import empty
|
||||||
# local
|
# local
|
||||||
from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, t_city, t_country
|
from regina.db_operation.database import Database, t_request, t_visitor, t_file, t_filegroup, t_ip_range, t_city, t_country
|
||||||
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
|
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
|
||||||
from regina.utility.utility import pdebug, warning, missing_arg
|
from regina.utility.utility import pdebug, warning, missing_arg
|
||||||
from regina.utility.globals import settings
|
from regina.utility.globals import settings
|
||||||
@ -66,7 +66,7 @@ def valid_status(status: int):
|
|||||||
#
|
#
|
||||||
# FILTERS
|
# FILTERS
|
||||||
#
|
#
|
||||||
def get_os_browser_mobile_rankings(cur: sql.Cursor, visitor_ids: list[int]):
|
def get_os_browser_mobile_rankings(db: Database, visitor_ids: list[int]):
|
||||||
"""
|
"""
|
||||||
returns [(count, operating_system)], [(count, browser)], mobile_visitor_percentage
|
returns [(count, operating_system)], [(count, browser)], mobile_visitor_percentage
|
||||||
"""
|
"""
|
||||||
@ -76,8 +76,7 @@ def get_os_browser_mobile_rankings(cur: sql.Cursor, visitor_ids: list[int]):
|
|||||||
browser_count = 0.0
|
browser_count = 0.0
|
||||||
mobile_ranking = { True: 0.0, False: 0.0 }
|
mobile_ranking = { True: 0.0, False: 0.0 }
|
||||||
for visitor_id in visitor_ids:
|
for visitor_id in visitor_ids:
|
||||||
cur.execute(f"SELECT platform,browser,mobile FROM {t_visitor} WHERE visitor_id = {visitor_id}")
|
os, browser, mobile = db(f"SELECT platform,browser,mobile FROM {t_visitor} WHERE visitor_id = {visitor_id}")[0]
|
||||||
os, browser, mobile = cur.fetchone()
|
|
||||||
mobile = bool(mobile)
|
mobile = bool(mobile)
|
||||||
if os:
|
if os:
|
||||||
if os in os_ranking: os_ranking[os] += 1
|
if os in os_ranking: os_ranking[os] += 1
|
||||||
@ -134,34 +133,30 @@ def get_where_date_str(at_date=None, min_date=None, max_date=None):
|
|||||||
|
|
||||||
|
|
||||||
# get the earliest date
|
# get the earliest date
|
||||||
def get_earliest_date(db: Database) -> int:
    """
    return the earliest request time as unixepoch
    :returns smallest value of the date column, or 0 if that value is not an int
    """
    rows = db(f"SELECT MIN(date) FROM {t_request}")
    earliest = rows[0][0]
    # anything that is not an int (e.g. None from an empty table) is reported as 0
    return earliest if isinstance(earliest, int) else 0
|
||||||
|
|
||||||
# get the latest date
|
# get the latest date
|
||||||
def get_latest_date(db: Database) -> int:
    """
    return the latest request time as unixepoch
    :returns largest value of the date column, or 0 if that value is not an int
    """
    rows = db(f"SELECT MAX(date) FROM {t_request}")
    latest = rows[0][0]
    # anything that is not an int (e.g. None from an empty table) is reported as 0
    return latest if isinstance(latest, int) else 0
|
||||||
|
|
||||||
# get all dates
|
# get all dates
|
||||||
# the date:str parameter in all these functions must be a sqlite constraint
|
# the date:str parameter in all these functions must be a sqlite constraint
|
||||||
def get_days(db: Database, date:str) -> list[str]:
    """
    get a sorted list of all distinct request dates in yyyy-mm-dd format
    :param date: sqlite constraint that is inserted into the WHERE clause
    """
    rows = db(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request} WHERE {date}")
    # every row is a tuple (date, )
    return sorted(row[0] for row in rows)
|
||||||
|
|
||||||
def get_months(cur: sql.Cursor, date:str) -> list[str]:
|
def get_months(db: Database, date:str) -> list[str]:
|
||||||
"""get a list of all dates in yyyy-mm format"""
|
"""get a list of all dates in yyyy-mm format"""
|
||||||
cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request} WHERE {date}")
|
dates = get_days(db, date)
|
||||||
dates = get_days(cur, date)
|
|
||||||
date_dict = {}
|
date_dict = {}
|
||||||
for date in dates:
|
for date in dates:
|
||||||
date_without_day = date[0:date.rfind('-')]
|
date_without_day = date[0:date.rfind('-')]
|
||||||
@ -169,14 +164,13 @@ def get_months(cur: sql.Cursor, date:str) -> list[str]:
|
|||||||
return list(date_dict.keys())
|
return list(date_dict.keys())
|
||||||
|
|
||||||
|
|
||||||
def get_visitor_agent(db: Database, visitor_id: int):
    """
    look up visitor_id in the visitor table
    :returns the third column of the first matching row
    """
    matching_rows = sql_select(db.cur, t_visitor, [("visitor_id", visitor_id)])
    first_row = matching_rows[0]
    return first_row[2]
|
||||||
|
|
||||||
def get_unique_visitor_ids_for_date(db: Database, date:str) -> list[int]:
    """
    get the distinct visitor ids of all requests matching the date constraint
    :param date: sqlite constraint that is inserted into the WHERE clause
    """
    visitor_ids = []
    # every row is a tuple (visitor_id, )
    for row in db(f"SELECT DISTINCT visitor_id FROM {t_request} WHERE {date}"):
        visitor_ids.append(row[0])
    return visitor_ids
|
|
||||||
|
|
||||||
def get_human_visitors(cur: sql.Cursor, unique_visitor_ids, unique_visitor_ids_human: list):
|
def get_human_visitors(db: Database, unique_visitor_ids, unique_visitor_ids_human: list):
|
||||||
"""
|
"""
|
||||||
check if they have a known platform AND browser
|
check if they have a known platform AND browser
|
||||||
check if at least one request did not result in an error (http status >= 400)
|
check if at least one request did not result in an error (http status >= 400)
|
||||||
@ -195,22 +189,22 @@ def get_human_visitors(cur: sql.Cursor, unique_visitor_ids, unique_visitor_ids_h
|
|||||||
unique_visitor_ids_human.append(visitor_id)
|
unique_visitor_ids_human.append(visitor_id)
|
||||||
# pdebug("get_human_visitors: (2)", unique_visitor_ids_human)
|
# pdebug("get_human_visitors: (2)", unique_visitor_ids_human)
|
||||||
|
|
||||||
def get_unique_request_ids_for_date(db: Database, date:str):
    """
    get the distinct request ids of all requests matching the date constraint
    :param db: database wrapper, called with the sql text and returning the result rows
    :param date: sqlite constraint that is inserted into the WHERE clause
    :returns [request_id]
    """
    # run the query through db: this function has no cursor named `cur` in scope
    rows = db(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date}")
    # every row is a tuple (request_id, )
    return [ row[0] for row in rows ]
|
||||||
|
|
||||||
def get_unique_request_ids_for_date_and_visitor(db: Database, date:str, visitor_id: int, unique_request_ids_human: list):
    """
    append the distinct request ids of one visitor to unique_request_ids_human
    :param db: database wrapper, called with the sql text and returning the result rows
    :param date: sqlite constraint that is inserted into the WHERE clause
    :param visitor_id: only requests of this visitor are considered
    :param unique_request_ids_human: output list, extended in place
    """
    # run the query through db: this function has no cursor named `cur` in scope
    rows = db(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date} AND visitor_id = {visitor_id}")
    # all unique requests for visitor_id, every row is a tuple (request_id, )
    unique_request_ids_human.extend(row[0] for row in rows)
|
||||||
|
|
||||||
# get number of requests per day
|
# get number of requests per day
|
||||||
def get_request_count_for_date(db: Database, date:str) -> int:
    """
    get the number of requests matching the date constraint
    :param db: database wrapper, called with the sql text and returning the result rows
    :param date: sqlite constraint that is inserted into the WHERE clause
    """
    # run the query through db: this function has no cursor named `cur` in scope
    # COUNT(*) always yields exactly one row (count, )
    return db(f"SELECT COUNT(*) FROM {t_request} WHERE {date}")[0][0]
|
||||||
|
|
||||||
def get_unique_visitor_count(db: Database) -> int:
    """
    get the size of the visitor table
    :param db: database wrapper, its cursor db.cur is handed to sql_tablesize
    """
    # sql_tablesize expects a cursor; this function has no cursor named `cur` in scope
    return sql_tablesize(db.cur, t_visitor)
|
||||||
|
|
||||||
|
|
||||||
@ -218,7 +212,7 @@ def get_unique_visitor_count(cur: sql.Cursor) -> int:
|
|||||||
#
|
#
|
||||||
# RANKINGS
|
# RANKINGS
|
||||||
#
|
#
|
||||||
def get_file_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]]:
|
def get_file_ranking(db: Database, date:str) -> list[tuple[int, str]]:
|
||||||
global settings
|
global settings
|
||||||
"""
|
"""
|
||||||
:returns [(request_count, groupname)]
|
:returns [(request_count, groupname)]
|
||||||
@ -255,7 +249,7 @@ def get_file_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]]:
|
|||||||
# print(ranking)
|
# print(ranking)
|
||||||
return ranking
|
return ranking
|
||||||
|
|
||||||
def get_visitor_agent_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]]:
|
def get_visitor_agent_ranking(db: Database, date:str) -> list[tuple[int, str]]:
|
||||||
"""
|
"""
|
||||||
:returns [(request_count, visitor_agent)]
|
:returns [(request_count, visitor_agent)]
|
||||||
"""
|
"""
|
||||||
@ -276,7 +270,7 @@ def get_visitor_agent_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]
|
|||||||
# print(ranking)
|
# print(ranking)
|
||||||
return ranking
|
return ranking
|
||||||
|
|
||||||
def get_request_ranking(field_name: str, table: str, whitelist_regex: str, cur: sql.Cursor, date_condition:str) -> list[tuple[int, str]]:
|
def get_request_ranking(field_name: str, table: str, whitelist_regex: str, db: Database, date_condition:str) -> list[tuple[int, str]]:
|
||||||
"""
|
"""
|
||||||
1) get all the distinct entries for field_name after min_date_unix_time
|
1) get all the distinct entries for field_name after min_date_unix_time
|
||||||
2) call get_name_function with the distinct entry
|
2) call get_name_function with the distinct entry
|
||||||
|
@ -4,13 +4,23 @@
|
|||||||
from sys import argv, exit
|
from sys import argv, exit
|
||||||
from os.path import isfile
|
from os.path import isfile
|
||||||
import sqlite3 as sql
|
import sqlite3 as sql
|
||||||
from regina.db_operation.collect import parse_log, add_requests_to_db, update_ip_range_id
|
|
||||||
from regina.db_operation.database import create_db, update_geoip_tables, t_visitor
|
if __name__ == "__main__":
|
||||||
from regina.db_operation.visualize import visualize
|
if __package__ is None:
|
||||||
from regina.utility.settings_manager import read_settings_file
|
# make relative imports work as described here: https://peps.python.org/pep-0366/#proposed-change
|
||||||
from regina.utility.globals import settings, version
|
__package__ = "regina"
|
||||||
from regina.utility.utility import pmessage
|
import sys
|
||||||
from regina.utility.sql_util import sql_tablesize
|
from os import path
|
||||||
|
filepath = path.realpath(path.abspath(__file__))
|
||||||
|
sys.path.insert(0, path.dirname(path.dirname(filepath)))
|
||||||
|
|
||||||
|
from .db_operation.collect import parse_log, add_requests_to_db, update_ip_range_id
|
||||||
|
from .db_operation.database import create_db, update_geoip_tables, t_visitor
|
||||||
|
from .db_operation.visualize import visualize
|
||||||
|
from .utility.settings_manager import read_settings_file
|
||||||
|
from .utility.globals import settings, version
|
||||||
|
from .utility.utility import pmessage
|
||||||
|
from .utility.sql_util import sql_tablesize
|
||||||
|
|
||||||
"""
|
"""
|
||||||
start regina, launch either collect or visualize
|
start regina, launch either collect or visualize
|
||||||
|
50
regina/sql/create_db.sql
Normal file
50
regina/sql/create_db.sql
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
-- schema of the regina database
-- all tables are STRICT, the declared column types are enforced

CREATE TABLE IF NOT EXISTS visitor(
    visitor_id      INTEGER PRIMARY KEY,
    platform        TEXT,
    browser         TEXT,
    is_human        INTEGER,
    range_id        INTEGER
) STRICT;

CREATE TABLE IF NOT EXISTS request(
    request_id      INTEGER PRIMARY KEY,
    visitor_id      INTEGER,
    group_id        INTEGER,
    date            INTEGER,
    referer         TEXT,
    status          INTEGER,
    -- table constraints have to come after all column definitions
    FOREIGN KEY(visitor_id) REFERENCES visitor(visitor_id),
    FOREIGN KEY(group_id) REFERENCES filegroup(group_id)
) STRICT;

CREATE TABLE IF NOT EXISTS filegroup(
    group_id        INTEGER PRIMARY KEY,
    groupname       TEXT
) STRICT;

CREATE TABLE IF NOT EXISTS file(
    filename        TEXT,
    group_id        INTEGER,
    FOREIGN KEY(group_id) REFERENCES filegroup(group_id)
) STRICT;

CREATE TABLE IF NOT EXISTS ip_range(
    range_id        INTEGER PRIMARY KEY,
    -- from and to are sql keywords, as column names they must be quoted
    "from"          INTEGER,
    "to"            INTEGER,
    city_id         INTEGER,
    FOREIGN KEY(city_id) REFERENCES city(city_id)
) STRICT;

CREATE TABLE IF NOT EXISTS city(
    -- named city_id so that the reference ip_range.city_id -> city(city_id) resolves
    city_id         INTEGER PRIMARY KEY,
    name            TEXT,
    region          TEXT,
    country_id      INTEGER,
    FOREIGN KEY(country_id) REFERENCES country(country_id)
) STRICT;

CREATE TABLE IF NOT EXISTS country(
    country_id      INTEGER PRIMARY KEY,
    name            TEXT,
    code            TEXT
) STRICT;
|
@ -1,5 +1,7 @@
|
|||||||
"""global variables for regina"""
|
"""global variables for regina"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
version = "1.0"
|
version = "1.0"
|
||||||
|
|
||||||
# default settings, these are overwritable through a config file
|
# default settings, these are overwritable through a config file
|
||||||
@ -74,3 +76,12 @@ visitor_agent_browsers = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# set directories
# a REGINA_*_DIR environment variable is used if it is set,
# otherwise the "regina" subdirectory of the matching XDG base directory (or its default)
config_dir = os.environ.get(
    "REGINA_CONFIG_DIR",
    os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~/.config")), "regina"),
)
data_dir = os.environ.get(
    "REGINA_DATA_DIR",
    os.path.join(os.environ.get("XDG_DATA_HOME", os.path.expanduser("~/.local/share")), "regina"),
)
cache_dir = os.environ.get(
    "REGINA_CACHE_DIR",
    os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "regina"),
)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# from sys import path
|
# from sys import path
|
||||||
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
|
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
|
||||||
from sys import exit
|
from sys import exit
|
||||||
|
from os import path
|
||||||
|
|
||||||
from regina.utility.globals import settings
|
from regina.utility.globals import settings
|
||||||
|
|
||||||
@ -29,3 +30,11 @@ def missing_arg(arg):
|
|||||||
print("Missing ", arg)
|
print("Missing ", arg)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def get_filepath(filename, directories: list):
    """
    search directories for file and return the full path to the file
    :param filename: name of the file to look for
    :param directories: directories that are searched in the given order, a leading ~ is expanded
    :returns path of the first candidate that is an existing file
    :raises FileNotFoundError: if the file is in none of the directories
    """
    for d in directories:
        # path.join instead of string formatting: no doubled separator when d ends with one
        p = path.join(path.expanduser(d), filename)
        if path.isfile(p):
            return p
    raise FileNotFoundError(f"{filename} not in {directories}")
|
||||||
|
5
setup.py
5
setup.py
@ -1,3 +1,4 @@
|
|||||||
|
from matplotlib.pyplot import matplotlib
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
@ -12,8 +13,8 @@ setup(
|
|||||||
|
|
||||||
license="GPLv3",
|
license="GPLv3",
|
||||||
|
|
||||||
packages=find_packages(),
|
packages=["regina"],
|
||||||
install_requires=[],
|
install_requires=["matplotlib"],
|
||||||
python_requires='>=3.10',
|
python_requires='>=3.10',
|
||||||
|
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
Loading…
Reference in New Issue
Block a user