restructured
This commit is contained in:
parent
3bdad3d340
commit
9f8c9966ad
@ -1,138 +0,0 @@
|
|||||||
import sqlite3 as sql
|
|
||||||
from re import match
|
|
||||||
from time import mktime
|
|
||||||
from datetime import datetime as dt
|
|
||||||
from .database import t_request, t_user, t_file, t_filegroup, database_tables, get_filegroup
|
|
||||||
from .sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
|
|
||||||
|
|
||||||
"""
|
|
||||||
collect information from the access log and put it into the database
|
|
||||||
"""
|
|
||||||
|
|
||||||
DEBUG = True

def pdebug(*args):
    """Print *args*, but only when the module-wide DEBUG flag is on."""
    if DEBUG:
        print(*args)
|
|
||||||
|
|
||||||
def warning(w):
    """Print a warning message *w* (warnings are not fatal)."""
    print(w)
|
|
||||||
|
|
||||||
|
|
||||||
# Abbreviated month names as they appear in nginx access-log timestamps;
# index + 1 is the month number (used by Request.__init__).
# BUG FIX: the list was missing "May" and misspelled "Aug"/"Dec" as "Aut"/"Dez",
# so any log line from May onwards parsed to the wrong month or failed.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
|
|
||||||
|
|
||||||
# these oses and browser can be detected:
# lower element takes precedence (first substring match wins in
# get_os_browser_pairs_from_agent)
user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
# ordered so that more specific tokens come before generic ones
# (eg "Chromium" before "Chrome", "Chrome" before "Safari")
user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
|
|
||||||
|
|
||||||
|
|
||||||
class Request:
    """One parsed access-log line: who requested which file, when and how."""

    def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", user_agent=""):
        self.ip_address = sanitize(ip_address)
        # parse eg. "[20/Nov/2022:00:47:36 +0100]" into unix time; 0 on failure
        self.time_local = 0
        time_match = match(r"\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+).*\]", time_local)
        if time_match is None:
            warning(f"Request:__init__: Could not match time: '{time_local}'")
        else:
            day, month_name, year, hour, minute, second = time_match.groups()
            try:
                parsed = dt(int(year), months.index(month_name) + 1, int(day), int(hour), int(minute), int(second))
                self.time_local = int(mktime(parsed.timetuple()))
            except Exception as e:
                warning(f"Request:__init__: {e}")
        self.request_type = sanitize(request_type)
        self.request_file = sanitize(request_file)
        self.request_protocol = sanitize(request_protocol)
        self.status = sanitize(status)
        self.bytes_sent = sanitize(bytes_sent)
        self.referer = sanitize(referer)
        self.user_agent = sanitize(user_agent)

    def __repr__(self):
        return f"{self.ip_address} - {self.time_local} - {self.request_file} - {self.user_agent} - {self.status}"
|
|
||||||
|
|
||||||
# component patterns for one line of the default nginx "combined" log format
re_remote_addr = r"[0-9a-fA-F.:]+"      # IPv4 or IPv6 address
re_remote_user = ".*"
re_time_local = r"\[.+\]"               # eg "[20/Nov/2022:00:47:36 +0100]"
re_request = r'"[^"]+"'                 # eg '"GET /index.html HTTP/1.1"'
re_status = r'\d+'
re_body_bytes_sent = r'\d+'
re_http_referer = r'"([^"]*)"'
re_http_user_agent = r'"([^"]*)"'
# full line pattern; the parenthesized groups are consumed by parse_log
re_log_format: str = f'({re_remote_addr}) - ({re_remote_user}) ({re_time_local}) ({re_request}) ({re_status}) ({re_body_bytes_sent}) {re_http_referer} {re_http_user_agent}'
|
|
||||||
def parse_log(logfile:str) -> list[Request]:
    """Create one Request per line of *logfile*.

    Lines that do not match re_log_format, or whose request field does not
    split into exactly (method, file, protocol), are reported via warning()
    and skipped.
    """
    requests = []
    with open(logfile, "r") as file:
        lines = file.readlines()
    for line in lines:
        m = match(re_log_format, line)
        if m is None:
            warning(f"parse_log: Unmatched line: '{line}'")
            continue
        g = m.groups()
        # the request field looks like "GET /index.html HTTP/1.1"
        request_parts = g[3].split(" ")
        if len(request_parts) != 3:
            warning(f"parse_log: len('{g[3]}'.split(' ')) is {len(request_parts)} and not 3")
            continue
        method, file_, protocol = request_parts
        requests.append(Request(ip_address=g[0], time_local=g[2],
                                request_type=method, request_file=file_, request_protocol=protocol,
                                status=g[4], bytes_sent=g[5], referer=g[6], user_agent=g[7]))
    return requests
|
|
||||||
|
|
||||||
def get_user_id(request: Request, cursor: sql.Cursor) -> int:
    """
    get the user_id. Adds the user if not already existing

    A user is identified by the pair (ip_address, user_agent).
    """
    # if user exists
    if sql_exists(cursor, t_user, [("ip_address", request.ip_address), ("user_agent", request.user_agent)]):
        user_id = sql_select(cursor, t_user, [("ip_address", request.ip_address), ("user_agent", request.user_agent)])[0][0]
    else:  # new user
        # new user_id is number of elements
        user_id: int = sql_tablesize(cursor, t_user)
        pdebug("new user:", user_id, request.ip_address)
        platform, browser, mobile = get_os_browser_pairs_from_agent(request.user_agent)
        # NOTE(review): this f-string SQL relies on the Request fields having been
        # escaped by sanitize(); consider parameterized queries instead.
        cursor.execute(f"INSERT INTO {t_user} (user_id, ip_address, user_agent, platform, browser, mobile) VALUES ({user_id}, '{request.ip_address}', '{request.user_agent}', '{platform}', '{browser}', '{int(mobile)}');")
    return user_id
|
|
||||||
|
|
||||||
# re_user_agent = r"(?: ?([\w\- ]+)(?:\/([\w.]+))?(?: \(([^()]*)\))?)"
|
|
||||||
# 1: platform, 2: version, 3: details
|
|
||||||
def get_os_browser_pairs_from_agent(user_agent):
    """Detect (operating_system, browser, mobile) from a user-agent string.

    The candidate lists user_agent_operating_systems / user_agent_browsers are
    scanned in order, so earlier entries take precedence; an unknown value
    stays "". mobile is True when the agent contains "Mobi".
    """
    operating_system = next((os for os in user_agent_operating_systems if os in user_agent), "")
    browser = next((br for br in user_agent_browsers if br in user_agent), "")
    mobile = "Mobi" in user_agent
    return operating_system, browser, mobile
|
|
||||||
|
|
||||||
|
|
||||||
def add_requests_to_db(requests: list[Request], db_name: str):
    """Insert parsed requests into the database, creating users and filegroups
    as needed. A request is considered a duplicate (and skipped) when the same
    user already has a request for the same filegroup at the same timestamp.
    """
    conn = sql.connect(db_name)
    cursor = conn.cursor()
    for i, request in enumerate(requests):
        pdebug("add_requests_to_db:", i, "request:", request)
        user_id = get_user_id(request, cursor)
        conn.commit()
        group_id: int = get_filegroup(request.request_file, cursor)
        # check if request is unique
        group_id_name = database_tables[t_filegroup].key.name
        user_id_name = database_tables[t_user].key.name
        if sql_exists(cursor, t_request, [(group_id_name, group_id), (user_id_name, user_id), ("date", request.time_local)]):
            pdebug("request exists:", request)
        else:
            pdebug("new request:", request)
            request_id = sql_tablesize(cursor, t_request)
            sql_insert(cursor, t_request, [[request_id, user_id, group_id, request.time_local, request.referer, request.status]])
    # BUG FIX: commit once more after the loop — the in-loop commit runs before
    # the request INSERT, so the final iteration's insert was never persisted.
    conn.commit()
|
|
@ -1,144 +0,0 @@
|
|||||||
import sqlite3 as sql
|
|
||||||
from os import path, listdir
|
|
||||||
from .sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
create regina's database as shown in the uml diagram database.uxf
|
|
||||||
"""
|
|
||||||
|
|
||||||
DEBUG = True
|
|
||||||
def pdebug(*args):
|
|
||||||
if DEBUG: print(*args)
|
|
||||||
|
|
||||||
class Entry:
    """
    represents an sql entry
    type_ is INTEGER, TEXT, REAL...
    """
    def __init__(self, name, type_) -> None:
        # column name
        self.name = name
        # sql column type as a string
        self.type_ = type_
    def __repr__(self):
        # rendered inside a CREATE TABLE column list, eg "[user_id] INTEGER"
        return f"[{self.name}] {self.type_}"
|
|
||||||
|
|
||||||
class Table:
|
|
||||||
def __init__(self, name, key: Entry, entries: list[Entry]=[], constaints: list[str]=[]):
|
|
||||||
self.name = name
|
|
||||||
self.key = key
|
|
||||||
self.entries = entries
|
|
||||||
self.constaints = constaints
|
|
||||||
def create_sql_str(self):
|
|
||||||
return f"CREATE TABLE IF NOT EXISTS {self.name}\n({self})\n"
|
|
||||||
def __repr__(self):
|
|
||||||
s = f"{self.key} PRIMARY KEY"
|
|
||||||
for entry in self.entries:
|
|
||||||
s += f", {entry}"
|
|
||||||
for c in self.constaints:
|
|
||||||
s += f", {c}"
|
|
||||||
return s
|
|
||||||
# table names
t_request = "request"
t_file = "file"
t_filegroup = "filegroup"
t_user = "user"

# column definitions shared between tables
user_id = Entry("user_id", "INTEGER")
request_id = Entry("request_id", "INTEGER")
filegroup_id = Entry("group_id", "INTEGER")
ip_address_entry = Entry("ip_address", "TEXT")
filename_entry = Entry("filename", "TEXT")
# full schema: table name -> Table definition (mirrors database.uxf)
database_tables = {
    t_user: Table(t_user, user_id, [Entry("ip_address", "TEXT"), Entry("user_agent", "TEXT"), Entry("platform", "TEXT"), Entry("browser", "TEXT"), Entry("mobile", "INTEGER")], [f"UNIQUE({user_id.name})"]),
    t_file: Table(t_file, filename_entry, [filegroup_id], [f"UNIQUE({filename_entry.name})"]),
    t_filegroup: Table(t_filegroup, filegroup_id, [Entry("groupname", "TEXT")], [f"UNIQUE({filegroup_id.name})"]),
    t_request: Table(t_request, request_id, [
        user_id, filegroup_id, Entry("date", "INTEGER"), Entry("referer", "TEXT"), Entry("status", "INTEGER")
    ], ["UNIQUE(request_id)"]),
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_filegroup(filename: str, cursor: sql.Cursor) -> int:
    """
    get the group_id for filename. Adds a new filegroup (and the file itself)
    if the file is not already known.
    """
    if sql_exists(cursor, t_file, [("filename", filename)]):
        # group_id is the second column of the file table
        return sql_select(cursor, t_file, [("filename", filename)])[0][1]
    else:
        # new group_id is the current number of filegroups
        group_id = sql_tablesize(cursor, t_filegroup)
        # pdebug("new file(group):", group_id, filename)
        # add group (groupname defaults to the filename)
        sql_insert(cursor, t_filegroup, [[group_id, filename]])
        # add file
        sql_insert(cursor, t_file, [[filename, group_id]])
        return group_id
|
|
||||||
|
|
||||||
def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
    """Create/extend filegroups from a description string.

    :param filegroup_str: 'name1: file1, file2, file3; name2: file33'
        Each group is created in t_filegroup if missing; each file is added to
        t_file, or re-pointed at the group if it already exists.
    """
    groups = filegroup_str.strip(";").split(";")
    pdebug("create_filegroups", groups)
    for group in groups:
        # tolerate empty segments from double/trailing semicolons
        if not group.strip():
            continue
        name, vals = group.split(":")
        # create/get group
        if sql_exists(cursor, t_filegroup, [("groupname", name)]):
            group_id = sql_select(cursor, t_filegroup, [("groupname", name)])[0][0]
        else:
            group_id = sql_tablesize(cursor, t_filegroup)
            sql_insert(cursor, t_filegroup, [(group_id, name)])
        # create/edit file
        for filename in vals.split(","):
            if sql_exists(cursor, t_file, [("filename", filename)]):  # if exist, update
                # BUG FIX: the WHERE clause contained a placeholder literal instead
                # of the filename, so existing files were never re-grouped.
                cursor.execute(f"UPDATE {t_file} SET group_id = {group_id} WHERE filename = '{filename}'")
            else:
                sql_insert(cursor, t_file, [[filename, group_id]])
|
|
||||||
|
|
||||||
def get_files_from_dir_rec(p: str, files: list[str]):
    """Recursively append every file found under path *p* to *files* (in place)."""
    pdebug("get_files_from_dir_rec:", p)
    if path.isfile(p):
        files.append(p)
        return
    if path.isdir(p):
        for child in listdir(p):
            get_files_from_dir_rec(p + "/" + child, files)
|
|
||||||
|
|
||||||
def get_auto_filegroup_str(location_and_dirs:list[tuple[str, str]], auto_group_filetypes:list[str]) -> str:
    """Build a filegroup string with one group per filetype.

    :param location_and_dirs: list of (nginx location, directory) pairs
    :param auto_group_filetypes: list of filetypes for auto grouping
    :returns a string like 'css:/style.css,/a/b.css;png:/logo.png;'
    """
    files: list[str] = []
    start_i = 0
    for location, dir_ in location_and_dirs:
        get_files_from_dir_rec(dir_, files)
        # map filesystem paths onto their nginx location, eg /www/website -> /
        files[start_i:] = [f.replace(dir_, location).replace("//", "/") for f in files[start_i:]]
    # one group per filetype, listing all files with that extension
    parts = []
    for ft in auto_group_filetypes:
        matching = ",".join(f for f in files if f.endswith(f".{ft}"))
        parts.append(f"{ft}:{matching};")
    filegroups = "".join(parts)
    pdebug("get_auto_filegroup_str: found files:", files, "filegroups_str:", filegroups)
    return filegroups
|
|
||||||
|
|
||||||
def create_db(name, filegroup_str="", location_and_dirs: list[tuple[str, str]] | None = None, auto_group_filetypes=None):
    """
    Create the sqlite database *name* with all tables from database_tables.

    :param name: path of the database file
    :param filegroup_str: manual filegroups, eg 'name1: file1, file2; name2: file3'
    :param location_and_dirs: list of (nginx location, directory) pairs for auto grouping
    :param auto_group_filetypes: list of filetypes that each get their own group
    """
    # BUG FIX: use None sentinels instead of mutable [] defaults so repeated
    # calls cannot share (and accumulate into) the same default lists.
    if location_and_dirs is None: location_and_dirs = []
    if auto_group_filetypes is None: auto_group_filetypes = []
    print(f"creating database: '{name}'")
    conn = sql.connect(f"{name}")
    cursor = conn.cursor()
    for table in database_tables.values():
        cursor.execute(table.create_sql_str())
    # merge manual and automatic filegroup descriptions
    filegroup_str = filegroup_str.strip("; ") + ";" + get_auto_filegroup_str(location_and_dirs, auto_group_filetypes)
    create_filegroups(cursor, filegroup_str)
    conn.commit()
    conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
# running this module directly creates an empty test database
if __name__ == '__main__':
    create_db("test.db")
|
|
121
regina/regina.py
121
regina/regina.py
@ -1,121 +0,0 @@
|
|||||||
from sys import argv, exit
|
|
||||||
from os.path import isfile
|
|
||||||
from .visualize import visualize
|
|
||||||
from .settings_manager import read_settings_file
|
|
||||||
from .collect import parse_log, add_requests_to_db
|
|
||||||
from .database import create_db
|
|
||||||
|
|
||||||
"""
|
|
||||||
start regina, launch either collect or visualize
|
|
||||||
"""
|
|
||||||
|
|
||||||
version = "1.0"

# default settings, these are overwriteable through a config file
# (read_settings_file converts values to the types used here)
settings = {
    # GENERAL
    "server-name": "",
    # DATA COLLECTION
    "access-log": "",
    "db": "",
    "locs-and-dirs": [],
    "auto-group-filetypes": [],
    "filegroups": "",

    # VISUALIZATION
    "get-human-percentage": False,
    # "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
    "file_ranking_regex_whitelist": r".*\.(html)",
    "referer_ranking_regex_whitelist": r"^[^\-].*",  # minus means empty
    "user_agent_ranking_regex_whitelist": r"",
    "file_ranking_plot_max_files": 15,
    # "plot_figsize": (60, 40),
    "plot_dpi": 300,
    "img_dir": "",
    "img_location": "",
    "img_filetype": "svg",
    "template_html": "",
    "html_out_path": "",
    "last_x_days": 30,
}
|
|
||||||
|
|
||||||
|
|
||||||
def help():
    """Print command line usage to stdout.

    NOTE(review): shadows the builtin help(); renaming would touch callers.
    """
    helpstring = """Command line options:
    --server-name string
    --log path to the access.log
    --db name of the database
    --settings["filegroups"] string describing settings["filegroups"], eg 'name1: file1, file2; name2: file3, file4, file5;'
    --auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif'
    --locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website'
    --config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line
    """
    print(helpstring)
|
|
||||||
|
|
||||||
def missing_arg_val(arg):
    """Report that option *arg* was given without a value and exit(1)."""
    print("Missing argument for", arg)
    exit(1)
|
|
||||||
|
|
||||||
def missing_arg(arg):
    """Report a missing required option/setting *arg* and exit(1)."""
    print("Missing ", arg)
    exit(1)
|
|
||||||
|
|
||||||
def error(arg):
    """Print an error message and exit(1)."""
    print("Error:", arg)
    exit(1)
|
|
||||||
|
|
||||||
def main():
    """Parse command line arguments, load the config file and run the
    requested phase(s): --collect (log -> database) and/or --visualize.
    """
    config_file = ""
    collect = False
    visualize_ = False
    log_file = ""
    # parse args
    i = 1
    while i in range(1, len(argv)):
        if argv[i] == "--config":
            if len(argv) > i + 1: config_file = argv[i+1]
            else: missing_arg_val(argv[i])
        # NOTE(review): plain `if` (not `elif`) — with the current distinct
        # option names the behavior is the same, but it breaks the chain.
        if argv[i] == "--log-file":
            if len(argv) > i + 1: log_file = argv[i+1]
            else: missing_arg_val(argv[i])
        elif argv[i] == "--help":
            help()
            exit(0)
        elif argv[i] == "--collect":
            collect = True
        elif argv[i] == "--visualize":
            visualize_ = True
        else:
            pass
        # NOTE(review): i advances by 1 even after consuming an option value;
        # the value is then re-examined (and ignored) on the next iteration.
        i += 1
    if not collect and not visualize_:
        missing_arg("--visualize or --collect")

    if not config_file:
        missing_arg("--config_file")
    if not isfile(config_file):
        error(f"Not a file: '{config_file}'")
    # config file overwrites the defaults; command line --log-file wins over it
    read_settings_file(config_file, settings)
    settings["version"] = version
    if log_file: settings["access-log"] = log_file

    print(f"regina version {version} with server-name '{settings['server-name']}' and database '{settings['db']}'")

    if not settings["server-name"]: missing_arg("server-name")
    if not settings["access-log"]: missing_arg("log")
    if not settings["db"]: missing_arg("db")
    # settings read from the file arrive as strings; split them into lists
    if isinstance(settings["auto-group-filetypes"], str):
        settings["auto-group-filetypes"] = settings["auto-group-filetypes"].split(",")
    if isinstance(settings["locs-and-dirs"], str):
        settings["locs-and-dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs-and-dirs"].split(",") ]
    if collect:
        # create the database on first use
        if not isfile(settings["db"]):
            create_db(settings["db"], settings["filegroups"], settings["locs-and-dirs"], settings["auto-group-filetypes"])
        requests = parse_log(settings["access-log"])
        add_requests_to_db(requests, settings["db"])
    if visualize_:
        if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
        visualize(settings)
|
|
||||||
# script entry point
if __name__ == '__main__':
    main()
|
|
@ -1,33 +0,0 @@
|
|||||||
|
|
||||||
def get_bool(bool_str: str, fallback=False):
    """Parse 'true'/'True'/'false'/'False'; any other value yields *fallback*."""
    known = {"true": True, "True": True, "false": False, "False": False}
    return known.get(bool_str, fallback)
|
|
||||||
|
|
||||||
def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True):
    """Read 'key = value' lines from *filepath* into *settings* (in place).

    :param ignore_invalid_lines: skip bad lines/keys instead of raising KeyError
    :param allow_new_keys: accept keys not already present in *settings*
    :param convert_to_type: cast the value to the type of the current default
        (bools via get_bool); str/list/None defaults are kept as raw strings
    """
    lines = []
    with open(filepath, "r") as file:
        lines = file.readlines()

    for i in range(len(lines)):
        line = lines[i].strip("\n ")
        # '#' starts a comment line
        if line.startswith("#"): continue
        # NOTE(review): a value containing '=' makes len(vals) != 2 and the
        # line is treated as invalid; split("=", 1) would accept it.
        vals = line.split("=")
        if not len(vals) == 2:
            if ignore_invalid_lines: continue
            else: raise KeyError(f"Invalid line: '{line}'")
        vals[0] = vals[0].strip(" ")
        if not allow_new_keys and vals[0] not in settings.keys():
            if ignore_invalid_lines: continue
            else: raise KeyError(f"Invalid key: '{vals[0]}'")
        # only convert when the current default is a "typed" value
        if convert_to_type and not isinstance(settings[vals[0]], str|list|None):
            if isinstance(settings[vals[0]], bool):
                settings[vals[0]] = get_bool(vals[1].strip(" "), fallback=settings[vals[0]])
                continue
            try:
                settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
            except Exception as e:
                if not ignore_invalid_lines: raise e
                else: continue
        else:
            settings[vals[0]] = vals[1].strip(" ")
|
|
@ -1,40 +0,0 @@
|
|||||||
import sqlite3 as sql
|
|
||||||
"""Various utilities"""
|
|
||||||
def sanitize(s):
    """Escape single quotes in *s* for embedding in an SQL string literal.

    Non-str values pass through unchanged. Already-escaped quotes ('') are
    collapsed first so the result is never double-escaped; surrounding
    spaces are stripped.
    """
    # isinstance instead of `type(s) != str` (idiomatic, accepts str subclasses)
    if not isinstance(s, str):
        return s
    # .replace('"', r'\"') was previously considered but is not needed here
    return s.replace("''", "'").replace("'", r"''").strip(" ")
|
|
||||||
|
|
||||||
def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str:
    """Build a WHERE-clause body like "name = 'val' AND name2 = 'val2'".

    :param constraints: list of (column, value) pairs; values are sanitized
    :param logic: the connective placed between the conditions
    """
    c_str = ""
    for name, val in constraints:
        c_str += f"{name} = '{sanitize(val)}' {logic} "
    # BUG FIX: strip(logic + " ") treats its argument as a character set and
    # could also eat leading characters of the first column name (eg 'A', 'N',
    # 'D'); removesuffix only drops the trailing connective.
    return c_str.removesuffix(f" {logic} ")
|
|
||||||
|
|
||||||
def sql_get_value_str(values: list[list]) -> str:
    """Build an INSERT VALUES string like "('a', 'b'), ('c', 'd')".

    Every cell is sanitized and single-quoted.
    """
    rows = []
    for params in values:
        cells = ", ".join(f"'{sanitize(p)}'" for p in params)
        rows.append(f"({cells})")
    return ", ".join(rows)
|
|
||||||
|
|
||||||
def sql_exists(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND") -> bool:
    """Return True if *table* has at least one row matching all *constraints*."""
    cur.execute(f"SELECT EXISTS (SELECT 1 FROM {table} WHERE {sql_get_constaint_str(constraints, logic)})")
    return cur.fetchone()[0] == 1
|
|
||||||
|
|
||||||
def sql_select(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND"):
    """Return all rows of *table* matching all *constraints* (list of tuples)."""
    cur.execute(f"SELECT * FROM {table} WHERE {sql_get_constaint_str(constraints, logic)}")
    return cur.fetchall()
|
|
||||||
|
|
||||||
def sql_insert(cur: sql.Cursor, table: str, values: list[list]):
    """Insert one row per inner list into *table* (cells sanitized and quoted)."""
    cur.execute(f"INSERT INTO {table} VALUES {sql_get_value_str(values)}")
|
|
||||||
|
|
||||||
def sql_tablesize(cur: sql.Cursor, table: str) -> int:
    """Return the number of rows in *table*."""
    cur.execute(f"SELECT Count(*) FROM {table}")
    (row_count,) = cur.fetchone()
    return row_count
|
|
||||||
|
|
||||||
def sql_get_count_where(cur: sql.Cursor, table, constraints) -> int:
    """Return the number of rows in *table* matching all *constraints*."""
    cur.execute(f"SELECT COUNT(*) FROM {table} WHERE {sql_get_constaint_str(constraints)}")
    return cur.fetchone()[0]
|
|
@ -1,523 +0,0 @@
|
|||||||
import sqlite3 as sql
|
|
||||||
from sys import exit
|
|
||||||
from re import fullmatch
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from os.path import isdir
|
|
||||||
from datetime import datetime as dt
|
|
||||||
from .database import t_request, t_user, t_file, t_filegroup
|
|
||||||
from .sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
|
|
||||||
"""
|
|
||||||
visualize information from the database
|
|
||||||
TODO:
|
|
||||||
- bei referrers ähnliche zusammenlegen, z.b. www.google.de und https://google.com
|
|
||||||
"""
|
|
||||||
|
|
||||||
# NOTE(review): appears to be filled in by the caller of visualize(settings)
# before the ranking functions read it — confirm against regina.main
settings = {}

# shared color palette for all plots
palette = {
    "red": "#ee4035",
    "orange": "#f37736",
    "yellow": "#fdf458",
    "green": "#7bc043",
    "blue": "#0392cf",
    "purple": "#b044a0",
}
# fixed colors by filetype (file ranking plot)
color_settings_filetypes = {
    palette["red"]: ["html"],
    palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"],
    palette["yellow"]: ["css"],
    "grey": ["txt"]
}
# fallback: cycle through the whole palette
color_settings_alternate = list(palette.values())

# fixed colors by browser
color_settings_browsers = {
    palette["red"]: ["Safari"],
    palette["orange"]: ["Firefox"],
    palette["yellow"]: ["Chrome"],
    "grey": ["Edge"],
    palette["green"]: ["Chromium"],
    palette["purple"]: ["Brave"]
}
# fixed colors by operating system
color_settings_operating_systems = {
    palette["red"]: ["Macintosh"],
    palette["green"]: ["Android"],
    "grey": ["iPhone", "iPad"],
    palette["yellow"]: ["Linux"],
    palette["purple"]: ["BSD"],
    palette["blue"]: ["Windows"],
}
|
|
||||||
|
|
||||||
|
|
||||||
def len_list_list(l: list[list]) -> int:
    """Return the total number of elements over all sub-lists of *l*."""
    # sum over the sub-list lengths instead of the manual index loop
    return sum(len(sub) for sub in l)
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# FILTERS
|
|
||||||
#
|
|
||||||
def get_os_browser_mobile_rankings(cur: sql.Cursor, user_ids: list[int]):
    """
    returns [(count, operating_system)], [(count, browser)], mobile_user_percentage

    NOTE(review): despite the wording above, the returned "count" values are
    percentages (c * 100 / total), as computed at the end of this function.
    """
    os_ranking = {}
    os_count = 0.0
    browser_ranking = {}
    browser_count = 0.0
    mobile_ranking = { True: 0.0, False: 0.0 }
    for user_id in user_ids:
        cur.execute(f"SELECT platform,browser,mobile FROM {t_user} WHERE user_id = {user_id}")
        os, browser, mobile = cur.fetchone()
        mobile = bool(mobile)
        if os:
            if os in os_ranking: os_ranking[os] += 1
            else: os_ranking[os] = 1
            os_count += 1
        if browser:
            if browser in browser_ranking: browser_ranking[browser] += 1
            else: browser_ranking[browser] = 1
            browser_count += 1
        # only users with a recognized os or browser count for the mobile share
        if (os or browser):
            mobile_ranking[mobile] += 1
    try:
        mobile_user_percentage = mobile_ranking[True] / (mobile_ranking[True] + mobile_ranking[False])
    except ZeroDivisionError:
        mobile_user_percentage = 0.0

    # convert counts to percentages and sort ascending
    os_ranking = [(c * 100/os_count, n) for n, c in os_ranking.items()]
    os_ranking.sort()
    browser_ranking = [(c * 100/browser_count, n) for n, c in browser_ranking.items()]
    browser_ranking.sort()
    return os_ranking, browser_ranking, mobile_user_percentage*100
|
|
||||||
|
|
||||||
#
|
|
||||||
# GETTERS
|
|
||||||
#
|
|
||||||
def get_where_date_str(at_date=None, min_date=None, max_date=None):
    """Build an sqlite WHERE constraint on the request date column.

    Each argument may be a str ('yyyy-mm-dd', compared via DATE(date,
    'unixepoch')) or an int/float unix timestamp (compared numerically).
    Falls back to "date > 0" when no usable argument is given.
    """
    # dates in unix time
    s = ""
    if at_date is not None:
        if isinstance(at_date, str):
            s += f"DATE(date, 'unixepoch') = '{sanitize(at_date)}' AND "
        elif isinstance(at_date, int|float):
            s += f"date = {int(at_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
    if min_date is not None:
        if isinstance(min_date, str):
            s += f"DATE(date, 'unixepoch') >= '{sanitize(min_date)}' AND "
        elif isinstance(min_date, int|float):
            s += f"date >= {int(min_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
    if max_date is not None:
        if isinstance(max_date, str):
            s += f"DATE(date, 'unixepoch') <= '{sanitize(max_date)}' AND "
        elif isinstance(max_date, int|float):
            s += f"date <= {int(max_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
    if s == "":
        print(f"WARNING: get_where_date_str: no date_str generated. Returing 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
        return "date > 0"
    # drop the trailing connective
    return s.removesuffix(" AND ")
|
|
||||||
|
|
||||||
|
|
||||||
# get the earliest date
def get_earliest_date(cur: sql.Cursor) -> int:
    """return the earliest time as unixepoch"""
    cur.execute(f"SELECT MIN(date) FROM {t_request}")
    return cur.fetchone()[0]
|
|
||||||
# get the latest date
def get_latest_date(cur: sql.Cursor) -> int:
    """return the latest time as unixepoch"""
    cur.execute(f"SELECT MAX(date) FROM {t_request}")
    return cur.fetchone()[0]
|
|
||||||
# get all dates
# the date:str parameter in all these function must be a sqlite constraint
def get_days(cur: sql.Cursor, date:str) -> list[str]:
    """get a list of all dates in yyyy-mm-dd format"""
    cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request} WHERE {date}")
    return [ date[0] for date in cur.fetchall() ]  # fetchall returns tuples (date, )
|
|
||||||
|
|
||||||
def get_months(cur: sql.Cursor, date:str) -> list[str]:
    """get a list of all dates in yyyy-mm format

    :param date: sqlite constraint on the request date column
    """
    # (removed a redundant SELECT whose result was never fetched —
    # get_days() already runs the required query)
    dates = get_days(cur, date)
    # cut off the day part and deduplicate, keeping first-seen order
    unique_months = dict.fromkeys(d[0:d.rfind('-')] for d in dates)
    return list(unique_months.keys())
|
|
||||||
|
|
||||||
|
|
||||||
def get_user_agent(cur: sql.Cursor, user_id: int):
    """Return the user_agent string of *user_id* (column 2 of the user table)."""
    return sql_select(cur, t_user, [("user_id", user_id)])[0][2]
|
|
||||||
|
|
||||||
def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
    """Return the distinct user_ids with a request matching the *date* constraint."""
    cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE {date}")
    return [ user_id[0] for user_id in cur.fetchall() ]
|
|
||||||
|
|
||||||
def get_human_users(cur: sql.Cursor, unique_user_ids):
    """Filter *unique_user_ids* down to probable humans.

    Heuristic: a user with both platform and browser set is assumed human.
    NOTE(review): the test is IS NOT NULL — unrecognized agents stored as ""
    would still pass; confirm how get_user_id stores unknown values.
    """
    human_user_ids = []
    for user_id in unique_user_ids:
        cur.execute(f"SELECT EXISTS (SELECT 1 FROM {t_user} WHERE user_id = {user_id} AND platform IS NOT NULL AND browser IS NOT NULL)")
        if cur.fetchone()[0] == 1:
            human_user_ids.append(user_id)
    return human_user_ids
|
|
||||||
|
|
||||||
def get_unique_request_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
    """Return the distinct request_ids matching the *date* constraint."""
    cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date}")
    return [ request_id[0] for request_id in cur.fetchall() ]
|
|
||||||
|
|
||||||
def get_unique_request_ids_for_date_and_user(cur: sql.Cursor, date:str, user_id: int) -> list[int]:
    """Return the distinct request_ids of *user_id* matching the *date* constraint."""
    cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date} AND user_id = {user_id}")
    return [ request_id[0] for request_id in cur.fetchall() ]
|
|
||||||
|
|
||||||
# get number of requests per day
|
|
||||||
def get_request_count_for_date(cur: sql.Cursor, date:str) -> int:
|
|
||||||
cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE {date}")
|
|
||||||
return cur.fetchone()[0]
|
|
||||||
|
|
||||||
def get_unique_user_count(cur: sql.Cursor) -> int:
    """Return the total number of users ever recorded."""
    return sql_tablesize(cur, t_user)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# RANKINGS
|
|
||||||
#
|
|
||||||
def get_file_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]]:
    global settings
    # NOTE(review): the string below is placed after `global settings`,
    # so it is NOT the function's docstring (it is a no-op expression).
    """
    :returns [(request_count, filename)]
    """
    ranking = []
    cur.execute(f"SELECT DISTINCT group_id FROM {t_filegroup}")
    for group in cur.fetchall():
        group = group[0]
        # each group is represented by its first file
        filename = sql_select(cur, t_file, [("group_id", group)])
        if len(filename) == 0: continue
        filename = filename[0][0]
        # apply the optional whitelist from the settings
        if settings["file_ranking_regex_whitelist"]:
            if not fullmatch(settings["file_ranking_regex_whitelist"], filename):
                continue
        # ranking.append((sql_get_count_where(cur, t_request, [("group_id", group)]), filename))
        cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE group_id = {group} AND {date}")
        ranking.append((cur.fetchone()[0], filename))
    # sort ascending by request count
    ranking.sort()
    # print(ranking)
    return ranking
|
|
||||||
|
|
||||||
def get_user_agent_ranking(cur: sql.Cursor, date:str) -> list[tuple[int, str]]:
    """
    :returns [(request_count, user_agent)]
    """
    ranking = []
    cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE {date}")
    for user_id in cur.fetchall():
        user_id = user_id[0]
        # user_agent is column 2 of the user table
        user_agent = sql_select(cur, t_user, [("user_id", user_id)])
        if len(user_agent) == 0: continue
        user_agent = user_agent[0][2]
        # apply the optional whitelist from the settings
        if settings["user_agent_ranking_regex_whitelist"]:
            if not fullmatch(settings["user_agent_ranking_regex_whitelist"], user_agent):
                continue
        # ranking.append((sql_get_count_where(cur, t_request, [("group_id", group)]), filename))
        cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE user_id = {user_id} AND {date}")
        ranking.append((cur.fetchone()[0], user_agent))
    # sort ascending by request count
    ranking.sort()
    # print(ranking)
    return ranking
|
|
||||||
|
|
||||||
def get_ranking(field_name: str, table: str, whitelist_regex: str, cur: sql.Cursor, date: str) -> list[tuple[int, str]]:
    """
    Rank the distinct values of a column by how often they occur in the time span.

    1) get all the distinct entries for field_name matching the date condition
    2) filter them through whitelist_regex (if non-empty)
    3) for every entry, count its occurrences in table matching the date condition
    4) sort by count in ascending order

    :param field_name: column to rank (trusted identifier, not user input)
    :param table: table to query (trusted identifier, not user input)
    :param whitelist_regex: if non-empty, only fully matching entries are ranked
    :param date: SQL condition string limiting the considered rows
    :returns: [(request_count, name)] sorted by request_count in ascending order
    """
    ranking = []
    cur.execute(f"SELECT DISTINCT {field_name} FROM {table} WHERE {date}")
    for name in cur.fetchall():
        name = name[0]
        if whitelist_regex:
            if not fullmatch(whitelist_regex, name):
                continue
        # bugfix: `name` comes from the database (e.g. referer strings) and may
        # contain quotes; interpolating it as '{name}' broke the query, so it
        # is passed as a bound parameter instead
        cur.execute(f"SELECT COUNT(*) FROM {table} WHERE {field_name} = ? AND {date}", (name,))
        ranking.append((cur.fetchone()[0], name))
    ranking.sort()
    return ranking
|
|
||||||
|
|
||||||
#
|
|
||||||
# PLOTTING
|
|
||||||
#
|
|
||||||
def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot):
    """
    Write each bar's label vertically (rotated 90°) just above its top.

    Bars taller than 80% of max_y_val get their label near the bottom of the
    plot instead, so the text stays inside the visible area.
    """
    # hoist the invariant thresholds out of the loop
    tall_bar_cutoff = 0.8 * max_y_val
    lowered_height = 0.05 * max_y_val
    text_offset = 0.025 * max_y_val
    for idx, rect in enumerate(bar_plot):
        y_anchor = rect.get_height()
        if y_anchor > tall_bar_cutoff:
            y_anchor = lowered_height
        x_center = rect.get_x() + rect.get_width() / 2.
        ax.text(x_center, y_anchor + text_offset, labels[idx],
                ha='center', va='bottom', rotation=90)
|
|
||||||
def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[]):
    """
    Make a bar plot of a ranking (e.g. the most requested files).

    :param ranking: [(request_count, name)] sorted in ascending order; only the
        top settings["file_ranking_plot_max_files"] entries are plotted
    :param fig: existing figure to draw into; a new one is created if None
    :param color_settings: either a dict mapping a color to the filetypes it
        should be used for, or a list of colors to cycle through
    :returns: the matplotlib figure
    """
    if not fig:
        fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    # create new axis if none is given
    ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
    # ranking is sorted ascending, so skip the head to keep only the top entries
    if len(ranking) > settings["file_ranking_plot_max_files"]:
        start_index = len(ranking) - settings["file_ranking_plot_max_files"]
    else:
        start_index = 0
    x_names = []
    y_counts = []
    colors = []
    for i in range(start_index, len(ranking)):
        x_names.append(ranking[i][1])
        y_counts.append(ranking[i][0])
        # filetype (extension) decides the color when color_settings is a dict
        ft = ranking[i][1].split(".")[-1]
        color = "blue"
        if isinstance(color_settings, dict):
            for key, val in color_settings.items():
                if ft in val: color = key
        elif isinstance(color_settings, list) and color_settings:
            # cycle through the given colors
            color = color_settings[(i - start_index) % len(color_settings)]
        colors.append(color)
    bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
    # bugfix: guard against an empty ranking — y_counts[-1] raised IndexError
    if y_counts:
        # y_counts[-1] is the largest count (ascending sort) -> used as y scale
        add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar)
    return fig
|
|
||||||
|
|
||||||
def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue"):
    """
    Plot ydata over xdata as a single line.

    A new figure is created when `fig` is not given.  When `ax` is given, the
    line is drawn on a twin y-axis of it (separate y-scale on the right), so a
    second dataset can share the same x-axis.

    :returns: (fig, ax) -- the figure and the axis that was drawn on
    """
    if not fig:
        fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    if not ax:
        ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
    else:
        # second dataset on an existing axis -> give it its own y-scale
        ax = ax.twinx()
        ax.set_ylabel(ylabel)
    ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
    if label:
        ax.legend()
    return fig, ax
|
|
||||||
def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major"):
    """
    Plot two y-datasets against the same x-data on twin y-axes.

    When fig/ax1/ax2/plots are given, the new lines are added to them, so
    repeated calls can accumulate lines in one figure; `plots` collects the
    line artists and a single legend is (re)built over all of them.

    :param grid: "major", "minor" or "both" enables that grid; anything else disables it
    :returns: (fig, ax1, ax2, plots)
    """
    if not fig:
        fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    if not (ax1 and ax2):
        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
        ax2 = ax1.twinx()
        ax2.set_ylabel(ylabel2)
    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
    # one shared legend over all accumulated line artists
    if plots: plots += plot1 + plot2
    else: plots = plot1 + plot2
    plt.legend(plots, [ l.get_label() for l in plots])

    if grid == "major" or grid == "minor" or grid == "both":
        # bugfix: was `grid == "minor" or "both"`, which is always truthy and
        # therefore turned minor ticks on even for grid="major"
        if grid == "minor" or grid == "both":
            ax1.minorticks_on()
        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")
    return fig, ax1, ax2, plots
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# MAIN
|
|
||||||
#
|
|
||||||
def missing_arg_val(arg):
    """Report that the given command-line argument lacks a value, then abort."""
    print("Missing argument for", arg)
    exit(1)
|
|
||||||
def missing_arg(arg):
    """Report that a required setting/argument is missing entirely, then abort."""
    print("Missing ", arg)
    exit(1)
|
|
||||||
def visualize(loaded_settings: dict):
    """
    Generate the complete statistics output: ranking/user plots (if an image
    directory and filetype are configured) and the filled-in html template.

    Runs two passes: one over the whole recorded time span (per month) and one
    over the last X days (per day); results are stored under the "_total" and
    "_last_x_days" suffixes respectively.

    :param loaded_settings: settings dict; "db" and "server-name" are required
    """
    global settings
    settings = loaded_settings
    if not settings["db"]: missing_arg("db")
    if not settings["server-name"]: missing_arg("server-name")

    img_dir = settings["img_dir"]
    img_filetype = settings["img_filetype"]
    img_location = settings["img_location"]
    # names: placeholder -> value; every "%<name>" occurrence in the html
    # template is later replaced by str(value)
    names = {
        # paths
        "img_file_ranking_last_x_days": f"ranking_all_time_files_last_x_days.{img_filetype}",
        "img_referer_ranking_last_x_days": f"ranking_all_time_referers_last_x_days.{img_filetype}",
        "img_browser_ranking_last_x_days": f"ranking_all_time_browsers_last_x_days.{img_filetype}",
        "img_operating_system_ranking_last_x_days": f"ranking_all_time_operating_systems_last_x_days.{img_filetype}",
        "img_users_and_requests_last_x_days": f"user_request_count_daily_last_x_days.{img_filetype}",

        "img_file_ranking_total": f"ranking_all_time_files_total.{img_filetype}",
        "img_referer_ranking_total": f"ranking_all_time_referers_total.{img_filetype}",
        "img_browser_ranking_total": f"ranking_all_time_browsers_total.{img_filetype}",
        "img_operating_system_ranking_total": f"ranking_all_time_operating_systems_total.{img_filetype}",
        "img_users_and_requests_total": f"user_request_count_daily_total.{img_filetype}",
        # values
        "mobile_user_percentage_total": 0.0,
        "mobile_user_percentage_last_x_days": 0.0,
        "user_count_last_x_days": 0,
        "user_count_total": 0,
        "request_count_last_x_days": 0,
        "request_count_total": 0,
        "human_user_percentage_last_x_days": 0,
        "human_user_percentage_total": 0,
        "human_request_percentage_last_x_days": 0,
        "human_request_percentage_total": 0,
        # general
        "regina_version": settings["version"],
        "server-name": settings["server-name"],
        "last_x_days": settings["last_x_days"],  # must be after all the things with last_x_days!
        "earliest_date": "1990-1-1",
        "generation_date": "1990-1-1 0:0:0",
    }

    conn = sql.connect(settings["db"])
    if isdir(img_dir) and img_filetype:
        gen_img = True
    else:
        print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
        gen_img = False
    cur = conn.cursor()

    get_humans = settings["get-human-percentage"]
    # DATE STRINGS
    names["earliest_date"] = dt.fromtimestamp(get_earliest_date(cur)).strftime("%Y-%m-%d")
    names["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
    # LAST_X_DAYS
    # last_x_days_min_date: latest_date - last_x_days
    secs_per_day = 86400
    last_x_days_min_date = get_latest_date(cur) - settings["last_x_days"] * secs_per_day
    last_x_days_str = get_where_date_str(min_date=last_x_days_min_date)
    days = get_days(cur, last_x_days_str)
    days_strs = [get_where_date_str(at_date=day) for day in days]

    # ALL DATES
    all_time_str = get_where_date_str(min_date=0)
    # all months in yyyy-mm format
    months_all_time = get_months(cur, all_time_str)
    # one sqlite date condition per month
    months_strs = []
    for year_month in months_all_time:
        year, month = year_month.split("-")
        # first day of the month
        min_date = dt(int(year), int(month), 1).timestamp()
        month = (int(month) % 12) + 1  # + 1 month
        year = int(year)
        if month == 1: year += 1  # wrapped around december -> next year
        # first day of the next month - 1 sec
        max_date = dt(year, month, 1).timestamp() - 1
        months_strs.append(get_where_date_str(min_date=min_date, max_date=max_date))

    # pass 0: whole time span, bucketed by month; pass 1: last x days, by day
    for i in range(2):
        suffix = ["_total", "_last_x_days"][i]
        date_str = [all_time_str, last_x_days_str][i]
        date_names = [months_all_time, days][i]
        date_strs = [months_strs, days_strs][i]
        assert(len(date_names) == len(date_strs))

        # FILES
        file_ranking = get_file_ranking(cur, date_str)
        if gen_img:
            fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes)
            fig_file_ranking.savefig(f"{img_dir}/{names[f'img_file_ranking{suffix}']}")

        # REFERER
        referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur, date_str)
        if gen_img:
            fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate)
            fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_referer_ranking{suffix}']}")

        # USER
        # for the whole time span
        unique_user_ids = get_unique_user_ids_for_date(cur, date_str)
        unique_user_ids_human = get_human_users(cur, unique_user_ids)
        # per date bucket (month or day)
        unique_user_ids_dates: list[list[int]] = []
        unique_request_ids_dates: list[list[int]] = []
        unique_user_ids_human_dates: list[list[int]] = []
        unique_request_ids_human_dates: list[list[int]] = []
        # cleanup: the inner loop used to reuse `i` and shadow the pass index
        for date_str_ in date_strs:
            unique_user_ids_dates.append(get_unique_user_ids_for_date(cur, date_str_))
            unique_request_ids_dates.append(get_unique_request_ids_for_date(cur, date_str_))
            if get_humans:
                unique_user_ids_human_dates.append(get_human_users(cur, unique_user_ids_dates[-1]))
                unique_request_ids_human_dates.append([])
                for human in unique_user_ids_human_dates[-1]:
                    unique_request_ids_human_dates[-1] += get_unique_request_ids_for_date_and_user(cur, date_str_, human)
        if get_humans:
            try:
                names[f"human_user_percentage{suffix}"] = round(100 * len_list_list(unique_user_ids_human_dates) / len_list_list(unique_user_ids_dates), 2)
                names[f"human_request_percentage{suffix}"] = round(100 * len_list_list(unique_request_ids_human_dates) / len_list_list(unique_request_ids_dates), 2)
            except ZeroDivisionError:
                # no users/requests in the time span -> keep the 0 defaults
                pass
        names[f"user_count{suffix}"] = len_list_list(unique_user_ids_dates)
        names[f"request_count{suffix}"] = len_list_list(unique_request_ids_dates)
        if gen_img:
            fig_daily, ax1, ax2, plots = plot2y(date_names, [len(user_ids) for user_ids in unique_user_ids_dates], [len(request_ids) for request_ids in unique_request_ids_dates], xlabel="Date", ylabel1="User count", label1="Unique users", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"])
            if get_humans:
                # overlay the human-only lines on the same axes
                fig_daily, ax1, ax2, plots = plot2y(date_names, [len(user_ids) for user_ids in unique_user_ids_human_dates], [len(request_ids) for request_ids in unique_request_ids_human_dates], label1="Unique users (human)", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots)
            fig_daily.savefig(f"{img_dir}/{names[f'img_users_and_requests{suffix}']}")

        # os & browser
        os_ranking, browser_ranking, names[f"mobile_user_percentage{suffix}"] = get_os_browser_mobile_rankings(cur, unique_user_ids_human)
        if gen_img:
            fig_os_rating = plot_ranking(os_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_operating_systems)
            fig_os_rating.savefig(f"{img_dir}/{names[f'img_operating_system_ranking{suffix}']}")
            fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Share [%]", color_settings=color_settings_browsers)
            fig_browser_rating.savefig(f"{img_dir}/{names[f'img_browser_ranking{suffix}']}")

    # fill the html template: replace every %name placeholder; image names
    # get the configured location prefixed
    if settings["template_html"] and settings["html_out_path"]:
        with open(settings["template_html"], "r") as file:
            html = file.read()
        for name, value in names.items():
            if "img" in name:
                value = f"{img_location}/{value}"
            html = html.replace(f"%{name}", str(value))
        with open(settings["html_out_path"], "w") as file:
            file.write(html)
    # fix: the connection was never closed before
    conn.close()
|
|
Loading…
Reference in New Issue
Block a user