wip
This commit is contained in:
parent
ea5189584e
commit
1ea034de17
138
regina/main.py
138
regina/main.py
@ -2,30 +2,36 @@ from enum import auto
|
|||||||
from collect import parse_log, add_requests_to_db
|
from collect import parse_log, add_requests_to_db
|
||||||
from sys import argv, exit
|
from sys import argv, exit
|
||||||
from database import create_db
|
from database import create_db
|
||||||
from os.path import isfile
|
from os.path import isfile, isdir
|
||||||
|
from visualize import visualize
|
||||||
|
from settings_manager import read_settings_file
|
||||||
|
|
||||||
def parse_config_file(path):
|
# default settings, these are overwriteable through a config file
|
||||||
server_name =""
|
settings = {
|
||||||
access_log_path = ""
|
# GENERAL
|
||||||
db_path = ""
|
"server-name": "",
|
||||||
filegroups = ""
|
# DATA COLLECTION
|
||||||
locs_and_dirs = []
|
"access-log": "",
|
||||||
auto_group_filetypes = []
|
"db": "",
|
||||||
with open(path, "r") as file:
|
"locs-and-dirs": [],
|
||||||
lines = file.readlines()
|
"auto-group-filetypes": [],
|
||||||
for line in lines:
|
"filegroups": "",
|
||||||
line = line.strip("\n ")
|
|
||||||
if line.startswith("#"): continue
|
# VISUALIZATION
|
||||||
arg, val = line.split("=")
|
"get-human-percentage": False,
|
||||||
arg = arg.strip(" ")
|
# "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
|
||||||
val = val.strip(" ")
|
"file_ranking_regex_whitelist": r".*\.(html)",
|
||||||
if arg == "server-name": server_name = val
|
"referer_ranking_regex_whitelist": r"^[^\-].*", # minus means empty
|
||||||
elif arg == "log": access_log_path = val
|
"user_agent_ranking_regex_whitelist": r"",
|
||||||
elif arg == "db": db_path = val
|
"file_ranking_plot_max_files": 15,
|
||||||
elif arg == "filegroups": filegroups = val
|
# "plot_figsize": (60, 40),
|
||||||
elif arg == "locs-and-dirs": locs_and_dirs = val
|
"plot_dpi": 300,
|
||||||
elif arg == "auto-group-filetypes": auto_group_filetypes = val
|
"img_dir": "",
|
||||||
return server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes
|
"img_filetype": "svg",
|
||||||
|
"template_html": "",
|
||||||
|
"html_out_path": "",
|
||||||
|
"last_x_days": 30,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def help():
|
def help():
|
||||||
@ -33,12 +39,13 @@ def help():
|
|||||||
--server-name string
|
--server-name string
|
||||||
--log path to the access.log
|
--log path to the access.log
|
||||||
--db name of the database
|
--db name of the database
|
||||||
--filegroups string describing filegroups, eg 'name1: file1, file2; name2: file3, file4, file5;'
|
--settings["filegroups"] string describing settings["filegroups"], eg 'name1: file1, file2; name2: file3, file4, file5;'
|
||||||
--auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif'
|
--auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif'
|
||||||
--locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website'
|
--locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website'
|
||||||
--config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line
|
--config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line
|
||||||
"""
|
"""
|
||||||
print(helpstring)
|
print(helpstring)
|
||||||
|
|
||||||
def missing_arg_val(arg):
|
def missing_arg_val(arg):
|
||||||
print("Missing argument for", arg)
|
print("Missing argument for", arg)
|
||||||
exit(1)
|
exit(1)
|
||||||
@ -46,62 +53,53 @@ def missing_arg_val(arg):
|
|||||||
def missing_arg(arg):
|
def missing_arg(arg):
|
||||||
print("Missing ", arg)
|
print("Missing ", arg)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
def error(arg):
|
||||||
|
print("Error:", arg)
|
||||||
|
exit(1)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
server_name =""
|
|
||||||
access_log_path = ""
|
|
||||||
db_path = ""
|
|
||||||
config_file = ""
|
config_file = ""
|
||||||
filegroups = ""
|
collect = False
|
||||||
auto_group_filetypes =[]
|
visualize_ = False
|
||||||
locs_and_dirs = []
|
|
||||||
# parse args
|
# parse args
|
||||||
i = 1
|
i = 1
|
||||||
while i in range(1, len(argv)):
|
while i in range(1, len(argv)):
|
||||||
if argv[i] == "--server-name":
|
if argv[i] == "--config":
|
||||||
if len(argv) > i + 1: server_name = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--db":
|
|
||||||
if len(argv) > i + 1: db_path = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--log":
|
|
||||||
if len(argv) > i + 1: access_log_path = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--config":
|
|
||||||
if len(argv) > i + 1: config_file = argv[i+1]
|
if len(argv) > i + 1: config_file = argv[i+1]
|
||||||
else: missing_arg_val(argv[i])
|
else: missing_arg_val(argv[i])
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--filegroups":
|
|
||||||
if len(argv) > i + 1: filegroups = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--auto-group-filetypes":
|
|
||||||
if len(argv) > i + 1: auto_group_filetypes = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--locs-and-dirs":
|
|
||||||
if len(argv) > i + 1: locs_and_dirs = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--help":
|
elif argv[i] == "--help":
|
||||||
help()
|
help()
|
||||||
exit(0)
|
exit(0)
|
||||||
|
elif argv[i] == "--collect":
|
||||||
|
collect = True
|
||||||
|
exit(0)
|
||||||
|
elif argv[i] == "--visualize":
|
||||||
|
visualize_ = True
|
||||||
else:
|
else:
|
||||||
i += 1
|
pass
|
||||||
|
i += 1
|
||||||
|
if not collect and not visualize_:
|
||||||
|
missing_arg("--visualize or --collect")
|
||||||
|
|
||||||
if config_file:
|
if not config_file:
|
||||||
server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes = parse_config_file(config_file)
|
missing_arg("--config_file")
|
||||||
|
if not isfile(config_file):
|
||||||
|
error(f"Not a file: '{config_file}'")
|
||||||
|
read_settings_file(config_file, settings)
|
||||||
|
|
||||||
if not server_name: missing_arg("--server-name")
|
if not settings["server-name"]: missing_arg("server-name")
|
||||||
if not access_log_path: missing_arg("--log")
|
if not settings["access-log"]: missing_arg("log")
|
||||||
if not db_path: missing_arg("--db")
|
if not settings["db"]: missing_arg("db")
|
||||||
if type(auto_group_filetypes) == str:
|
if type(settings["auto-group-filetypes"]) == str:
|
||||||
auto_group_filetypes = auto_group_filetypes.split(",")
|
settings["auto-group-filetypes"] = settings["auto-group-filetypes"].split(",")
|
||||||
if type(locs_and_dirs) == str:
|
if type(settings["locs-and-dirs"]) == str:
|
||||||
locs_and_dirs = [ loc_and_dir.split(":") for loc_and_dir in locs_and_dirs.split(",") ]
|
settings["locs-and-dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs-and-dirs"].split(",") ]
|
||||||
if not isfile(db_path):
|
if collect:
|
||||||
create_db(db_path, filegroups, locs_and_dirs, auto_group_filetypes)
|
if not isfile(settings["db"]):
|
||||||
requests = parse_log(access_log_path)
|
create_db(settings["db"], settings["filegroups"], settings["locs-and-dirs"], settings["auto-group-filetypes"])
|
||||||
add_requests_to_db(requests, db_path)
|
requests = parse_log(settings["access-log"])
|
||||||
|
add_requests_to_db(requests, settings["db"])
|
||||||
|
if visualize:
|
||||||
|
if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
|
||||||
|
visualize(settings)
|
||||||
|
@ -1,4 +1,9 @@
|
|||||||
|
|
||||||
|
def get_bool(bool_str: str, fallback=False):
    """
    Convert the textual boolean `bool_str` into a bool.

    Leading/trailing whitespace and letter case are ignored, so "true",
    "True", " TRUE " all yield True and "false", "False", "FALSE" yield False.

    :param bool_str: the text to interpret
    :param fallback: value returned when `bool_str` is neither "true" nor "false"
    :returns True, False or `fallback`
    """
    # non-string input can never match, fall through to the fallback
    if isinstance(bool_str, str):
        normalized = bool_str.strip().lower()
        if normalized == "true":
            return True
        if normalized == "false":
            return False
    return fallback
|
||||||
|
|
||||||
def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True):
|
def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True):
|
||||||
lines = []
|
lines = []
|
||||||
with open(filepath, "r") as file:
|
with open(filepath, "r") as file:
|
||||||
@ -16,6 +21,9 @@ def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True,
|
|||||||
if ignore_invalid_lines: continue
|
if ignore_invalid_lines: continue
|
||||||
else: raise KeyError(f"Invalid key: '{vals[0]}'")
|
else: raise KeyError(f"Invalid key: '{vals[0]}'")
|
||||||
if convert_to_type and type(settings[vals[0]]) not in [str, None]:
|
if convert_to_type and type(settings[vals[0]]) not in [str, None]:
|
||||||
|
if type(settings[vals[0]]) == bool:
|
||||||
|
settings[vals[0]] = get_bool(vals[1].strip(" "), fallback=settings[vals[0]])
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
|
settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -5,69 +5,59 @@ from sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
|
|||||||
from re import fullmatch, findall
|
from re import fullmatch, findall
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import matplotlib as mpl
|
import matplotlib as mpl
|
||||||
|
from os.path import isdir
|
||||||
|
"""
|
||||||
|
TODO:
|
||||||
|
- bei referrers ähnliche zusammenlegen, z.b. www.google.de und https://google.com
|
||||||
|
"""
|
||||||
|
|
||||||
settings = {
|
settings = {}
|
||||||
# "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
|
|
||||||
"file_ranking_regex_whitelist": r".*\.(html)",
|
palette = {
|
||||||
"referer_ranking_regex_whitelist": r"",
|
"red": "#ee4035",
|
||||||
"user_agent_ranking_regex_whitelist": r"",
|
"orange": "#f37736",
|
||||||
"file_ranking_plot_max_files": 15,
|
"yellow": "#fdf458",
|
||||||
# "plot_figsize": (60, 40),
|
"green": "#7bc043",
|
||||||
"plot_dpi": 300,
|
"blue": "#0392cf",
|
||||||
"plot_tight_layout": False,
|
"purple": "#b044a0",
|
||||||
"plot_constrained_layout": False,
|
|
||||||
}
|
}
|
||||||
color_settings_filetypes = {
|
color_settings_filetypes = {
|
||||||
"red": ["html"],
|
palette["red"]: ["html"],
|
||||||
"green": ["jpg", "png", "jpeg", "gif", "svg", "webp"],
|
palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"],
|
||||||
"yellow": ["css"],
|
palette["yellow"]: ["css"],
|
||||||
"grey": ["txt"]
|
"grey": ["txt"]
|
||||||
}
|
}
|
||||||
|
color_settings_alternate = list(palette.values())
|
||||||
|
|
||||||
img_ft = "svg"
|
|
||||||
# these oses and browser can be detected:
|
# these oses and browser can be detected:
|
||||||
# lower element takes precedence
|
# lower element takes precedence
|
||||||
user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
|
user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
|
||||||
user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
|
user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
|
||||||
color_settings_browsers = {
|
color_settings_browsers = {
|
||||||
"red": ["Safari"],
|
palette["red"]: ["Safari"],
|
||||||
"orange": ["Firefox"],
|
palette["orange"]: ["Firefox"],
|
||||||
"yellow": ["Chrome"],
|
palette["yellow"]: ["Chrome"],
|
||||||
"grey": ["Edge"],
|
"grey": ["Edge"],
|
||||||
"green": ["Chromium"],
|
palette["green"]: ["Chromium"],
|
||||||
"teal": ["Brave"]
|
palette["purple"]: ["Brave"]
|
||||||
}
|
}
|
||||||
color_settings_operating_systems = {
|
color_settings_operating_systems = {
|
||||||
"red": ["Macintosh"],
|
palette["red"]: ["Macintosh"],
|
||||||
"green": ["Android"],
|
palette["green"]: ["Android"],
|
||||||
"grey": ["iPhone", "iPad"],
|
"grey": ["iPhone", "iPad"],
|
||||||
"yellow": ["Linux"],
|
palette["yellow"]: ["Linux"],
|
||||||
"teal": ["BSD"],
|
palette["purple"]: ["BSD"],
|
||||||
"#6464ff": ["Windows"],
|
palette["blue"]: ["Windows"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def len_list_list(l: list[list]):
    """
    Return the total number of elements contained in the inner lists of `l`.

    :param l: a list of sized containers (eg. a list of lists)
    :returns the sum of the lengths of all inner containers, 0 if `l` is empty
    """
    return sum(len(inner) for inner in l)
|
||||||
|
|
||||||
|
|
||||||
# get all dates
|
|
||||||
def get_dates(cur: sql.Cursor) -> list[str]:
|
|
||||||
cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}")
|
|
||||||
return [ date[0] for date in cur.fetchall() ] # fetchall returns tuples (date, )
|
|
||||||
|
|
||||||
def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
|
|
||||||
cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
|
|
||||||
return [ user_id[0] for user_id in cur.fetchall() ]
|
|
||||||
|
|
||||||
# get number of requests per day
|
|
||||||
def get_request_count_for_date(cur: sql.Cursor, date:str) -> int:
|
|
||||||
return sql_get_count_where(cur, t_request, [("DATE(date, 'unixepoch')", date)])
|
|
||||||
|
|
||||||
def get_unique_user_count(cur: sql.Cursor) -> int:
|
|
||||||
return sql_tablesize(cur, t_user)
|
|
||||||
|
|
||||||
def get_user_agent(cur: sql.Cursor, user_id: int):
|
|
||||||
return sql_select(cur, t_user, [("user_id", user_id)])[0]
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# FILTERS
|
# FILTERS
|
||||||
#
|
#
|
||||||
@ -86,7 +76,7 @@ def get_os_browser_pairs_from_agent(user_agent):
|
|||||||
if br in user_agent:
|
if br in user_agent:
|
||||||
browser = br
|
browser = br
|
||||||
break
|
break
|
||||||
if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'")
|
# if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'")
|
||||||
return operating_system, browser, mobile
|
return operating_system, browser, mobile
|
||||||
|
|
||||||
def get_os_browser_mobile_rankings(user_agent_ranking):
|
def get_os_browser_mobile_rankings(user_agent_ranking):
|
||||||
@ -115,16 +105,60 @@ def get_os_browser_mobile_rankings(user_agent_ranking):
|
|||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
mobile_user_percentage = 0.0
|
mobile_user_percentage = 0.0
|
||||||
|
|
||||||
os_ranking = [(c/os_count, n) for n, c in os_ranking.items()]
|
os_ranking = [(c * 100/os_count, n) for n, c in os_ranking.items()]
|
||||||
os_ranking.sort()
|
os_ranking.sort()
|
||||||
browser_ranking = [(c/browser_count, n) for n, c in browser_ranking.items()]
|
browser_ranking = [(c * 100/browser_count, n) for n, c in browser_ranking.items()]
|
||||||
browser_ranking.sort()
|
browser_ranking.sort()
|
||||||
return os_ranking, browser_ranking, mobile_user_percentage
|
return os_ranking, browser_ranking, mobile_user_percentage*100
|
||||||
|
|
||||||
|
#
|
||||||
|
# GETTERS
|
||||||
|
#
|
||||||
|
# get all dates
|
||||||
|
def get_dates(cur: sql.Cursor) -> list[str]:
|
||||||
|
cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}")
|
||||||
|
return [ date[0] for date in cur.fetchall() ] # fetchall returns tuples (date, )
|
||||||
|
|
||||||
|
def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
|
||||||
|
cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
|
||||||
|
return [ user_id[0] for user_id in cur.fetchall() ]
|
||||||
|
|
||||||
|
def get_user_agent(cur: sql.Cursor, user_id: int):
|
||||||
|
return sql_select(cur, t_user, [("user_id", user_id)])[0][2]
|
||||||
|
|
||||||
|
def get_unique_user_ids_for_date_human(cur: sql.Cursor, date: str):
|
||||||
|
cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
|
||||||
|
human_user_ids = []
|
||||||
|
for user_id in cur.fetchall():
|
||||||
|
user_agent = get_user_agent(cur, user_id[0])
|
||||||
|
os, browser, mobile = get_os_browser_pairs_from_agent(user_agent)
|
||||||
|
# print("get_unique_user_ids_for_date", user_id[0], os, browser, user_agent)
|
||||||
|
if os and browser:
|
||||||
|
human_user_ids.append(user_id[0])
|
||||||
|
return human_user_ids
|
||||||
|
|
||||||
|
def get_unique_request_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
|
||||||
|
cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
|
||||||
|
return [ request_id[0] for request_id in cur.fetchall() ]
|
||||||
|
|
||||||
|
def get_unique_request_ids_for_date_and_user(cur: sql.Cursor, date:str, user_id: int) -> list[int]:
    """
    Get the distinct request ids of one user on one day.

    :param cur: cursor on the database that holds the request table
    :param date: the day to select, compared against DATE(date, 'unixepoch')
    :param user_id: only requests of this user are selected
    :returns [request_id]
    """
    # bind the values as parameters instead of formatting them into the query,
    # user_id was previously inserted into the sql string without any escaping
    # NOTE(review): the '?' placeholder style assumes `sql` is the sqlite3 module - confirm
    cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = ? AND user_id = ?", (date, user_id))
    return [ request_id[0] for request_id in cur.fetchall() ]  # fetchall returns tuples (request_id, )
|
||||||
|
|
||||||
|
# get number of requests per day
|
||||||
|
def get_request_count_for_date(cur: sql.Cursor, date:str) -> int:
|
||||||
|
return sql_get_count_where(cur, t_request, [("DATE(date, 'unixepoch')", date)])
|
||||||
|
|
||||||
|
def get_unique_user_count(cur: sql.Cursor) -> int:
|
||||||
|
return sql_tablesize(cur, t_user)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# RANKINGS
|
# RANKINGS
|
||||||
#
|
#
|
||||||
def get_file_ranking(cur: sql.Cursor, min_date_unix_time = 0) -> list[tuple[int, str]]:
|
def get_file_ranking(cur: sql.Cursor, min_date_unix_time = 0) -> list[tuple[int, str]]:
|
||||||
|
global settings
|
||||||
"""
|
"""
|
||||||
:returns [(request_count, filename)]
|
:returns [(request_count, filename)]
|
||||||
"""
|
"""
|
||||||
@ -197,7 +231,7 @@ def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot):
|
|||||||
height = rect.get_height()
|
height = rect.get_height()
|
||||||
if height > 0.8 * max_y_val:
|
if height > 0.8 * max_y_val:
|
||||||
height = 0.05 * max_y_val
|
height = 0.05 * max_y_val
|
||||||
ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
|
ax.text(rect.get_x() + rect.get_width()/2., height + 0.025 * max_y_val,
|
||||||
labels[idx],
|
labels[idx],
|
||||||
ha='center', va='bottom', rotation=90)
|
ha='center', va='bottom', rotation=90)
|
||||||
|
|
||||||
@ -206,7 +240,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
|
|||||||
make a bar plot of the most requested files
|
make a bar plot of the most requested files
|
||||||
"""
|
"""
|
||||||
if not fig:
|
if not fig:
|
||||||
fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, tight_layout=settings["plot_tight_layout"], constrained_layout=settings["plot_constrained_layout"])
|
fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
|
||||||
# create new axis if none is given
|
# create new axis if none is given
|
||||||
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
|
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
|
||||||
# fill x y data
|
# fill x y data
|
||||||
@ -227,6 +261,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
|
|||||||
if ft in val: color = key
|
if ft in val: color = key
|
||||||
if not color: color = "blue"
|
if not color: color = "blue"
|
||||||
elif type(color_settings) == list:
|
elif type(color_settings) == list:
|
||||||
|
# print(color_settings, (i - start_index) % len(color_settings))
|
||||||
color = color_settings[(i - start_index) % len(color_settings)]
|
color = color_settings[(i - start_index) % len(color_settings)]
|
||||||
colors.append(color)
|
colors.append(color)
|
||||||
bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
|
bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
|
||||||
@ -235,8 +270,54 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
|
|||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_users_per_day(days, user_counts, fig=None):
|
def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue"):
|
||||||
pass
|
if not fig:
|
||||||
|
fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
|
||||||
|
if not ax:
|
||||||
|
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
|
||||||
|
else:
|
||||||
|
ax = ax.twinx()
|
||||||
|
ax.set_ylabel(ylabel)
|
||||||
|
# ax.tick_params(axis="y", labelcolor="r")
|
||||||
|
ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
|
||||||
|
if label: ax.legend()
|
||||||
|
# if xlim:
|
||||||
|
# if xlim[0] != xlim[1]:
|
||||||
|
# ax.set_xlim(*xlim)
|
||||||
|
|
||||||
|
# if ylim:
|
||||||
|
# if ylim[0] != ylim[1]:
|
||||||
|
# ax.set_ylim(*ylim)
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major"):
    """
    Plot two data series over the same x data, each one on its own y axis.

    To add further series to an existing figure, pass the `fig`, `ax1`, `ax2`
    and `plots` values returned by a previous call.

    :param xdata: x values shared by both series
    :param ydata1: y values plotted on the left axis `ax1`
    :param ydata2: y values plotted on the right axis `ax2`
    :param fig: figure to draw into, a new one is created if not given
    :param ax1: left axis, created together with `ax2` unless both are given
    :param ax2: right axis (twin of `ax1`)
    :param plots: lines from earlier calls, they are included in the legend
    :param grid: "major", "minor" or "both" to draw that grid on `ax1`, anything else for no grid
    :returns fig, ax1, ax2, plots
    """
    if not fig:
        fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    if not (ax1 and ax2):
        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
        ax2 = ax1.twinx()
        ax2.set_ylabel(ylabel2)
    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
    # collect the lines of both axes so that a single legend can show all of them
    if plots: plots += plot1 + plot2
    else: plots = plot1 + plot2
    plt.legend(plots, [ l.get_label() for l in plots])

    if grid in ("major", "minor", "both"):
        # minor ticks are only needed when a minor grid is requested
        # (the former check `grid == "minor" or "both"` was always true)
        if grid in ("minor", "both"):
            ax1.minorticks_on()
        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")

    return fig, ax1, ax2, plots
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -249,53 +330,116 @@ def missing_arg_val(arg):
|
|||||||
def missing_arg(arg):
|
def missing_arg(arg):
|
||||||
print("Missing ", arg)
|
print("Missing ", arg)
|
||||||
exit(1)
|
exit(1)
|
||||||
if __name__ == '__main__':
|
|
||||||
server_name =""
|
|
||||||
db_path = ""
|
|
||||||
# parse args
|
|
||||||
i = 1
|
|
||||||
while i in range(1, len(argv)):
|
|
||||||
if argv[i] == "--server-name":
|
|
||||||
if len(argv) > i + 1: server_name = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
elif argv[i] == "--db":
|
|
||||||
if len(argv) > i + 1: db_path = argv[i+1]
|
|
||||||
else: missing_arg_val(argv[i])
|
|
||||||
i += 1
|
|
||||||
else:
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
if not server_name: missing_arg("--server-name")
|
def visualize(loaded_settings: dict):
|
||||||
if not db_path: missing_arg("--db")
|
global settings
|
||||||
|
settings = loaded_settings
|
||||||
|
if not settings["db"]: missing_arg("db")
|
||||||
|
if not settings["server-name"]: missing_arg("server-name")
|
||||||
|
|
||||||
|
img_dir = settings["img_dir"]
|
||||||
|
img_filetype = settings["img_filetype"]
|
||||||
|
names = {
|
||||||
|
"img_file_ranking": f"{img_dir}/ranking_all_time_files.{img_filetype}",
|
||||||
|
"img_referer_ranking": f"{img_dir}/ranking_all_time_referers.{img_filetype}",
|
||||||
|
"img_browser_ranking": f"{img_dir}/ranking_all_time_browsers.{img_filetype}",
|
||||||
|
"img_operating_system_ranking": f"{img_dir}/ranking_all_time_operating_systems.{img_filetype}",
|
||||||
|
"img_daily": f"{img_dir}/user_request_count_daily.{img_filetype}",
|
||||||
|
"mobile_user_percentage": 0.0,
|
||||||
|
"server-name": settings["server-name"],
|
||||||
|
"last_x_days": settings["last_x_days"],
|
||||||
|
# order matters!
|
||||||
|
"total_user_count_x_days": 0,
|
||||||
|
"total_request_count_x_days": 0,
|
||||||
|
"total_user_count": 0,
|
||||||
|
"total_request_count": 0,
|
||||||
|
"human_user_percentage_x_days": 0,
|
||||||
|
"human_request_percentage_x_days": 0,
|
||||||
|
"human_user_percentage": 0,
|
||||||
|
"human_request_percentage": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
conn = sql.connect(settings["db"])
|
||||||
conn = sql.connect(db_path)
|
if isdir(img_dir) and img_filetype:
|
||||||
|
gen_img = True
|
||||||
|
else:
|
||||||
|
print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
|
||||||
|
gen_img = False
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
file_ranking = get_file_ranking(cur)
|
|
||||||
referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur)
|
|
||||||
user_agent_ranking = get_user_agent_ranking(cur)
|
|
||||||
for count, agent in user_agent_ranking:
|
|
||||||
get_os_browser_pairs_from_agent(agent)
|
|
||||||
fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes)
|
|
||||||
fig_file_ranking.savefig(f"ranking_files.{img_ft}")
|
|
||||||
|
|
||||||
|
get_humans = settings["get-human-percentage"]
|
||||||
|
print("\t>>>>>>", get_humans)
|
||||||
|
|
||||||
|
# files
|
||||||
|
file_ranking = get_file_ranking(cur)
|
||||||
|
if gen_img:
|
||||||
|
fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes)
|
||||||
|
fig_file_ranking.savefig(names["img_file_ranking"])
|
||||||
|
|
||||||
|
# referer
|
||||||
|
referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur)
|
||||||
|
print("Referer ranking", referer_ranking)
|
||||||
|
if gen_img:
|
||||||
|
fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate)
|
||||||
|
fig_referer_ranking.savefig(names["img_referer_ranking"])
|
||||||
|
|
||||||
|
# dates
|
||||||
dates = get_dates(cur)
|
dates = get_dates(cur)
|
||||||
unique_users_for_dates = []
|
# user
|
||||||
print(dates, unique_users_for_dates)
|
user_agent_ranking = get_user_agent_ranking(cur)
|
||||||
|
unique_user_ids_for_dates = []
|
||||||
|
unique_request_ids_for_dates = []
|
||||||
|
unique_user_ids_for_dates_human = []
|
||||||
|
unique_request_ids_for_dates_human = []
|
||||||
for date in dates:
|
for date in dates:
|
||||||
unique_users_for_dates.append(get_unique_user_ids_for_date(cur, date))
|
unique_user_ids_for_dates.append(get_unique_user_ids_for_date(cur, date))
|
||||||
print(dates, unique_users_for_dates)
|
unique_request_ids_for_dates.append(get_unique_request_ids_for_date(cur, date))
|
||||||
os_ranking, browser_ranking, mobile_user_percentage = get_os_browser_mobile_rankings(user_agent_ranking)
|
if get_humans:
|
||||||
fig_os_rating = plot_ranking(os_ranking, xlabel="Operating Systems", ylabel="Percentage", color_settings=color_settings_operating_systems)
|
unique_user_ids_for_dates_human.append(get_unique_user_ids_for_date_human(cur, date))
|
||||||
fig_os_rating.savefig(f"ranking_operating_systems.{img_ft}")
|
unique_request_ids_for_dates_human.append([])
|
||||||
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Percentage", color_settings=color_settings_browsers)
|
for human in unique_user_ids_for_dates_human[-1]:
|
||||||
fig_browser_rating.savefig(f"ranking_browsers.{img_ft}")
|
unique_request_ids_for_dates_human[-1] += get_unique_request_ids_for_date_and_user(cur, date, human)
|
||||||
|
if get_humans:
|
||||||
|
try:
|
||||||
|
names["human_user_percentage_x_days"] = round(100 * len_list_list(unique_user_ids_for_dates_human) / len_list_list(unique_user_ids_for_dates), 2)
|
||||||
|
names["human_request_percentage_x_days"] = round(100 * len_list_list(unique_request_ids_for_dates_human) / len_list_list(unique_request_ids_for_dates), 2)
|
||||||
|
except: pass
|
||||||
|
print(">>>", len_list_list(unique_request_ids_for_dates), len_list_list(unique_request_ids_for_dates_human))
|
||||||
|
names["total_user_count"] = sql_tablesize(cur, t_user)
|
||||||
|
names["total_request_count"] = sql_tablesize(cur, t_request)
|
||||||
|
names["total_user_count_x_days"] = len_list_list(unique_user_ids_for_dates)
|
||||||
|
names["total_request_count_x_days"] = len_list_list(unique_request_ids_for_dates)
|
||||||
|
|
||||||
|
# os & browser
|
||||||
|
os_ranking, browser_ranking, names["mobile_user_percentage"] = get_os_browser_mobile_rankings(user_agent_ranking)
|
||||||
|
if gen_img:
|
||||||
|
fig_os_rating = plot_ranking(os_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_operating_systems)
|
||||||
|
fig_os_rating.savefig(names["img_operating_system_ranking"])
|
||||||
|
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Share [%]", color_settings=color_settings_browsers)
|
||||||
|
fig_browser_rating.savefig(names["img_browser_ranking"])
|
||||||
|
|
||||||
# print("File Ranking", file_ranking)
|
# print("File Ranking", file_ranking)
|
||||||
# print("referer Ranking", referer_ranking)
|
# print("referer Ranking", referer_ranking)
|
||||||
# print("user agent ranking", user_agent_ranking)
|
# print("user agent ranking", user_agent_ranking)
|
||||||
# print("Unique Users:", get_unique_user_count(cur))
|
# print("Unique Users:", get_unique_user_count(cur))
|
||||||
|
# fig_daily, ax_daily_users = plot(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], xlabel="Datum", ylabel="Einzigartige Nutzer", label="Einzigartige Nutzer", color="blue")
|
||||||
|
# fig_daily, ax_daily_requests = plot(dates, [len(request_ids) for request_ids in unique_request_ids_for_dates], fig=fig_daily, ax=ax_daily_users, xlabel="Datum", ylabel="Einzigartige Anfragen", label="Einzigartige Anfragen", color="orange")
|
||||||
|
# fig_daily.savefig(f"{img_dir}/daily.{img_filetype}")
|
||||||
|
if gen_img:
|
||||||
|
fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], [len(request_ids) for request_ids in unique_request_ids_for_dates], xlabel="Date", ylabel1="User count", label1="Unique users", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"])
|
||||||
|
if get_humans:
|
||||||
|
fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates_human], [len(request_ids) for request_ids in unique_request_ids_for_dates_human], label1="Unique users (human)", ylabel2="Einzigartige Anfragen", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots)
|
||||||
|
fig_daily.savefig(names["img_daily"])
|
||||||
print("OS ranking", os_ranking)
|
print("OS ranking", os_ranking)
|
||||||
print("Browser ranking", browser_ranking)
|
print("Browser ranking", browser_ranking)
|
||||||
print("Mobile percentage", mobile_user_percentage)
|
print("Mobile percentage", names["mobile_user_percentage"])
|
||||||
|
print(dates, "\n\tuu", unique_user_ids_for_dates, "\n\tur",unique_request_ids_for_dates, "\n\tuuh", unique_user_ids_for_dates_human, "\n\turh", unique_request_ids_for_dates_human)
|
||||||
|
if settings["template_html"] and settings["html_out_path"]:
|
||||||
|
with open(settings["template_html"], "r") as file:
|
||||||
|
html = file.read()
|
||||||
|
for name, value in names.items():
|
||||||
|
html = html.replace(f"%{name}", str(value))
|
||||||
|
with open(settings["html_out_path"], "w") as file:
|
||||||
|
file.write(html)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user