From 1ea034de17bfa8c96d122cacef7eba9685611abc Mon Sep 17 00:00:00 2001 From: "Matthias@Dell" Date: Wed, 23 Nov 2022 16:49:25 +0100 Subject: [PATCH] wip --- regina/main.py | 138 ++++++++-------- regina/settings_manager.py | 8 + regina/visualize.py | 320 +++++++++++++++++++++++++++---------- 3 files changed, 308 insertions(+), 158 deletions(-) diff --git a/regina/main.py b/regina/main.py index 1631741..714557c 100644 --- a/regina/main.py +++ b/regina/main.py @@ -2,30 +2,36 @@ from enum import auto from collect import parse_log, add_requests_to_db from sys import argv, exit from database import create_db -from os.path import isfile +from os.path import isfile, isdir +from visualize import visualize +from settings_manager import read_settings_file -def parse_config_file(path): - server_name ="" - access_log_path = "" - db_path = "" - filegroups = "" - locs_and_dirs = [] - auto_group_filetypes = [] - with open(path, "r") as file: - lines = file.readlines() - for line in lines: - line = line.strip("\n ") - if line.startswith("#"): continue - arg, val = line.split("=") - arg = arg.strip(" ") - val = val.strip(" ") - if arg == "server-name": server_name = val - elif arg == "log": access_log_path = val - elif arg == "db": db_path = val - elif arg == "filegroups": filegroups = val - elif arg == "locs-and-dirs": locs_and_dirs = val - elif arg == "auto-group-filetypes": auto_group_filetypes = val - return server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes +# default settings, these are overwriteable through a config file +settings = { + # GENERAL + "server-name": "", + # DATA COLLECTION + "access-log": "", + "db": "", + "locs-and-dirs": [], + "auto-group-filetypes": [], + "filegroups": "", + + # VISUALIZATION + "get-human-percentage": False, + # "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))", + "file_ranking_regex_whitelist": r".*\.(html)", + "referer_ranking_regex_whitelist": r"^[^\-].*", # minus means empty + "user_agent_ranking_regex_whitelist": r"", + "file_ranking_plot_max_files": 15, + # "plot_figsize": (60, 40), + "plot_dpi": 300, + "img_dir": "", + "img_filetype": "svg", + "template_html": "", + "html_out_path": "", + "last_x_days": 30, +} def help(): @@ -33,12 +39,13 @@ def help(): --server-name string --log path to the access.log --db name of the database - --filegroups string describing filegroups, eg 'name1: file1, file2; name2: file3, file4, file5;' + --settings["filegroups"] string describing settings["filegroups"], eg 'name1: file1, file2; name2: file3, file4, file5;' --auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif' --locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website' --config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line """ print(helpstring) + def missing_arg_val(arg): print("Missing argument for", arg) exit(1) @@ -46,62 +53,53 @@ def missing_arg_val(arg): def missing_arg(arg): print("Missing ", arg) exit(1) + +def error(arg): + print("Error:", arg) + exit(1) + if __name__ == '__main__': - server_name ="" - access_log_path = "" - db_path = "" config_file = "" - filegroups = "" - auto_group_filetypes =[] - locs_and_dirs = [] + collect = False + visualize_ = False # parse args i = 1 while i in range(1, len(argv)): - if argv[i] == "--server-name": - if len(argv) > i + 1: server_name = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--db": - if len(argv) > i + 1: db_path = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--log": - if len(argv) > i + 1: access_log_path = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--config": + if argv[i] == "--config": if len(argv) > i + 1: config_file = argv[i+1] else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--filegroups": - if len(argv) > i + 1: filegroups = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--auto-group-filetypes": - if len(argv) > i + 1: auto_group_filetypes = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--locs-and-dirs": - if len(argv) > i + 1: locs_and_dirs = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 elif argv[i] == "--help": help() exit(0) + elif argv[i] == "--collect": + collect = True + exit(0) + elif argv[i] == "--visualize": + visualize_ = True else: - i += 1 + pass + i += 1 + if not collect and not visualize_: + missing_arg("--visualize or --collect") - if config_file: - server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes = parse_config_file(config_file) + if not config_file: + missing_arg("--config_file") + if not isfile(config_file): + error(f"Not a file: '{config_file}'") + read_settings_file(config_file, settings) - if not server_name: missing_arg("--server-name") - if not access_log_path: missing_arg("--log") - if not db_path: missing_arg("--db") - if type(auto_group_filetypes) == str: - auto_group_filetypes = auto_group_filetypes.split(",") - if type(locs_and_dirs) == str: - locs_and_dirs = [ loc_and_dir.split(":") for loc_and_dir in locs_and_dirs.split(",") ] - if not isfile(db_path): - create_db(db_path, filegroups, locs_and_dirs, auto_group_filetypes) - requests = parse_log(access_log_path) - add_requests_to_db(requests, db_path) + if not settings["server-name"]: missing_arg("server-name") + if not settings["access-log"]: missing_arg("log") + if not settings["db"]: missing_arg("db") + if type(settings["auto-group-filetypes"]) == str: + settings["auto-group-filetypes"] = settings["auto-group-filetypes"].split(",") + if type(settings["locs-and-dirs"]) == str: + settings["locs-and-dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs-and-dirs"].split(",") ] + if collect: + if not isfile(settings["db"]): + create_db(settings["db"], settings["filegroups"], settings["locs-and-dirs"], settings["auto-group-filetypes"]) + requests = parse_log(settings["access-log"]) + add_requests_to_db(requests, settings["db"]) + if visualize: + if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'") + visualize(settings) diff --git a/regina/settings_manager.py b/regina/settings_manager.py index 2e9b656..993b76a 100644 --- a/regina/settings_manager.py +++ b/regina/settings_manager.py @@ -1,4 +1,9 @@ +def get_bool(bool_str: str, fallback=False): + if bool_str in ["true", "True"]: return True + elif bool_str in ["false", "False"]: return False + return fallback + def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True): lines = [] with open(filepath, "r") as file: @@ -16,6 +21,9 @@ def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, if ignore_invalid_lines: continue else: raise KeyError(f"Invalid key: '{vals[0]}'") if convert_to_type and type(settings[vals[0]]) not in [str, None]: + if type(settings[vals[0]]) == bool: + settings[vals[0]] = get_bool(vals[1].strip(" "), fallback=settings[vals[0]]) + continue try: settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" ")) except Exception as e: diff --git a/regina/visualize.py b/regina/visualize.py index a34cd14..21097e7 100644 --- a/regina/visualize.py +++ b/regina/visualize.py @@ -5,69 +5,59 @@ from sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize from re import fullmatch, findall import matplotlib.pyplot as plt import matplotlib as mpl +from os.path import isdir +""" +TODO: +- bei referrers ähnliche zusammenlegen, z.b. www.google.de und https://google.com +""" -settings = { - # "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))", - "file_ranking_regex_whitelist": r".*\.(html)", - "referer_ranking_regex_whitelist": r"", - "user_agent_ranking_regex_whitelist": r"", - "file_ranking_plot_max_files": 15, - # "plot_figsize": (60, 40), - "plot_dpi": 300, - "plot_tight_layout": False, - "plot_constrained_layout": False, +settings = {} + +palette = { + "red": "#ee4035", + "orange": "#f37736", + "yellow": "#fdf458", + "green": "#7bc043", + "blue": "#0392cf", + "purple": "#b044a0", } color_settings_filetypes = { - "red": ["html"], - "green": ["jpg", "png", "jpeg", "gif", "svg", "webp"], - "yellow": ["css"], + palette["red"]: ["html"], + palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"], + palette["yellow"]: ["css"], "grey": ["txt"] } +color_settings_alternate = list(palette.values()) -img_ft = "svg" # these oses and browser can be detected: # lower element takes precedence user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"] user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"] color_settings_browsers = { - "red": ["Safari"], - "orange": ["Firefox"], - "yellow": ["Chrome"], + palette["red"]: ["Safari"], + palette["orange"]: ["Firefox"], + palette["yellow"]: ["Chrome"], "grey": ["Edge"], - "green": ["Chromium"], - "teal": ["Brave"] + palette["green"]: ["Chromium"], + palette["purple"]: ["Brave"] } color_settings_operating_systems = { - "red": ["Macintosh"], - "green": ["Android"], + palette["red"]: ["Macintosh"], + palette["green"]: ["Android"], "grey": ["iPhone", "iPad"], - "yellow": ["Linux"], - "teal": ["BSD"], - "#6464ff": ["Windows"], + palette["yellow"]: ["Linux"], + palette["purple"]: ["BSD"], + palette["blue"]: ["Windows"], } +def len_list_list(l: list[list]): + size = 0 + for i in range(len(l)): + size += len(l[i]) + return size -# get all dates -def get_dates(cur: sql.Cursor) -> list[str]: - cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}") - return [ date[0] for date in cur.fetchall() ] # fetchall returns tuples (date, ) - -def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]: - cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'") - return [ user_id[0] for user_id in cur.fetchall() ] - -# get number of requests per day -def get_request_count_for_date(cur: sql.Cursor, date:str) -> int: - return sql_get_count_where(cur, t_request, [("DATE(date, 'unixepoch')", date)]) - -def get_unique_user_count(cur: sql.Cursor) -> int: - return sql_tablesize(cur, t_user) - -def get_user_agent(cur: sql.Cursor, user_id: int): - return sql_select(cur, t_user, [("user_id", user_id)])[0] - # # FILTERS # @@ -86,7 +76,7 @@ def get_os_browser_pairs_from_agent(user_agent): if br in user_agent: browser = br break - if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'") + # if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'") return operating_system, browser, mobile def get_os_browser_mobile_rankings(user_agent_ranking): @@ -115,16 +105,60 @@ def get_os_browser_mobile_rankings(user_agent_ranking): except ZeroDivisionError: mobile_user_percentage = 0.0 - os_ranking = [(c/os_count, n) for n, c in os_ranking.items()] + os_ranking = [(c * 100/os_count, n) for n, c in os_ranking.items()] os_ranking.sort() - browser_ranking = [(c/browser_count, n) for n, c in browser_ranking.items()] + browser_ranking = [(c * 100/browser_count, n) for n, c in browser_ranking.items()] browser_ranking.sort() - return os_ranking, browser_ranking, mobile_user_percentage + return os_ranking, browser_ranking, mobile_user_percentage*100 + +# +# GETTERS +# +# get all dates +def get_dates(cur: sql.Cursor) -> list[str]: + cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}") + return [ date[0] for date in cur.fetchall() ] # fetchall returns tuples (date, ) + +def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]: + cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'") + return [ user_id[0] for user_id in cur.fetchall() ] + +def get_user_agent(cur: sql.Cursor, user_id: int): + return sql_select(cur, t_user, [("user_id", user_id)])[0][2] + +def get_unique_user_ids_for_date_human(cur: sql.Cursor, date: str): + cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'") + human_user_ids = [] + for user_id in cur.fetchall(): + user_agent = get_user_agent(cur, user_id[0]) + os, browser, mobile = get_os_browser_pairs_from_agent(user_agent) + # print("get_unique_user_ids_for_date", user_id[0], os, browser, user_agent) + if os and browser: + human_user_ids.append(user_id[0]) + return human_user_ids + +def get_unique_request_ids_for_date(cur: sql.Cursor, date:str) -> list[int]: + cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'") + return [ request_id[0] for request_id in cur.fetchall() ] + +def get_unique_request_ids_for_date_and_user(cur: sql.Cursor, date:str, user_id: int) -> list[int]: + cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}' AND user_id = {user_id}") + return [ request_id[0] for request_id in cur.fetchall() ] + +# get number of requests per day +def get_request_count_for_date(cur: sql.Cursor, date:str) -> int: + return sql_get_count_where(cur, t_request, [("DATE(date, 'unixepoch')", date)]) + +def get_unique_user_count(cur: sql.Cursor) -> int: + return sql_tablesize(cur, t_user) + + # # RANKINGS # def get_file_ranking(cur: sql.Cursor, min_date_unix_time = 0) -> list[tuple[int, str]]: + global settings """ :returns [(request_count, filename)] """ @@ -197,7 +231,7 @@ def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot): height = rect.get_height() if height > 0.8 * max_y_val: height = 0.05 * max_y_val - ax.text(rect.get_x() + rect.get_width()/2., 1.05*height, + ax.text(rect.get_x() + rect.get_width()/2., height + 0.025 * max_y_val, labels[idx], ha='center', va='bottom', rotation=90) @@ -206,7 +240,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", make a bar plot of the most requested files """ if not fig: - fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, tight_layout=settings["plot_tight_layout"], constrained_layout=settings["plot_constrained_layout"]) + fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None) # create new axis if none is given ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel) # fill x y data @@ -227,6 +261,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", if ft in val: color = key if not color: color = "blue" elif type(color_settings) == list: + # print(color_settings, (i - start_index) % len(color_settings)) color = color_settings[(i - start_index) % len(color_settings)] colors.append(color) bar = ax.bar(x_names, y_counts, tick_label="", color=colors) @@ -235,8 +270,54 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", return fig -def plot_users_per_day(days, user_counts, fig=None): - pass +def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue"): + if not fig: + fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None) + if not ax: + ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel) + else: + ax = ax.twinx() + ax.set_ylabel(ylabel) + # ax.tick_params(axis="y", labelcolor="r") + ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color) + if label: ax.legend() + # if xlim: + # if xlim[0] != xlim[1]: + # ax.set_xlim(*xlim) + + # if ylim: + # if ylim[0] != ylim[1]: + # ax.set_ylim(*ylim) + return fig, ax + +def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major"): + if not fig: + fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None) + if not (ax1 and ax2): + ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1) + ax2 = ax1.twinx() + ax2.set_ylabel(ylabel2) + # ax.tick_params(axis="y", labelcolor="r") + plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1) + plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2) + # if label1 or label2: ax1.legend() + if plots: plots += plot1 + plot2 + else: plots = plot1 + plot2 + plt.legend(plots, [ l.get_label() for l in plots]) + + if grid == "major" or grid == "minor" or grid == "both": + if grid == "minor" or "both": + ax1.minorticks_on() + ax1.grid(visible=True, which=grid, linestyle="-", color="#888") + + # if xlim: + # if xlim[0] != xlim[1]: + # ax.set_xlim(*xlim) + + # if ylim: + # if ylim[0] != ylim[1]: + # ax.set_ylim(*ylim) + return fig, ax1, ax2, plots # @@ -249,53 +330,116 @@ def missing_arg_val(arg): def missing_arg(arg): print("Missing ", arg) exit(1) -if __name__ == '__main__': - server_name ="" - db_path = "" - # parse args - i = 1 - while i in range(1, len(argv)): - if argv[i] == "--server-name": - if len(argv) > i + 1: server_name = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - elif argv[i] == "--db": - if len(argv) > i + 1: db_path = argv[i+1] - else: missing_arg_val(argv[i]) - i += 1 - else: - i += 1 - if not server_name: missing_arg("--server-name") - if not db_path: missing_arg("--db") +def visualize(loaded_settings: dict): + global settings + settings = loaded_settings + if not settings["db"]: missing_arg("db") + if not settings["server-name"]: missing_arg("server-name") + img_dir = settings["img_dir"] + img_filetype = settings["img_filetype"] + names = { + "img_file_ranking": f"{img_dir}/ranking_all_time_files.{img_filetype}", + "img_referer_ranking": f"{img_dir}/ranking_all_time_referers.{img_filetype}", + "img_browser_ranking": f"{img_dir}/ranking_all_time_browsers.{img_filetype}", + "img_operating_system_ranking": f"{img_dir}/ranking_all_time_operating_systems.{img_filetype}", + "img_daily": f"{img_dir}/user_request_count_daily.{img_filetype}", + "mobile_user_percentage": 0.0, + "server-name": settings["server-name"], + "last_x_days": settings["last_x_days"], + # order matters! + "total_user_count_x_days": 0, + "total_request_count_x_days": 0, + "total_user_count": 0, + "total_request_count": 0, + "human_user_percentage_x_days": 0, + "human_request_percentage_x_days": 0, + "human_user_percentage": 0, + "human_request_percentage": 0, + } - - conn = sql.connect(db_path) + conn = sql.connect(settings["db"]) + if isdir(img_dir) and img_filetype: + gen_img = True + else: + print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'") + gen_img = False cur = conn.cursor() - file_ranking = get_file_ranking(cur) - referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur) - user_agent_ranking = get_user_agent_ranking(cur) - for count, agent in user_agent_ranking: - get_os_browser_pairs_from_agent(agent) - fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes) - fig_file_ranking.savefig(f"ranking_files.{img_ft}") + get_humans = settings["get-human-percentage"] + print("\t>>>>>>", get_humans) + + # files + file_ranking = get_file_ranking(cur) + if gen_img: + fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes) + fig_file_ranking.savefig(names["img_file_ranking"]) + + # referer + referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur) + print("Referer ranking", referer_ranking) + if gen_img: + fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate) + fig_referer_ranking.savefig(names["img_referer_ranking"]) + + # dates dates = get_dates(cur) - unique_users_for_dates = [] - print(dates, unique_users_for_dates) + # user + user_agent_ranking = get_user_agent_ranking(cur) + unique_user_ids_for_dates = [] + unique_request_ids_for_dates = [] + unique_user_ids_for_dates_human = [] + unique_request_ids_for_dates_human = [] for date in dates: - unique_users_for_dates.append(get_unique_user_ids_for_date(cur, date)) - print(dates, unique_users_for_dates) - os_ranking, browser_ranking, mobile_user_percentage = get_os_browser_mobile_rankings(user_agent_ranking) - fig_os_rating = plot_ranking(os_ranking, xlabel="Operating Systems", ylabel="Percentage", color_settings=color_settings_operating_systems) - fig_os_rating.savefig(f"ranking_operating_systems.{img_ft}") - fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Percentage", color_settings=color_settings_browsers) - fig_browser_rating.savefig(f"ranking_browsers.{img_ft}") + unique_user_ids_for_dates.append(get_unique_user_ids_for_date(cur, date)) + unique_request_ids_for_dates.append(get_unique_request_ids_for_date(cur, date)) + if get_humans: + unique_user_ids_for_dates_human.append(get_unique_user_ids_for_date_human(cur, date)) + unique_request_ids_for_dates_human.append([]) + for human in unique_user_ids_for_dates_human[-1]: + unique_request_ids_for_dates_human[-1] += get_unique_request_ids_for_date_and_user(cur, date, human) + if get_humans: + try: + names["human_user_percentage_x_days"] = round(100 * len_list_list(unique_user_ids_for_dates_human) / len_list_list(unique_user_ids_for_dates), 2) + names["human_request_percentage_x_days"] = round(100 * len_list_list(unique_request_ids_for_dates_human) / len_list_list(unique_request_ids_for_dates), 2) + except: pass + print(">>>", len_list_list(unique_request_ids_for_dates), len_list_list(unique_request_ids_for_dates_human)) + names["total_user_count"] = sql_tablesize(cur, t_user) + names["total_request_count"] = sql_tablesize(cur, t_request) + names["total_user_count_x_days"] = len_list_list(unique_user_ids_for_dates) + names["total_request_count_x_days"] = len_list_list(unique_request_ids_for_dates) + + # os & browser + os_ranking, browser_ranking, names["mobile_user_percentage"] = get_os_browser_mobile_rankings(user_agent_ranking) + if gen_img: + fig_os_rating = plot_ranking(os_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_operating_systems) + fig_os_rating.savefig(names["img_operating_system_ranking"]) + fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Share [%]", color_settings=color_settings_browsers) + fig_browser_rating.savefig(names["img_browser_ranking"]) + # print("File Ranking", file_ranking) # print("referer Ranking", referer_ranking) # print("user agent ranking", user_agent_ranking) # print("Unique Users:", get_unique_user_count(cur)) + # fig_daily, ax_daily_users = plot(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], xlabel="Datum", ylabel="Einzigartige Nutzer", label="Einzigartige Nutzer", color="blue") + # fig_daily, ax_daily_requests = plot(dates, [len(request_ids) for request_ids in unique_request_ids_for_dates], fig=fig_daily, ax=ax_daily_users, xlabel="Datum", ylabel="Einzigartige Anfragen", label="Einzigartige Anfragen", color="orange") + # fig_daily.savefig(f"{img_dir}/daily.{img_filetype}") + if gen_img: + fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], [len(request_ids) for request_ids in unique_request_ids_for_dates], xlabel="Date", ylabel1="User count", label1="Unique users", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"]) + if get_humans: + fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates_human], [len(request_ids) for request_ids in unique_request_ids_for_dates_human], label1="Unique users (human)", ylabel2="Einzigartige Anfragen", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots) + fig_daily.savefig(names["img_daily"]) print("OS ranking", os_ranking) print("Browser ranking", browser_ranking) - print("Mobile percentage", mobile_user_percentage) + print("Mobile percentage", names["mobile_user_percentage"]) + print(dates, "\n\tuu", unique_user_ids_for_dates, "\n\tur",unique_request_ids_for_dates, "\n\tuuh", unique_user_ids_for_dates_human, "\n\turh", unique_request_ids_for_dates_human) + if settings["template_html"] and settings["html_out_path"]: + with open(settings["template_html"], "r") as file: + html = file.read() + for name, value in names.items(): + html = html.replace(f"%{name}", str(value)) + with open(settings["html_out_path"], "w") as file: + file.write(html) + +