This commit is contained in:
Matthias@Dell 2022-11-23 16:49:25 +01:00
parent ea5189584e
commit 1ea034de17
3 changed files with 308 additions and 158 deletions

View File

@ -2,30 +2,36 @@ from enum import auto
from collect import parse_log, add_requests_to_db from collect import parse_log, add_requests_to_db
from sys import argv, exit from sys import argv, exit
from database import create_db from database import create_db
from os.path import isfile from os.path import isfile, isdir
from visualize import visualize
from settings_manager import read_settings_file
def parse_config_file(path): # default settings, these are overwriteable through a config file
server_name ="" settings = {
access_log_path = "" # GENERAL
db_path = "" "server-name": "",
filegroups = "" # DATA COLLECTION
locs_and_dirs = [] "access-log": "",
auto_group_filetypes = [] "db": "",
with open(path, "r") as file: "locs-and-dirs": [],
lines = file.readlines() "auto-group-filetypes": [],
for line in lines: "filegroups": "",
line = line.strip("\n ")
if line.startswith("#"): continue # VISUALIZATION
arg, val = line.split("=") "get-human-percentage": False,
arg = arg.strip(" ") # "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
val = val.strip(" ") "file_ranking_regex_whitelist": r".*\.(html)",
if arg == "server-name": server_name = val "referer_ranking_regex_whitelist": r"^[^\-].*", # minus means empty
elif arg == "log": access_log_path = val "user_agent_ranking_regex_whitelist": r"",
elif arg == "db": db_path = val "file_ranking_plot_max_files": 15,
elif arg == "filegroups": filegroups = val # "plot_figsize": (60, 40),
elif arg == "locs-and-dirs": locs_and_dirs = val "plot_dpi": 300,
elif arg == "auto-group-filetypes": auto_group_filetypes = val "img_dir": "",
return server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes "img_filetype": "svg",
"template_html": "",
"html_out_path": "",
"last_x_days": 30,
}
def help(): def help():
@ -33,12 +39,13 @@ def help():
--server-name string --server-name string
--log path to the access.log --log path to the access.log
--db name of the database --db name of the database
--filegroups string describing filegroups, eg 'name1: file1, file2; name2: file3, file4, file5;' --settings["filegroups"] string describing settings["filegroups"], eg 'name1: file1, file2; name2: file3, file4, file5;'
--auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif' --auto-group-filetypes comma separated list of filetypes, eg 'css,png,gif'
--locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website' --locs-and_dirs comma separated list of nginx_location:directory pairs, eg '/:/www/website'
--config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line --config-file path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line
""" """
print(helpstring) print(helpstring)
def missing_arg_val(arg): def missing_arg_val(arg):
print("Missing argument for", arg) print("Missing argument for", arg)
exit(1) exit(1)
@ -46,62 +53,53 @@ def missing_arg_val(arg):
def missing_arg(arg): def missing_arg(arg):
print("Missing ", arg) print("Missing ", arg)
exit(1) exit(1)
def error(arg):
    """Print ``arg`` prefixed with "Error:" and stop the program with exit status 1."""
    prefix = "Error:"
    print(prefix, arg)
    # same effect as sys.exit(1): raises SystemExit carrying status 1
    raise SystemExit(1)
if __name__ == '__main__': if __name__ == '__main__':
server_name =""
access_log_path = ""
db_path = ""
config_file = "" config_file = ""
filegroups = "" collect = False
auto_group_filetypes =[] visualize_ = False
locs_and_dirs = []
# parse args # parse args
i = 1 i = 1
while i in range(1, len(argv)): while i in range(1, len(argv)):
if argv[i] == "--server-name": if argv[i] == "--config":
if len(argv) > i + 1: server_name = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--db":
if len(argv) > i + 1: db_path = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--log":
if len(argv) > i + 1: access_log_path = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--config":
if len(argv) > i + 1: config_file = argv[i+1] if len(argv) > i + 1: config_file = argv[i+1]
else: missing_arg_val(argv[i]) else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--filegroups":
if len(argv) > i + 1: filegroups = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--auto-group-filetypes":
if len(argv) > i + 1: auto_group_filetypes = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--locs-and-dirs":
if len(argv) > i + 1: locs_and_dirs = argv[i+1]
else: missing_arg_val(argv[i])
i += 1
elif argv[i] == "--help": elif argv[i] == "--help":
help() help()
exit(0) exit(0)
elif argv[i] == "--collect":
collect = True
exit(0)
elif argv[i] == "--visualize":
visualize_ = True
else: else:
pass
i += 1 i += 1
if not collect and not visualize_:
missing_arg("--visualize or --collect")
if config_file: if not config_file:
server_name, access_log_path, db_path, filegroups, locs_and_dirs, auto_group_filetypes = parse_config_file(config_file) missing_arg("--config_file")
if not isfile(config_file):
error(f"Not a file: '{config_file}'")
read_settings_file(config_file, settings)
if not server_name: missing_arg("--server-name") if not settings["server-name"]: missing_arg("server-name")
if not access_log_path: missing_arg("--log") if not settings["access-log"]: missing_arg("log")
if not db_path: missing_arg("--db") if not settings["db"]: missing_arg("db")
if type(auto_group_filetypes) == str: if type(settings["auto-group-filetypes"]) == str:
auto_group_filetypes = auto_group_filetypes.split(",") settings["auto-group-filetypes"] = settings["auto-group-filetypes"].split(",")
if type(locs_and_dirs) == str: if type(settings["locs-and-dirs"]) == str:
locs_and_dirs = [ loc_and_dir.split(":") for loc_and_dir in locs_and_dirs.split(",") ] settings["locs-and-dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs-and-dirs"].split(",") ]
if not isfile(db_path): if collect:
create_db(db_path, filegroups, locs_and_dirs, auto_group_filetypes) if not isfile(settings["db"]):
requests = parse_log(access_log_path) create_db(settings["db"], settings["filegroups"], settings["locs-and-dirs"], settings["auto-group-filetypes"])
add_requests_to_db(requests, db_path) requests = parse_log(settings["access-log"])
add_requests_to_db(requests, settings["db"])
# `visualize` is the function imported from the visualize module and is
# therefore always truthy; the command line flag is stored in `visualize_`.
if visualize_:
    if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
    visualize(settings)

View File

@ -1,4 +1,9 @@
def get_bool(bool_str: str, fallback=False):
    """
    Translate a textual boolean into a real bool.

    Only the exact spellings "true"/"True" and "false"/"False" are recognised;
    every other input yields `fallback` unchanged.
    """
    spellings_to_value = (
        (("true", "True"), True),
        (("false", "False"), False),
    )
    for spellings, value in spellings_to_value:
        if bool_str in spellings:
            return value
    return fallback
def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True): def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True):
lines = [] lines = []
with open(filepath, "r") as file: with open(filepath, "r") as file:
@ -16,6 +21,9 @@ def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True,
if ignore_invalid_lines: continue if ignore_invalid_lines: continue
else: raise KeyError(f"Invalid key: '{vals[0]}'") else: raise KeyError(f"Invalid key: '{vals[0]}'")
if convert_to_type and type(settings[vals[0]]) not in [str, None]: if convert_to_type and type(settings[vals[0]]) not in [str, None]:
if type(settings[vals[0]]) == bool:
settings[vals[0]] = get_bool(vals[1].strip(" "), fallback=settings[vals[0]])
continue
try: try:
settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" ")) settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
except Exception as e: except Exception as e:

View File

@ -5,69 +5,59 @@ from sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize
from re import fullmatch, findall from re import fullmatch, findall
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib as mpl import matplotlib as mpl
from os.path import isdir
"""
TODO:
- merge similar referrers, e.g. www.google.de and https://google.com
"""
settings = { settings = {}
# "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
"file_ranking_regex_whitelist": r".*\.(html)", palette = {
"referer_ranking_regex_whitelist": r"", "red": "#ee4035",
"user_agent_ranking_regex_whitelist": r"", "orange": "#f37736",
"file_ranking_plot_max_files": 15, "yellow": "#fdf458",
# "plot_figsize": (60, 40), "green": "#7bc043",
"plot_dpi": 300, "blue": "#0392cf",
"plot_tight_layout": False, "purple": "#b044a0",
"plot_constrained_layout": False,
} }
color_settings_filetypes = { color_settings_filetypes = {
"red": ["html"], palette["red"]: ["html"],
"green": ["jpg", "png", "jpeg", "gif", "svg", "webp"], palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"],
"yellow": ["css"], palette["yellow"]: ["css"],
"grey": ["txt"] "grey": ["txt"]
} }
color_settings_alternate = list(palette.values())
img_ft = "svg"
# these oses and browser can be detected: # these oses and browser can be detected:
# lower element takes precedence # lower element takes precedence
user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"] user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"] user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
color_settings_browsers = { color_settings_browsers = {
"red": ["Safari"], palette["red"]: ["Safari"],
"orange": ["Firefox"], palette["orange"]: ["Firefox"],
"yellow": ["Chrome"], palette["yellow"]: ["Chrome"],
"grey": ["Edge"], "grey": ["Edge"],
"green": ["Chromium"], palette["green"]: ["Chromium"],
"teal": ["Brave"] palette["purple"]: ["Brave"]
} }
color_settings_operating_systems = { color_settings_operating_systems = {
"red": ["Macintosh"], palette["red"]: ["Macintosh"],
"green": ["Android"], palette["green"]: ["Android"],
"grey": ["iPhone", "iPad"], "grey": ["iPhone", "iPad"],
"yellow": ["Linux"], palette["yellow"]: ["Linux"],
"teal": ["BSD"], palette["purple"]: ["BSD"],
"#6464ff": ["Windows"], palette["blue"]: ["Windows"],
} }
def len_list_list(l: list[list]):
    """
    Return the total number of elements contained in all inner lists.

    :param l: a collection of sized collections, e.g. one list of ids per day
    :returns the sum of len() of every inner collection, 0 if `l` is empty
    """
    return sum(len(inner) for inner in l)
# get all dates
def get_dates(cur: sql.Cursor) -> list[str]:
cur.execute(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}")
return [ date[0] for date in cur.fetchall() ] # fetchall returns tuples (date, )
def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
return [ user_id[0] for user_id in cur.fetchall() ]
# get number of requests per day
def get_request_count_for_date(cur: sql.Cursor, date:str) -> int:
return sql_get_count_where(cur, t_request, [("DATE(date, 'unixepoch')", date)])
def get_unique_user_count(cur: sql.Cursor) -> int:
return sql_tablesize(cur, t_user)
def get_user_agent(cur: sql.Cursor, user_id: int):
return sql_select(cur, t_user, [("user_id", user_id)])[0]
# #
# FILTERS # FILTERS
# #
@ -86,7 +76,7 @@ def get_os_browser_pairs_from_agent(user_agent):
if br in user_agent: if br in user_agent:
browser = br browser = br
break break
if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'") # if not operating_system or not browser: print(f"Warning: get_os_browser_pairs_from_agent: Could not find all information for agent '{user_agent}', found os: '{operating_system}' and browser: '{browser}'")
return operating_system, browser, mobile return operating_system, browser, mobile
def get_os_browser_mobile_rankings(user_agent_ranking): def get_os_browser_mobile_rankings(user_agent_ranking):
@ -115,16 +105,60 @@ def get_os_browser_mobile_rankings(user_agent_ranking):
except ZeroDivisionError: except ZeroDivisionError:
mobile_user_percentage = 0.0 mobile_user_percentage = 0.0
os_ranking = [(c/os_count, n) for n, c in os_ranking.items()] os_ranking = [(c * 100/os_count, n) for n, c in os_ranking.items()]
os_ranking.sort() os_ranking.sort()
browser_ranking = [(c/browser_count, n) for n, c in browser_ranking.items()] browser_ranking = [(c * 100/browser_count, n) for n, c in browser_ranking.items()]
browser_ranking.sort() browser_ranking.sort()
return os_ranking, browser_ranking, mobile_user_percentage return os_ranking, browser_ranking, mobile_user_percentage*100
#
# GETTERS
#
# get all dates
def get_dates(cur: sql.Cursor) -> list[str]:
    """
    :returns every distinct value of DATE(date, 'unixepoch') found in the request table
    """
    query = f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request}"
    cur.execute(query)
    # every fetched row is a 1-tuple (date, ), unpack it
    return [row[0] for row in cur.fetchall()]
def get_unique_user_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
    """
    :param date: date string; it is passed through sanitize() and compared with DATE(date, 'unixepoch')
    :returns the distinct user_id values of all requests made on `date`
    """
    query = f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'"
    cur.execute(query)
    return [row[0] for row in cur.fetchall()]
def get_user_agent(cur: sql.Cursor, user_id: int):
    """
    Select the rows of the user table matching `user_id` and return the value
    at index 2 of the first row.
    NOTE(review): index 2 is presumably the user agent column - confirm against the table layout.
    Raises IndexError if no row matches.
    """
    matching_rows = sql_select(cur, t_user, [("user_id", user_id)])
    first_row = matching_rows[0]
    return first_row[2]
def get_unique_user_ids_for_date_human(cur: sql.Cursor, date: str):
    """
    Like get_unique_user_ids_for_date, but keeps only users that look human.

    A user is kept when get_os_browser_pairs_from_agent() finds both an
    operating system and a browser in the user agent.

    :returns list of user ids
    """
    cur.execute(f"SELECT DISTINCT user_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'")
    # fetch all ids up front: `cur` is handed to get_user_agent() inside the loop
    candidate_ids = [row[0] for row in cur.fetchall()]
    humans = []
    for candidate in candidate_ids:
        agent = get_user_agent(cur, candidate)
        platform, browser_name, _ = get_os_browser_pairs_from_agent(agent)
        if not (platform and browser_name):
            continue
        humans.append(candidate)
    return humans
def get_unique_request_ids_for_date(cur: sql.Cursor, date:str) -> list[int]:
    """
    :param date: date string; it is passed through sanitize() and compared with DATE(date, 'unixepoch')
    :returns the distinct request_id values of all requests made on `date`
    """
    query = f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}'"
    cur.execute(query)
    return [row[0] for row in cur.fetchall()]
def get_unique_request_ids_for_date_and_user(cur: sql.Cursor, date:str, user_id: int) -> list[int]:
    """
    :param date: date string; it is passed through sanitize() and compared with DATE(date, 'unixepoch')
    :param user_id: id of the user; inserted into the query as-is
    :returns the distinct request_id values of all requests made by `user_id` on `date`
    """
    query = f"SELECT DISTINCT request_id FROM {t_request} WHERE DATE(date, 'unixepoch') = '{sanitize(date)}' AND user_id = {user_id}"
    cur.execute(query)
    return [row[0] for row in cur.fetchall()]
# get number of requests per day
def get_request_count_for_date(cur: sql.Cursor, date:str) -> int:
    """
    :returns the result of sql_get_count_where() on the request table with the
             single condition DATE(date, 'unixepoch') == `date`
    """
    date_condition = ("DATE(date, 'unixepoch')", date)
    return sql_get_count_where(cur, t_request, [date_condition])
def get_unique_user_count(cur: sql.Cursor) -> int:
    """
    :returns the size of the user table as reported by sql_tablesize()
    """
    user_table_size = sql_tablesize(cur, t_user)
    return user_table_size
# #
# RANKINGS # RANKINGS
# #
def get_file_ranking(cur: sql.Cursor, min_date_unix_time = 0) -> list[tuple[int, str]]: def get_file_ranking(cur: sql.Cursor, min_date_unix_time = 0) -> list[tuple[int, str]]:
global settings
""" """
:returns [(request_count, filename)] :returns [(request_count, filename)]
""" """
@ -197,7 +231,7 @@ def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot):
height = rect.get_height() height = rect.get_height()
if height > 0.8 * max_y_val: if height > 0.8 * max_y_val:
height = 0.05 * max_y_val height = 0.05 * max_y_val
ax.text(rect.get_x() + rect.get_width()/2., 1.05*height, ax.text(rect.get_x() + rect.get_width()/2., height + 0.025 * max_y_val,
labels[idx], labels[idx],
ha='center', va='bottom', rotation=90) ha='center', va='bottom', rotation=90)
@ -206,7 +240,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
make a bar plot of the most requested files make a bar plot of the most requested files
""" """
if not fig: if not fig:
fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, tight_layout=settings["plot_tight_layout"], constrained_layout=settings["plot_constrained_layout"]) fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
# create new axis if none is given # create new axis if none is given
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel) ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
# fill x y data # fill x y data
@ -227,6 +261,7 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
if ft in val: color = key if ft in val: color = key
if not color: color = "blue" if not color: color = "blue"
elif type(color_settings) == list: elif type(color_settings) == list:
# print(color_settings, (i - start_index) % len(color_settings))
color = color_settings[(i - start_index) % len(color_settings)] color = color_settings[(i - start_index) % len(color_settings)]
colors.append(color) colors.append(color)
bar = ax.bar(x_names, y_counts, tick_label="", color=colors) bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
@ -235,8 +270,54 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
return fig return fig
def plot_users_per_day(days, user_counts, fig=None): def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue"):
pass if not fig:
fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
if not ax:
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
else:
ax = ax.twinx()
ax.set_ylabel(ylabel)
# ax.tick_params(axis="y", labelcolor="r")
ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
if label: ax.legend()
# if xlim:
# if xlim[0] != xlim[1]:
# ax.set_xlim(*xlim)
# if ylim:
# if ylim[0] != ylim[1]:
# ax.set_ylim(*ylim)
return fig, ax
def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major"):
    """
    Plot two data series over the same x values, each on its own y axis.

    :param xdata: x values shared by both series
    :param ydata1: y values drawn on ax1
    :param ydata2: y values drawn on ax2
    :param fig: figure to draw into, a new one (dpi from settings["plot_dpi"]) is created if not given
    :param ax1: existing first axis
    :param ax2: existing second axis; unless BOTH axes are given, a new pair is
                created (ax2 = ax1.twinx()). xlabel, ylabel1 and ylabel2 are only
                applied when the axes are created here
    :param plots: lines returned by an earlier call; the new lines are appended
                  to this list in place so that the legend covers all of them
    :param grid: "major", "minor" or "both" draws that grid on ax1, any other value draws none
    :returns fig, ax1, ax2, plots
    """
    if not fig:
        fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    if not (ax1 and ax2):
        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
        ax2 = ax1.twinx()
        ax2.set_ylabel(ylabel2)
    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
    # one legend for the lines of both axes (and of earlier calls)
    if plots: plots += plot1 + plot2
    else: plots = plot1 + plot2
    plt.legend(plots, [line.get_label() for line in plots])

    if grid in ("major", "minor", "both"):
        # minor ticks are only needed when the minor grid is shown
        # (`grid == "minor" or "both"` was always true)
        if grid in ("minor", "both"):
            ax1.minorticks_on()
        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")
    return fig, ax1, ax2, plots
# #
@ -249,53 +330,116 @@ def missing_arg_val(arg):
def missing_arg(arg): def missing_arg(arg):
print("Missing ", arg) print("Missing ", arg)
exit(1) exit(1)
if __name__ == '__main__':
server_name ="" def visualize(loaded_settings: dict):
db_path = "" global settings
# parse args settings = loaded_settings
i = 1 if not settings["db"]: missing_arg("db")
while i in range(1, len(argv)): if not settings["server-name"]: missing_arg("server-name")
if argv[i] == "--server-name":
if len(argv) > i + 1: server_name = argv[i+1] img_dir = settings["img_dir"]
else: missing_arg_val(argv[i]) img_filetype = settings["img_filetype"]
i += 1 names = {
elif argv[i] == "--db": "img_file_ranking": f"{img_dir}/ranking_all_time_files.{img_filetype}",
if len(argv) > i + 1: db_path = argv[i+1] "img_referer_ranking": f"{img_dir}/ranking_all_time_referers.{img_filetype}",
else: missing_arg_val(argv[i]) "img_browser_ranking": f"{img_dir}/ranking_all_time_browsers.{img_filetype}",
i += 1 "img_operating_system_ranking": f"{img_dir}/ranking_all_time_operating_systems.{img_filetype}",
"img_daily": f"{img_dir}/user_request_count_daily.{img_filetype}",
"mobile_user_percentage": 0.0,
"server-name": settings["server-name"],
"last_x_days": settings["last_x_days"],
# order matters!
"total_user_count_x_days": 0,
"total_request_count_x_days": 0,
"total_user_count": 0,
"total_request_count": 0,
"human_user_percentage_x_days": 0,
"human_request_percentage_x_days": 0,
"human_user_percentage": 0,
"human_request_percentage": 0,
}
conn = sql.connect(settings["db"])
if isdir(img_dir) and img_filetype:
gen_img = True
else: else:
i += 1 print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
gen_img = False
if not server_name: missing_arg("--server-name")
if not db_path: missing_arg("--db")
conn = sql.connect(db_path)
cur = conn.cursor() cur = conn.cursor()
file_ranking = get_file_ranking(cur)
referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur)
user_agent_ranking = get_user_agent_ranking(cur)
for count, agent in user_agent_ranking:
get_os_browser_pairs_from_agent(agent)
fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes)
fig_file_ranking.savefig(f"ranking_files.{img_ft}")
get_humans = settings["get-human-percentage"]
print("\t>>>>>>", get_humans)
# files
file_ranking = get_file_ranking(cur)
if gen_img:
fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes)
fig_file_ranking.savefig(names["img_file_ranking"])
# referer
referer_ranking = get_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur)
print("Referer ranking", referer_ranking)
if gen_img:
fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate)
fig_referer_ranking.savefig(names["img_referer_ranking"])
# dates
dates = get_dates(cur) dates = get_dates(cur)
unique_users_for_dates = [] # user
print(dates, unique_users_for_dates) user_agent_ranking = get_user_agent_ranking(cur)
unique_user_ids_for_dates = []
unique_request_ids_for_dates = []
unique_user_ids_for_dates_human = []
unique_request_ids_for_dates_human = []
for date in dates: for date in dates:
unique_users_for_dates.append(get_unique_user_ids_for_date(cur, date)) unique_user_ids_for_dates.append(get_unique_user_ids_for_date(cur, date))
print(dates, unique_users_for_dates) unique_request_ids_for_dates.append(get_unique_request_ids_for_date(cur, date))
os_ranking, browser_ranking, mobile_user_percentage = get_os_browser_mobile_rankings(user_agent_ranking) if get_humans:
fig_os_rating = plot_ranking(os_ranking, xlabel="Operating Systems", ylabel="Percentage", color_settings=color_settings_operating_systems) unique_user_ids_for_dates_human.append(get_unique_user_ids_for_date_human(cur, date))
fig_os_rating.savefig(f"ranking_operating_systems.{img_ft}") unique_request_ids_for_dates_human.append([])
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Percentage", color_settings=color_settings_browsers) for human in unique_user_ids_for_dates_human[-1]:
fig_browser_rating.savefig(f"ranking_browsers.{img_ft}") unique_request_ids_for_dates_human[-1] += get_unique_request_ids_for_date_and_user(cur, date, human)
if get_humans:
try:
names["human_user_percentage_x_days"] = round(100 * len_list_list(unique_user_ids_for_dates_human) / len_list_list(unique_user_ids_for_dates), 2)
names["human_request_percentage_x_days"] = round(100 * len_list_list(unique_request_ids_for_dates_human) / len_list_list(unique_request_ids_for_dates), 2)
except: pass
print(">>>", len_list_list(unique_request_ids_for_dates), len_list_list(unique_request_ids_for_dates_human))
names["total_user_count"] = sql_tablesize(cur, t_user)
names["total_request_count"] = sql_tablesize(cur, t_request)
names["total_user_count_x_days"] = len_list_list(unique_user_ids_for_dates)
names["total_request_count_x_days"] = len_list_list(unique_request_ids_for_dates)
# os & browser
os_ranking, browser_ranking, names["mobile_user_percentage"] = get_os_browser_mobile_rankings(user_agent_ranking)
if gen_img:
fig_os_rating = plot_ranking(os_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_operating_systems)
fig_os_rating.savefig(names["img_operating_system_ranking"])
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Share [%]", color_settings=color_settings_browsers)
fig_browser_rating.savefig(names["img_browser_ranking"])
# print("File Ranking", file_ranking) # print("File Ranking", file_ranking)
# print("referer Ranking", referer_ranking) # print("referer Ranking", referer_ranking)
# print("user agent ranking", user_agent_ranking) # print("user agent ranking", user_agent_ranking)
# print("Unique Users:", get_unique_user_count(cur)) # print("Unique Users:", get_unique_user_count(cur))
# fig_daily, ax_daily_users = plot(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], xlabel="Datum", ylabel="Einzigartige Nutzer", label="Einzigartige Nutzer", color="blue")
# fig_daily, ax_daily_requests = plot(dates, [len(request_ids) for request_ids in unique_request_ids_for_dates], fig=fig_daily, ax=ax_daily_users, xlabel="Datum", ylabel="Einzigartige Anfragen", label="Einzigartige Anfragen", color="orange")
# fig_daily.savefig(f"{img_dir}/daily.{img_filetype}")
if gen_img:
fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates], [len(request_ids) for request_ids in unique_request_ids_for_dates], xlabel="Date", ylabel1="User count", label1="Unique users", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"])
if get_humans:
fig_daily, ax1, ax2, plots = plot2y(dates, [len(user_ids) for user_ids in unique_user_ids_for_dates_human], [len(request_ids) for request_ids in unique_request_ids_for_dates_human], label1="Unique users (human)", ylabel2="Einzigartige Anfragen", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots)
fig_daily.savefig(names["img_daily"])
print("OS ranking", os_ranking) print("OS ranking", os_ranking)
print("Browser ranking", browser_ranking) print("Browser ranking", browser_ranking)
print("Mobile percentage", mobile_user_percentage) print("Mobile percentage", names["mobile_user_percentage"])
print(dates, "\n\tuu", unique_user_ids_for_dates, "\n\tur",unique_request_ids_for_dates, "\n\tuuh", unique_user_ids_for_dates_human, "\n\turh", unique_request_ids_for_dates_human)
if settings["template_html"] and settings["html_out_path"]:
with open(settings["template_html"], "r") as file:
html = file.read()
for name, value in names.items():
html = html.replace(f"%{name}", str(value))
with open(settings["html_out_path"], "w") as file:
file.write(html)