refactored visualization

rankings and statistics now use more sql features for better performance
added data export
changed html variable names
This commit is contained in:
matthias@arch 2023-05-15 21:58:02 +02:00
parent cf1294882b
commit 4a97335b96
5 changed files with 545 additions and 407 deletions

View File

@ -0,0 +1,53 @@
from regina.database import Database
def get_visitor_count_between(db: Database, timestamps: tuple[int, int], only_human=False):
    """
    Count the visitors that made at least one request within a time span.

    :param db: database handle; called with an SQL string it returns the result rows
    :param timestamps: (start, end) limits for request.time, both inclusive (SQL BETWEEN)
    :param only_human: if True, only visitors with is_human = 1 are counted
    :returns: the number of matching visitors
    """
    start, end = timestamps[0], timestamps[1]
    human_filter = "AND v.is_human = 1" if only_human else ""
    # EXISTS makes sure a visitor is counted once, no matter how many
    # requests of that visitor fall into the time span
    query = (
        "SELECT COUNT(visitor_id)\n"
        "FROM visitor AS v\n"
        "WHERE EXISTS (\n"
        "SELECT 1\n"
        "FROM request as r\n"
        "WHERE r.visitor_id = v.visitor_id\n"
        f"AND r.time BETWEEN {start} AND {end}\n"
        ")\n"
        f"{human_filter}"
    )
    rows = db(query)
    return rows[0][0]
def get_request_count_between(db: Database, timestamps: tuple[int, int], only_human=False):
    """
    Count the requests that were made within a time span.

    :param db: database handle; called with an SQL string it returns the result rows
    :param timestamps: (start, end) limits for request.time, both inclusive (SQL BETWEEN)
    :param only_human: if True, only requests from visitors with is_human = 1 are counted
    :returns: the number of matching requests
    """
    start, end = timestamps[0], timestamps[1]
    human_filter = "AND v.is_human = 1" if only_human else ""
    # Each request has to be paired with its own visitor only. Without the
    # visitor_id condition the implicit join is a cross product and every
    # request would be counted once per row of the visitor table.
    query = (
        "SELECT COUNT(r.request_id)\n"
        "FROM request AS r, visitor AS v\n"
        f"WHERE r.time BETWEEN {start} AND {end}\n"
        "AND r.visitor_id = v.visitor_id\n"
        f"{human_filter}"
    )
    return db(query)[0][0]
def get_new_visitor_count_between(db: Database, timestamps: tuple[int, int]):
    """
    Count the visitors whose first request lies within a time span.

    :param db: database handle; called with an SQL string it returns the result rows
    :param timestamps: (start, end) limits for the time of the first request, both inclusive (SQL BETWEEN)
    :returns: the number of visitors whose earliest request.time is within the limits
    """
    start, end = timestamps[0], timestamps[1]
    # the subquery yields one row per visitor holding the time of the earliest request
    query = (
        "SELECT COUNT(*)\n"
        "FROM visitor AS v\n"
        "JOIN (\n"
        "SELECT visitor_id, MIN(time) AS first_request_time\n"
        "FROM request\n"
        "GROUP BY visitor_id\n"
        ") AS r ON v.visitor_id = r.visitor_id\n"
        f"WHERE r.first_request_time BETWEEN {start} AND {end}"
    )
    rows = db(query)
    return rows[0][0]
def get_request_from_new_visitor_count_between(db: Database, timestamps: tuple[int, int]):
    """
    Count the requests made by visitors whose first request lies within a time span.

    Only the time of the visitor's first request is checked against the limits,
    the counted requests themselves are not restricted to the time span.

    :param db: database handle; called with an SQL string it returns the result rows
    :param timestamps: (start, end) limits for the time of the first request, both inclusive (SQL BETWEEN)
    :returns: the number of requests belonging to the matching visitors
    """
    start, end = timestamps[0], timestamps[1]
    # the subquery yields one row per visitor holding the time of the earliest request
    query = (
        "SELECT COUNT(*)\n"
        "FROM request AS r\n"
        "JOIN (\n"
        "SELECT visitor_id, MIN(time) AS first_request_time\n"
        "FROM request\n"
        "GROUP BY visitor_id\n"
        ") AS v ON r.visitor_id = v.visitor_id\n"
        f"WHERE v.first_request_time BETWEEN {start} AND {end}"
    )
    rows = db(query)
    return rows[0][0]
def get_mobile_visitor_count_between(db: Database, timestamps: tuple[int, int], only_human=True) -> int:
    """
    Count the mobile visitors that made at least one request within a time span.

    :param db: database handle; called with an SQL string it returns the result rows
    :param timestamps: (start, end) limits for request.time, both inclusive (SQL BETWEEN)
    :param only_human: if True (the default), only visitors with is_human = 1 are counted
    :returns: the number of matching visitors with is_mobile = 1 (a COUNT(*) result, so an integer)
    """
    start, end = timestamps[0], timestamps[1]
    human_filter = "AND v.is_human = 1" if only_human else ""
    # EXISTS makes sure a visitor is counted once, no matter how many
    # requests of that visitor fall into the time span
    query = (
        "SELECT COUNT(*)\n"
        "FROM visitor AS v\n"
        "WHERE EXISTS (\n"
        "SELECT 1\n"
        "FROM request as r\n"
        "WHERE r.visitor_id = v.visitor_id\n"
        f"AND r.time BETWEEN {start} AND {end}\n"
        ")\n"
        f"{human_filter}\n"
        "AND v.is_mobile = 1"
    )
    return db(query)[0][0]

View File

@ -2,54 +2,68 @@ from re import fullmatch
from regina.database import Database from regina.database import Database
from regina.utility.globals import settings from regina.utility.globals import settings
from regina.utility.utility import pdebug, warning, missing_arg, is_blacklisted, is_whitelisted from regina.utility.utility import pdebug, warning, is_blacklisted, is_whitelisted
from regina.utility.sql_util import sanitize
from regina.data_visualization.utility import is_valid_status, cleanup_referer from regina.data_visualization.utility import is_valid_status, cleanup_referer
def get_route_ranking(db: Database, date_condition:str) -> list[tuple[int, str]]: def get_route_ranking(db: Database, timestamps: tuple[int, int]) -> list[tuple[int, str]]:
""" """
:returns [(request_count, route name)] :returns [(request_count, route name)]
""" """
ranking = [] ranking = []
for (route_id, name) in db(f"SELECT route_id, name FROM route"): for (route_id, name) in db(f"SELECT route_id, name FROM route"):
if is_blacklisted(name, settings["route_ranking_blacklist"]): continue if is_blacklisted(name, settings["rankings"]["route_blacklist"]): continue
if not is_whitelisted(name, settings["route_ranking_whitelist"]): continue if not is_whitelisted(name, settings["rankings"]["route_whitelist"]): continue
if settings["route_ranking_ignore_404"]: # use only succesful routes if settings["rankings"]["route_ignore_404"]: # use only succesful routes
success = False success = False
for (status) in db(f"SELECT status FROM request WHERE route_id = {route_id}"): for (status, ) in db(f"SELECT status FROM request WHERE route_id = {route_id}"):
if is_valid_status(status): if is_valid_status(status):
pdebug(f"get_route_ranking: success code {status} for route with route_id {route_id} and name {name}") pdebug(f"get_route_ranking: success code {status} for route with route_id {route_id} and name {name}", lvl=4)
success = True success = True
break break
if not success: if not success:
pdebug(f"get_route_ranking: route with route_id {route_id} and name {name} has only requests resulting in error") pdebug(f"get_route_ranking: route with route_id {route_id} and name {name} has only requests resulting in error", lvl=3)
continue continue
db.execute(f"SELECT COUNT(*) FROM request WHERE route_id = {route_id} AND {date_condition}") db.execute(f"SELECT COUNT(*) FROM request WHERE route_id = {route_id} AND time BETWEEN {timestamps[0]} AND {timestamps[1]}")
ranking.append((db.fetchone()[0], name)) ranking.append((db.fetchone()[0], name))
ranking.sort() ranking.sort()
return ranking return ranking
def route_ranking_group_routes(route_ranking: list[tuple[int, str]]):
def get_ranking(db: Database, table: str, field_name: str, date_condition:str, whitelist_regex: str|list[str]|None=None, blacklist_regex: str|list[str]|None=None) -> list[tuple[int, str]]:
""" """
1) get all the distinct entries for field_name after min_date_unix_time group the routes in the route ranking according the groups defined in the config section "route-groups"
2) call get_name_function with the distinct entry """
3) skip if not fully matching regex whitelist ranking = {}
4) skip if fully matching regex blacklist for count, route in route_ranking:
5) for every entry, get the count in table after min_date_unix_time ingroup = False
6) sort by count in ascending order for group_name, group_regexp in settings["route-groups"].items():
@returns [(count, name)] if fullmatch(group_regexp, route):
if group_name in ranking:
ranking[group_name] += count
else:
ranking[group_name] = count
ingroup = True
if not ingroup:
ranking[route] = count
ranking = [ (c, name) for name, c in ranking.items() ]
ranking.sort()
return ranking
def get_referer_ranking(db: Database, timestamps: tuple[int, int]) -> list[tuple[int, str]]:
"""
@returns [(count, referer)]
""" """
ranking = [] ranking = []
for (name) in db(f"SELECT DISTINCT {field_name} FROM {table} WHERE {date_condition}"): for referer_id, name in db(f"SELECT referer_id, name FROM referer"):
if is_blacklisted(name, blacklist_regex): continue if is_blacklisted(name, settings["rankings"]["referer_blacklist"]): continue
if not is_whitelisted(name, whitelist_regex): continue if not is_whitelisted(name, settings["rankings"]["referer_whitelist"]): continue
db.execute(f"SELECT COUNT(*) FROM {table} WHERE {field_name} = '{name}' AND {date_condition}") db.execute(f"SELECT COUNT(*) FROM request WHERE referer_id = {referer_id} AND time BETWEEN {timestamps[0]} AND {timestamps[1]}")
ranking.append((db.fetchone()[0], name)) ranking.append((db.fetchone()[0], name))
ranking.sort() ranking.sort()
return ranking return ranking
def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]): def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
unique_referers = dict() unique_referers = dict()
for count, referer in referer_ranking: for count, referer in referer_ranking:
@ -64,88 +78,129 @@ def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
referer_ranking.sort() referer_ranking.sort()
def get_city_and_country_ranking(db: Database, require_humans=True): def get_city_ranking(db: Database, timestamps: tuple[int, int], add_country_code=True, only_human=True):
""" """
@returns [(count, "city (CO)")], [(count, country)] @returns [(count, city (Country Code))]
""" """
cities_dict = {} ranking = []
country_dict = {} results = db(f"""SELECT co.code, ci.name,COUNT(v.visitor_id)
FROM country as co, city as ci, visitor as v, ip_range as i
sql_cmd = f"SELECT ci.name, co.code, co.name FROM country AS co, city as ci, visitor as v, ip_range as i WHERE v.ip_range_id = i.ip_range_id AND i.city_id = ci.city_id AND ci.country_id = co.country_id" WHERE ci.city_id = i.city_id
if require_humans: sql_cmd += " AND v.is_human = 1" AND co.country_id = ci.country_id
result = db(sql_cmd) AND i.ip_range_id = v.ip_range_id
AND EXISTS(
for (city, country_code, country) in result: SELECT 1
if city in cities_dict: FROM request AS r
cities_dict[city][0] += 1 WHERE r.visitor_id = v.visitor_id
else: AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
if is_blacklisted(city, settings["city_ranking_blacklist"]): continue )
if not is_whitelisted(city, settings["city_ranking_whitelist"]): continue {'AND v.is_human = 1' if only_human else ''}
cities_dict[city] = [1, country_code, country] # count, country code GROUP BY ci.name
ORDER BY COUNT(v.visitor_id)
if country in country_dict: """)
country_dict[country] += 1 for code, name, count in results:
else: if is_blacklisted(name, settings["rankings"]["city_blacklist"]): continue
if is_blacklisted(country, settings["country_ranking_blacklist"]): continue if not is_whitelisted(name, settings["rankings"]["city_whitelist"]): continue
if not is_whitelisted(country, settings["country_ranking_whitelist"]): continue if add_country_code:
country_dict[country] = 1 # count, country code name = f"{name} ({code})"
ranking.append((count, name))
city_ranking = [(v[0], f"{city} ({v[1]})") for city,v in cities_dict.items()] # for (city_id, name) in db(f"SELECT city_id, name FROM city"):
city_ranking.sort() # if is_blacklisted(name, settings["rankings"]["city_blacklist"]): continue
country_ranking = [(count, country) for country,count in country_dict.items()] # if not is_whitelisted(name, settings["rankings"]["city_whitelist"]): continue
country_ranking.sort() # db.execute(f"""SELECT COUNT(v.visitor_id)
return city_ranking, country_ranking # FROM visitor AS v, ip_range AS i
# WHERE i.city_id = {city_id}
# AND i.ip_range_id = v.ip_range_id
# AND EXISTS(
# SELECT 1
# FROM request AS r
# WHERE r.visitor_id = v.visitor_id
# AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
# )
# {'AND v.is_human = 1' if only_human else ''}""")
# ranking.append((db.fetchone()[0], name))
ranking.sort()
return ranking
def get_platform_browser_mobile_rankings(db: Database, visitor_ids: list[int]) -> tuple[list[tuple[int, str]], list[tuple[int, str]], float]: def get_country_ranking(db: Database, timestamps: tuple[int, int], only_human=True):
""" """
returns [(count, operating_system)], [(count, browser)], mobile_visitor_percentage @returns [(count, country)]
""" """
platform_ranking = {} ranking = []
platform_count = 0.0 # for (country_id, name) in db(f"SELECT country_id, name FROM country"):
browser_ranking = {} # if is_blacklisted(name, settings["rankings"]["country_blacklist"]): continue
browser_count = 0.0 # if not is_whitelisted(name, settings["rankings"]["country_whitelist"]): continue
mobile_ranking = { True: 0.0, False: 0.0 } # db.execute(f"""SELECT COUNT(v.visitor_id)
for visitor_id in visitor_ids: # FROM visitor AS v, ip_range AS i, city AS ci
platform_id, browser_id, is_mobile = db(f"SELECT platform_id, browser_id, is_mobile FROM visitor WHERE visitor_id = {visitor_id}")[0] # WHERE ci.country_id = {country_id}
is_mobile = bool(is_mobile) # AND ci.city_id = i.city_id
if platform_id: # AND i.ip_range_id = v.ip_range_id
if platform_id in platform_ranking: platform_ranking[platform_id] += 1 # AND EXISTS(
else: platform_ranking[platform_id] = 1 # SELECT 1
platform_count += 1 # FROM request AS r
if browser_id: # WHERE r.visitor_id = v.visitor_id
if browser_id in browser_ranking: browser_ranking[browser_id] += 1 # AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
else: browser_ranking[browser_id] = 1 # )
browser_count += 1 # {'AND v.is_human = 1' if only_human else ''}""")
if (platform_id or browser_id): # ranking.append((db.fetchone()[0], name))
mobile_ranking[is_mobile] += 1 results = db(f"""SELECT co.name,COUNT(v.visitor_id)
try: FROM country as co, city as ci, visitor as v, ip_range as i
mobile_visitor_percentage = mobile_ranking[True] / (mobile_ranking[True] + mobile_ranking[False]) WHERE co.country_id = ci.country_id
except ZeroDivisionError: AND ci.city_id = i.city_id
mobile_visitor_percentage = 0.0 AND i.ip_range_id = v.ip_range_id
AND EXISTS(
platform_ranking = [(c * 100/platform_count, db.get_name("platform", p_id)) for p_id, c in platform_ranking.items()] SELECT 1
platform_ranking.sort() FROM request AS r
browser_ranking = [(c * 100/browser_count, db.get_name("browser", b_id)) for b_id, c in browser_ranking.items()] WHERE r.visitor_id = v.visitor_id
browser_ranking.sort() AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
return platform_ranking, browser_ranking, mobile_visitor_percentage*100 )
{'AND v.is_human = 1' if only_human else ''}
GROUP BY co.name
ORDER BY COUNT(v.visitor_id)
""")
for name, count in results:
if is_blacklisted(name, settings["rankings"]["country_blacklist"]): continue
if not is_whitelisted(name, settings["rankings"]["country_whitelist"]): continue
ranking.append((count, name))
ranking.sort()
return ranking
# Store ranking in results class and dump with pickle def _get_platform_or_browser_ranking(db: Database, timestamps: tuple[int, int], table: str, only_human=False):
# class Results: ranking = []
# def __init__(self, timespan_name, for (table_id, name) in db(f"SELECT {table}_id, name FROM {table}"):
# r_routes: list[tuple[int, str]], # if is_blacklisted(name, settings["rankings"][f"{table}_blacklist"]): continue
# r_referrers: list[tuple[int, str]], # if not is_whitelisted(name, settings["rankings"][f"{table}_whitelist"]): continue
# r_platforms: list[tuple[int, str]], if name == "None": continue
# r_browsers: list[tuple[int, str]], db.execute(f"""SELECT COUNT(v.visitor_id)
# r_cities: list[tuple[int, str]], FROM visitor AS v, {table} AS t
# r_countries: list[tuple[int, str]], WHERE v.{table}_id = {table_id}
# ): AND EXISTS(
# self.r_routes = r_routes SELECT 1
# self.r_referrers= r_referrers FROM request AS r
# self.r_platforms= r_platforms WHERE r.visitor_id = v.visitor_id
# self.r_browsers = r_browsers AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
# self.r_cities = r_cities )
# self.r_countries= r_countries {'AND v.is_human = 1' if only_human else ''}""")
ranking.append((db.fetchone()[0], name))
ranking.sort()
return ranking
def get_platform_ranking(db: Database, timestamps: tuple[int, int], only_human=False):
    """
    Get the ranking for the "platform" table.

    Thin wrapper that forwards all arguments to _get_platform_or_browser_ranking.

    :param db: database handle
    :param timestamps: (start, end) limits of the time span
    :param only_human: forwarded unchanged as keyword argument
    :returns: whatever _get_platform_or_browser_ranking returns for the table "platform"
    """
    table = "platform"
    return _get_platform_or_browser_ranking(db, timestamps, table, only_human=only_human)
def get_browser_ranking(db: Database, timestamps: tuple[int, int], only_human=False):
    """
    Get the ranking for the "browser" table.

    Thin wrapper that forwards all arguments to _get_platform_or_browser_ranking.

    :param db: database handle
    :param timestamps: (start, end) limits of the time span
    :param only_human: forwarded unchanged as keyword argument
    :returns: whatever _get_platform_or_browser_ranking returns for the table "browser"
    """
    table = "browser"
    return _get_platform_or_browser_ranking(db, timestamps, table, only_human=only_human)
def make_ranking_relative(ranking: list[tuple[int, str]]) -> list[tuple[float, str]]:
    """
    Convert the absolute counts of a ranking into percentages of the total count.

    :param ranking: [(count, name)]
    :returns: [(percentage, name)] in the same order as the input.
              If the counts add up to 0, a warning is issued and the counts
              are returned unscaled, only converted to float.
    """
    total_count = 0
    for count, _ in ranking:
        total_count += count
    if total_count != 0:
        return [(100.0 * count / total_count, name) for count, name in ranking]
    # nothing to scale by: keep the values, but still honor the float return type
    warning(f"make_ranking_relative: Can not make ranking relative, total_count is 0")
    return [(float(count), name) for count, name in ranking]

View File

@ -2,17 +2,21 @@ from re import fullmatch
from regina.database import Database from regina.database import Database
from regina.utility.globals import settings from regina.utility.globals import settings
from regina.utility.utility import pdebug, warning, missing_arg from regina.utility.utility import pdebug, warning
from regina.utility.sql_util import sanitize, sql_tablesize
# re_uri_protocol = f"(https?)://" # re_uri_protocol = f"(https?)://"
re_uri_protocol = f"(https?://)?" re_uri_protocol = f"(https?://)?"
re_uri_ipv4 = r"(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?)" re_uri_ipv4 = r"(?:\d{1,3}\.?){4}"
# re_uri_ipv6 = "" # re_uri_ipv6 = ""
re_uri_domain = r"(?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})" re_uri_domain = r"(?:[^/:]+)"
re_uri_route = r"(?:/(.*))?" re_uri_port = r"(?::\d+)?"
re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_route})" re_uri_route = r"(?:/.*)?"
re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_port})({re_uri_route})"
# (https?://)?((?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})|(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?))((?:/(.*))?) # (https?://)?((?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})|(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?))((?:/(.*))?)
re_domain = r"[^/:]+\.[a-z]{2,}"
def cleanup_referer(referer: str) -> str: def cleanup_referer(referer: str) -> str:
""" """
split the referer uri into its parts and reassemeble them depending on settings split the referer uri into its parts and reassemeble them depending on settings
@ -21,90 +25,40 @@ def cleanup_referer(referer: str) -> str:
if not m: if not m:
warning(f"cleanup_referer: Could not match referer '{referer}'") warning(f"cleanup_referer: Could not match referer '{referer}'")
return referer return referer
# pdebug(f"cleanup_referer: {referer} - {m.groups()}") pdebug(f"cleanup_referer: {referer} - {m.groups()}", lvl=4)
protocol = m.groups()[0] protocol, domain, port, route = m.groups()
subdomains = m.groups()[2] if not protocol: protocol = ""
if not subdomains: subdomains = "" if not port: port = ""
domain = m.groups()[1].replace(subdomains, "")
route = m.groups()[3]
referer = domain if fullmatch(re_domain, domain): # no ip address
if settings["referer_ranking_ignore_tld"]: parts = domain.split(".")
if len(domain.split(".")) == 2: # if domain.tld if len(parts) < 2:
referer = domain.split(".")[0] warning(f"cleanup_referer: Domain has not enough parts: '{domain}'")
if not settings["referer_ranking_ignore_subdomain"]: referer = subdomains + referer tld = parts[-1]
if not settings["referer_ranking_ignore_protocol"]: referer = protocol + referer referer = parts[-2]
if not settings["referer_ranking_ignore_route"]: referer += route subdomains = ""
for sd in parts[:-2]:
subdomains += f"{sd}."
if not settings["rankings"]["referer_ignore_tld"]: referer += "." + tld
if not settings["rankings"]["referer_ignore_subdomain"]: referer = subdomains + referer
else:
referer = domain
if not settings["rankings"]["referer_ignore_protocol"]: referer = protocol + referer
if not settings["rankings"]["referer_ignore_port"]: referer += port
if not settings["rankings"]["referer_ignore_route"]: referer += route
# pdebug(f"cleanup_referer: cleaned up: {referer}") # pdebug(f"cleanup_referer: cleaned up: {referer}")
return referer return referer
def get_where_date_str(at_date=None, min_date=None, max_date=None):
"""
get a condition string that sets a condition on the time
"""
# dates in unix time
s = ""
if at_date is not None:
if isinstance(at_date, str):
s += f"DATE(time, 'unixepoch') = '{sanitize(at_date)}' AND "
elif isinstance(at_date, int|float):
s += f"time = {int(at_date)} AND "
else:
print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
if min_date is not None:
if isinstance(min_date, str):
s += f"DATE(time, 'unixepoch') >= '{sanitize(min_date)}' AND "
elif isinstance(min_date, int|float):
s += f"time >= {int(min_date)} AND "
else:
print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
if max_date is not None:
if isinstance(max_date, str):
s += f"DATE(time, 'unixepoch') <= '{sanitize(max_date)}' AND "
elif isinstance(max_date, int|float):
s += f"time <= {int(max_date)} AND "
else:
print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
if s == "":
print(f"WARNING: get_where_date_str: no date_str generated. Returning 'time > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
return "time > 0"
return s.removesuffix(" AND ")
def is_valid_status(status: int): def is_valid_status(status: int):
if status >= 400: return False if status >= 400: return False
if settings["status_300_is_success"] and status >= 300: return True if settings["data-collection"]["status_300_is_success"] and status >= 300: return True
return status < 300 return status < 300
#
# GETTERS
#
def get_unique_visitor_ids_for_date(db: Database, date:str) -> list[int]:
    """
    Get the distinct visitor_ids of all requests that satisfy a condition.

    :param db: database handle; called with an SQL string it returns the result rows
    :param date: SQL condition that is inserted verbatim after WHERE
    :returns: list with the first column (visitor_id) of every result row
    """
    rows = db(f"SELECT DISTINCT visitor_id FROM request WHERE {date}")
    visitor_ids = []
    for row in rows:
        visitor_ids.append(row[0])
    return visitor_ids
def append_human_visitors(db: Database, unique_visitor_ids, unique_visitor_ids_human: list): def len_list_list(l: list[list]):
""" size = 0
for visitor in unique_visitor_ids: for i in range(len(l)):
if human -> append to unique_visitor_ids_human size += len(l[i])
""" return size
for visitor_id in unique_visitor_ids:
db.execute(f"SELECT is_human FROM visitor WHERE visitor_id = {visitor_id}")
if db.fetchone()[0] == 1:
unique_visitor_ids_human.append(visitor_id)
def get_unique_request_ids_for_date(db: Database, date_constraint:str):
    """
    Get the distinct request_ids of all requests that satisfy a condition.

    :param db: database handle; called with an SQL string it returns the result rows
    :param date_constraint: SQL condition that is inserted verbatim after WHERE
    :returns: list with the first column (request_id) of every result row
    """
    rows = db(f"SELECT DISTINCT request_id FROM request WHERE {date_constraint}")
    request_ids = []
    for row in rows:
        request_ids.append(row[0])
    return request_ids
def append_unique_request_ids_for_date_and_visitor(db: Database, date_constraint:str, visitor_id: int, unique_request_ids_human: list):
    """
    Append the distinct request_ids of one visitor to an existing list.

    :param db: database handle; called with an SQL string it returns the result rows
    :param date_constraint: SQL condition that is inserted verbatim after WHERE
    :param visitor_id: only requests of this visitor are selected
    :param unique_request_ids_human: list that is extended in place, nothing is returned
    """
    query = f"SELECT DISTINCT request_id FROM request WHERE {date_constraint} AND visitor_id = {visitor_id}"
    unique_request_ids_human.extend(row[0] for row in db(query))
# get number of requests per day
def get_request_count_for_date(db: Database, date_constraint:str) -> int:
    """
    Count the requests that satisfy a condition.

    :param db: database handle providing execute() and fetchone()
    :param date_constraint: SQL condition that is inserted verbatim after WHERE
    :returns: first column of the first result row, i.e. the COUNT(*) value
    """
    query = f"SELECT COUNT(*) FROM request WHERE {date_constraint}"
    db.execute(query)
    row = db.fetchone()
    return row[0]
def get_unique_visitor_count(db: Database) -> int:
    """
    Get the size of the "visitor" table.

    :param db: database handle; its `cur` attribute is handed to sql_tablesize
    :returns: the result of sql_tablesize for the table "visitor"
    """
    cursor = db.cur
    return sql_tablesize(cursor, "visitor")

View File

@ -1,20 +1,18 @@
# from sys import path # from sys import path
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}") # print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
import sqlite3 as sql
from sys import exit
from re import fullmatch
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from os.path import isdir from pickle import dump
from os import path, makedirs
from datetime import datetime as dt from datetime import datetime as dt
from numpy import empty
# local # local
from regina.database import Database from regina.database import Database
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where from regina.utility.sql_util import get_date_constraint, sanitize
from regina.utility.utility import pdebug, warning, missing_arg from regina.utility.utility import pdebug, warning, error, make_parent_dirs, dict_str
from regina.utility.globals import settings from regina.utility.globals import settings
from regina.data_visualization.utility import cleanup_referer, get_where_date_str, get_unique_visitor_ids_for_date, get_unique_request_ids_for_date, append_human_visitors, append_unique_request_ids_for_date_and_visitor from regina.data_visualization.utility import len_list_list
from regina.data_visualization.ranking import get_city_and_country_ranking, get_platform_browser_mobile_rankings, get_ranking, cleanup_referer_ranking, get_route_ranking from regina.data_visualization.ranking import get_referer_ranking, cleanup_referer_ranking, get_route_ranking, route_ranking_group_routes, get_browser_ranking, get_platform_ranking, get_city_ranking, get_country_ranking, make_ranking_relative
import regina.data_visualization.history as h
""" """
visualize information from the databse visualize information from the databse
@ -53,12 +51,14 @@ color_settings_platforms = {
palette["blue"]: ["Windows"], palette["blue"]: ["Windows"],
} }
color_settings_history = {
def len_list_list(l: list[list]): "visitors": "#000050",
size = 0 "visitors_human": "#3366ff",
for i in range(len(l)): "visitors_new": "#66ccff",
size += len(l[i]) "requests": "#770000",
return size "requests_human": "#ff3500",
"requests_new": "#ff9999",
}
# #
@ -86,18 +86,18 @@ def add_labels_at_top_of_bar(xdata, ydata, max_y_val, ax, bar_plot):
for idx,rect in enumerate(bar_plot): for idx,rect in enumerate(bar_plot):
ax.text(rect.get_x() + rect.get_width()/2, ydata[idx] - y_offset, round(ydata[idx], 1), ha='center', bbox=dict(facecolor='white', alpha=0.8)) ax.text(rect.get_x() + rect.get_width()/2, ydata[idx] - y_offset, round(ydata[idx], 1), ha='center', bbox=dict(facecolor='white', alpha=0.8))
def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None): def plot_ranking(ranking: list[tuple[int or float, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None):
""" """
make a bar plot of the ranking make a bar plot of the ranking
""" """
# pdebug(f"plot_ranking: ranking={ranking}") # pdebug(f"plot_ranking: ranking={ranking}")
if not fig: if not fig:
fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None) fig = plt.figure(figsize=figsize, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
# create new axis if none is given # create new axis if none is given
ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel) ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
# fill x y data # fill x y data
if len(ranking) > settings["file_ranking_plot_max_files"]: if len(ranking) > settings["rankings"]["route_plot_max_routes"]:
start_index = len(ranking) - settings["file_ranking_plot_max_files"] start_index = len(ranking) - settings["rankings"]["route_plot_max_routes"]
else: start_index = 0 else: start_index = 0
x_names = [] x_names = []
y_counts = [] y_counts = []
@ -120,14 +120,14 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
if len(y_counts) > 0: if len(y_counts) > 0:
add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar) add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar)
if settings["plot_add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar) if settings["plot-generation"]["add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar)
# ax.ylabel(y_counts) # ax.ylabel(y_counts)
return fig return fig
# def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue", rotate_xlabel=0): # def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue", rotate_xlabel=0):
# if not fig: # if not fig:
# fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None) # fig = plt.figure(figsize=None, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
# if not ax: # if not ax:
# ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel) # ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
# else: # else:
@ -139,29 +139,39 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
# if label: ax.legend() # if label: ax.legend()
# return fig, ax # return fig, ax
def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major", rotate_xlabel=0, figsize=None):
if not fig:
fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
if not (ax1 and ax2):
ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
ax2 = ax1.twinx()
ax2.set_ylabel(ylabel2)
ax1.tick_params(axis="x", rotation=90)
plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
# ax1.set_xticks(ax1.get_xticks())
# ax1.set_xticklabels(xdata, rotation=rotate_xlabel, rotation_mode="anchor")
# if label1 or label2: ax1.legend()
if plots: plots += plot1 + plot2
else: plots = plot1 + plot2
plt.legend(plots, [ l.get_label() for l in plots])
if grid == "major" or grid == "minor" or grid == "both": class Plot2Y:
if grid == "minor" or "both": def __init__(self, xlabel, ylabel_left, ylabel_right, grid="major", rotate_xlabel=0, figsize=None):
ax1.minorticks_on() self.fig, self.ax1 = plt.subplots(figsize=figsize, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
ax1.grid(visible=True, which=grid, linestyle="-", color="#888") self.ax1.set_xlabel(xlabel=xlabel) #, ylabel=ylabel_left)
self.ax1.set_ylabel(ylabel=ylabel_left) #, ylabel=ylabel_left)
self.ax2 = self.ax1.twinx()
self.ax2.set_ylabel(ylabel_right)
self.ax1.tick_params(axis="x", rotation=90)
self.plots = None
if grid == "major" or grid == "minor" or grid == "both":
if grid == "minor" or "both":
self.ax1.minorticks_on()
self.ax1.grid(visible=True, which=grid, linestyle="-", color="#888")
return fig, ax1, ax2, plots def _plot(self, ax, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
plot = ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
# ax1.set_xticks(ax1.get_xticks())
# ax1.set_xticklabels(xdata, rotation=rotate_xlabel, rotation_mode="anchor")
# if label1 or label2: ax1.legend()
if self.plots: self.plots += plot
else: self.plots = plot
plt.legend(self.plots, [ l.get_label() for l in self.plots ])
def plot_left(self, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
self._plot(self.ax1, xdata, ydata, label, linestyle, marker, color)
def plot_right(self, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
self._plot(self.ax2, xdata, ydata, label, linestyle, marker, color)
def get_fig(self):
return self.fig
# #
@ -172,194 +182,259 @@ def visualize(db: Database):
This assumes sanity checks have been done This assumes sanity checks have been done
""" """
pdebug("visualizing...") pdebug("visualizing...")
if not settings["db"]: missing_arg("db")
if not settings["server_name"]: missing_arg("server_name")
def make_dir_if_not_None(d):
    """Create directory `d` (including parents) unless `d` is empty/None or already a directory."""
    if not d or path.isdir(d):
        return
    makedirs(d)
gen_img = True
else:
print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
gen_img = False
# plot generation
img_out_dir = settings["plot-generation"]["img_out_dir"]
make_dir_if_not_None(img_out_dir)
img_filetype = settings["plot-generation"]["filetype"]
img_location = settings["html-generation"]["img_location"]
# the last label was written as `{img_location}`, which printed the value as its own label
pdebug(f"visualize: img_out_dir='{img_out_dir}', filetype='{img_filetype}', img_location='{img_location}'", lvl=2)
if not img_out_dir:
    pdebug("visualize: Not generating images since img_out_dir is None", lvl=1)

# data export
data_out_dir = settings["data-export"]["data_out_dir"]
make_dir_if_not_None(data_out_dir)
data_filetype = settings["data-export"]["filetype"]
pdebug(f"visualize: data_out_dir='{data_out_dir}', filetype='{data_filetype}'", lvl=2)
if not data_out_dir:
    pdebug("visualize: Not exporting data since data_out_dir is None", lvl=1)

# with neither output directory set this run produces nothing on disk
if not data_out_dir and not img_out_dir:
    warning("data_out_dir and img_out_dir are both None. No data will be exported and no plots will be generated!")
html_variables = {
# values # values
"mobile_visitor_percentage_total": 0.0, "visitor_count_last_x_days": "NaN",
"mobile_visitor_percentage_last_x_days": 0.0, "visitor_count_total": "NaN",
"visitor_count_last_x_days": 0, "request_count_last_x_days": "NaN",
"visitor_count_total": 0, "request_count_total": "NaN",
"request_count_last_x_days": 0, "visitor_count_human_last_x_days": "NaN",
"request_count_total": 0, "visitor_count_human_total": "NaN",
"human_visitor_percentage_last_x_days": 0.0, "request_count_human_last_x_days": "NaN",
"human_visitor_percentage_total": 0.0, "request_count_human_total": "NaN",
"human_request_percentage_last_x_days": 0.0, "human_visitor_percentage_last_x_days": "NaN",
"human_request_percentage_total": 0.0, "human_visitor_percentage_total": "NaN",
"human_request_percentage_last_x_days": "NaN",
"human_request_percentage_total": "NaN",
"mobile_visitor_percentage_total": "NaN",
"mobile_visitor_percentage_last_x_days": "NaN",
# general # general
"regina_version": settings["version"], "regina_version": settings["regina"]["version"],
"server_name": settings["server_name"], "server_name": settings["regina"]["server_name"],
"last_x_days": settings["last_x_days"], # must be after all the things with last_x_days! "last_x_days": settings["data-visualization"]["last_x_days"],
"earliest_date": "1990-1-1", "earliest_date": "1990-1-1",
"generation_date": "1990-1-1 0:0:0", "generation_date": "1990-1-1 0:0:0",
} }
db = Database(database_path=settings["db"]) for suffix in ["last_x_days", "total"]:
# add all plot paths as variables: img_plot_suffix -> plot_suffix.filetype
# not adding img_location or img_out_dir since these names are needed for both
html_variables.update((f"img_{plot_}_{suffix}", f"{plot_}_{suffix}.{img_filetype}") for plot_ in ["ranking_platform", "ranking_browser", "ranking_country", "ranking_city", "ranking_referer", "ranking_route", "history_visitor_request"])
# switches that gate the human-only statistics and the extra "human" / "new" curves of the history plot
get_humans_visitors = settings["data-visualization"]["history_track_human_visitors"]
get_new_visitors = settings["data-visualization"]["history_track_new_visitors"]
# NOTE(review): the other settings read by the new code in this function are nested in a section;
# this top-level key looks like a leftover of the old flat layout and get_humans is not used by the
# code visible below - confirm the key still exists or drop the line
get_humans = settings["get_human_percentage"]
# pdebug(f"visualize: settings {settings}")
# DATE STRINGS # DATE STRINGS
earliest_date = db.get_earliest_date() earliest_timestamp = db.get_earliest_timestamp()
names["earliest_date"] = dt.fromtimestamp(earliest_date).strftime("%Y-%m-%d") html_variables["earliest_date"] = dt.fromtimestamp(earliest_timestamp).strftime("%Y-%m-%d")
names["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S") html_variables["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
# LAST_X_DAYS
# last_x_days_min_date: latest_date - last_x_days
secs_per_day = 86400
last_x_days_min_date = db.get_latest_date() - settings["last_x_days"] * secs_per_day
last_x_days_constraint = get_where_date_str(min_date=last_x_days_min_date)
last_x_days = db.get_days_where(last_x_days_constraint)
last_x_days_contraints = [get_where_date_str(at_date=day) for day in last_x_days]
# ALL DATES todos: list[tuple[str, tuple[int, int], list[str], list[str], list[tuple[int, int]]]] = [] # suffix, whole_time_timestamps, history_date_constraints, history_date_names, history_date_timestamps
all_time_constraint = get_where_date_str(min_date=0)
# all months in yyyy-mm format
months_all_time = db.get_months_where(all_time_constraint)
# sqlite constrict to month string
months_strs = []
for year_month in months_all_time:
year, month = year_month.split("-")
# first day of the month
min_date = dt(int(year), int(month), 1).timestamp()
month = (int(month) % 12) + 1 # + 1 month
year = int(year)
if month == 1: year += 1
# first day of the next month - 1 sec
max_date = dt(year, month, 1).timestamp() - 1
months_strs.append(get_where_date_str(min_date=min_date, max_date=max_date))
for i in range(2): now_stamp = int(dt.now().timestamp())
suffix = ["_total", "_last_x_days"][i] total: bool = settings["data-visualization"]["total"]
date_constraint = [all_time_constraint, last_x_days_constraint][i] if total:
date_names = [months_all_time, last_x_days][i] all_time_timestamps = (0, now_stamp)
date_constraints = [months_strs, last_x_days_contraints][i] # all months in yyyy-mm format
assert(len(date_names) == len(date_constraints)) month_names = db.get_months_where(get_date_constraint(min_date=0))
month_timestamps = []
# sqlite constrict to month string
month_constraints = []
for year_month in month_names:
year, month = year_month.split("-")
# timestamp of first day of the month
min_date = int(dt(int(year), int(month), 1).timestamp())
month = (int(month) % 12) + 1 # + 1 month
year = int(year)
# first day of the next month - 1 sec
if month == 1: year += 1
max_date = int(dt(year, month, 1).timestamp()) - 1
month_constraints.append(get_date_constraint(min_date=min_date, max_date=max_date))
month_timestamps.append((min_date, max_date))
todos.append(("total", all_time_timestamps, month_constraints, month_names, month_timestamps))
# FILES last_x_days: int = settings["data-visualization"]["last_x_days"]
if last_x_days > 0:
    secs_per_day = 86400
    # the window starts last_x_days before the newest request in the database and ends now
    last_x_days_min_date = db.get_latest_timestamp() - last_x_days * secs_per_day
    last_x_days_timestamps = (last_x_days_min_date, now_stamp)
    last_x_days_constraint = get_date_constraint(min_date=last_x_days_min_date)
    days = db.get_days_where(last_x_days_constraint)  # yyyy-mm-dd
    day_constrains = [get_date_constraint(at_date=day) for day in days]
    day_timestamps = []
    for date_str in days:
        year, month, day_of_month = (int(part) for part in date_str.split("-"))
        min_date = int(dt(year, month, day_of_month).timestamp())
        # the ranges are evaluated with the inclusive SQL BETWEEN, so stop one second before the
        # next day starts; otherwise a request at midnight is counted for two days
        # (the month ranges subtract one second in the same way)
        max_date = min_date + secs_per_day - 1
        day_timestamps.append((min_date, max_date))
    todos.append(("last_x_days", last_x_days_timestamps, day_constrains, days, day_timestamps))
def export_ranking(name: str, column_name: str, ranking: list[tuple[float, str]]):
    """Write a ranking to `{data_out_dir}/{name}.{data_filetype}`.

    :param name: base name of the output file, also written as the first line of a csv export
    :param column_name: csv header of the item column
    :param ranking: (count, item) pairs; count may be an int or a float
                    (the former annotation `int or float` evaluated to plain `int`)

    data_filetype "pkl" passes `ranking` to dump(), "csv" writes a title line, a header line and
    one `count,"item"` line per entry. Any other filetype is reported through error().
    """
    filename = f"{data_out_dir}/{name}.{data_filetype}"
    if data_filetype == "pkl":
        pdebug(f"visualize: Exporting {name} as pickle to '{filename}'", lvl=2)
        with open(filename, "wb") as file:
            dump(ranking, file)
    elif data_filetype == "csv":
        pdebug(f"visualize: Exporting {name} as csv to '{filename}'", lvl=2)
        lines = [f'"{name}"', f'"count","{column_name}"']
        for count, item in ranking:
            # double embedded quotes so an item containing '"' stays a single csv field
            escaped = str(item).replace('"', '""')
            lines.append(f'{count},"{escaped}"')
        # joined once, without a trailing newline (same layout as before)
        with open(filename, "w") as file:
            file.write("\n".join(lines))
    else:
        error(f"visualize: Unsupported data filetype: '{data_filetype}'")
def savefig(name: str, figure):
    """Save `figure` as `{img_out_dir}/{name}.{img_filetype}`, cropped to a tight bounding box."""
    out_path = f"{img_out_dir}/{name}.{img_filetype}"
    pdebug(f"visualize: Saving plot for {name} as '{out_path}'")
    figure.savefig(out_path, bbox_inches="tight")
pdebug(f"visualize: total={total}, last_x_days={last_x_days}", lvl=3)
for suffix, whole_timespan_timestamps, single_date_constraints, single_date_names, single_date_timestamps in todos:
assert(len(single_date_names) == len(single_date_constraints))
# STATISTICS
visitor_count = h.get_visitor_count_between(db, whole_timespan_timestamps)
request_count = h.get_request_count_between(db, whole_timespan_timestamps)
html_variables[f"visitor_count_{suffix}"] = visitor_count
html_variables[f"request_count_{suffix}"] = request_count
if get_humans_visitors:
visitor_count_human = h.get_visitor_count_between(db, whole_timespan_timestamps, only_human=True)
request_count_human = h.get_request_count_between(db, whole_timespan_timestamps, only_human=True)
html_variables[f"visitor_count_human_{suffix}"] = visitor_count_human
html_variables[f"request_count_human_{suffix}"] = request_count_human
try: html_variables[f"human_visitor_percentage_{suffix}"] = 100.0 * visitor_count_human / visitor_count
except ZeroDivisionError: pass
try: html_variables[f"human_request_percentage_{suffix}"] = 100.0 * request_count_human / request_count
except ZeroDivisionError: pass
try: html_variables[f"mobile_visitor_percentage_{suffix}"] = 100.0 * h.get_mobile_visitor_count_between(db, whole_timespan_timestamps, only_human=True) / visitor_count_human
except ZeroDivisionError: pass
# HISTORY
date_count = len(single_date_constraints)
visitor_count_dates = [ h.get_visitor_count_between(db, single_date_timestamps[i], only_human=False) for i in range(date_count) ]
request_count_dates = [ h.get_request_count_between(db, single_date_timestamps[i], only_human=False) for i in range(date_count) ]
visitor_count_human_dates = [ h.get_visitor_count_between(db, single_date_timestamps[i], only_human=True) for i in range(date_count) ]
request_count_human_dates = [ h.get_request_count_between(db, single_date_timestamps[i], only_human=True) for i in range(date_count) ]
visitor_count_new_dates = [ h.get_new_visitor_count_between(db, single_date_timestamps[i]) for i in range(date_count) ]
request_count_new_dates = [ h.get_request_from_new_visitor_count_between(db, single_date_timestamps[i]) for i in range(date_count) ]
if img_out_dir:
    plt_history = Plot2Y(xlabel="Date", ylabel_left="Visitor count", ylabel_right="Request count", rotate_xlabel=-45, figsize=settings["plot-generation"]["size_broad"])
    # visitor curves belong to the left axis ("Visitor count")
    plt_history.plot_left(single_date_names, visitor_count_dates, label="Unique visitors", color=color_settings_history["visitors"])
    if get_humans_visitors:
        plt_history.plot_left(single_date_names, visitor_count_human_dates, label="Unique visitors (human)", color=color_settings_history["visitors_human"])
    if get_new_visitors:
        plt_history.plot_left(single_date_names, visitor_count_new_dates, label="Unique visitors (new)", color=color_settings_history["visitors_new"])
    # request curves belong to the right axis ("Request count"); the human and new request
    # curves were drawn with plot_left before and therefore scaled against the visitor axis
    plt_history.plot_right(single_date_names, request_count_dates, label="Unique requests", color=color_settings_history["requests"])
    if get_humans_visitors:
        plt_history.plot_right(single_date_names, request_count_human_dates, label="Unique requests (human)", color=color_settings_history["requests_human"])
    if get_new_visitors:
        plt_history.plot_right(single_date_names, request_count_new_dates, label="Unique requests (new)", color=color_settings_history["requests_new"])
    savefig(f"history_visitor_request_{suffix}", plt_history.get_fig())
# if data_out_dir: # TODO export history
# s = ""
# ROUTES
# TODO handle groups # TODO handle groups
file_ranking = get_route_ranking(db, date_constraint) route_ranking = get_route_ranking(db, whole_timespan_timestamps)
if gen_img: route_ranking = route_ranking_group_routes(route_ranking)
fig_file_ranking = plot_ranking(file_ranking, xlabel="Route Name", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot_size_broad"]) pdebug("visualize: route ranking", route_ranking, lvl=3)
fig_file_ranking.savefig(f"{img_dir}/{names[f'img_route_ranking{suffix}']}", bbox_inches="tight") if img_out_dir:
fig_file_ranking = plot_ranking(route_ranking, xlabel="Route", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot-generation"]["size_broad"])
savefig(f"ranking_route_{suffix}", fig_file_ranking)
if data_out_dir:
export_ranking(f"ranking_route_{suffix}", "route", route_ranking)
# REFERER # REFERER
referer_ranking = get_ranking(db, "request", "referer", date_constraint, settings["referer_ranking_whitelist"], settings["referer_ranking_whitelist"]) referer_ranking = get_referer_ranking(db, whole_timespan_timestamps)
pdebug("Referer ranking", referer_ranking)
cleanup_referer_ranking(referer_ranking) cleanup_referer_ranking(referer_ranking)
if gen_img: pdebug("visualize: referer ranking", referer_ranking, lvl=3)
fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"]) if img_out_dir:
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_referer_ranking{suffix}']}", bbox_inches="tight") fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
savefig(f"ranking_referer_{suffix}", fig_referer_ranking)
if data_out_dir:
export_ranking(f"ranking_referer_{suffix}", "referer", referer_ranking)
# GEOIP # GEOIP
if settings["do_geoip_rankings"]: if settings["data-collection"]["get_visitor_location"]:
city_ranking, country_ranking = get_city_and_country_ranking(db, require_humans=settings["geoip_only_humans"]) country_ranking = get_country_ranking(db, whole_timespan_timestamps, only_human=settings["rankings"]["geoip_only_humans"])
pdebug("Country ranking:", country_ranking) pdebug("visualize: country ranking:", country_ranking, lvl=3)
pdebug("City ranking:", city_ranking) city_ranking = get_city_ranking(db, whole_timespan_timestamps, add_country_code=settings["rankings"]["city_add_country_code"], only_human=settings["rankings"]["geoip_only_humans"])
if gen_img: pdebug("visualize: city ranking:", city_ranking, lvl=3)
fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"]) if img_out_dir:
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_countries{suffix}']}", bbox_inches="tight") fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
savefig(f"ranking_country_{suffix}", fig_referer_ranking)
fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"]) fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_cities{suffix}']}", bbox_inches="tight") savefig(f"ranking_city_{suffix}", fig_referer_ranking)
if data_out_dir:
export_ranking(f"ranking_country_{suffix}", "country", country_ranking)
# USER export_ranking(f"ranking_city_{suffix}", "city", city_ranking)
# visitor_agent_ranking = get_visitor_agent_ranking(cur, date_str)
# for the time span
unique_visitor_ids = get_unique_visitor_ids_for_date(db, date_constraint)
unique_visitor_ids_human = []
append_human_visitors(db, unique_visitor_ids, unique_visitor_ids_human)
# for each date
date_count = len(date_constraints)
unique_visitor_ids_dates: list[list[int]] = []
unique_request_ids_dates: list[list[int]] = []
unique_visitor_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
unique_request_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
for i in range(date_count):
date_constraint_ = date_constraints[i]
unique_visitor_ids_dates.append(get_unique_visitor_ids_for_date(db, date_constraint_))
unique_request_ids_dates.append(get_unique_request_ids_for_date(db, date_constraint_))
if get_humans:
# empty_list = []
# unique_visitor_ids_human_dates.append(empty_list)
append_human_visitors(db, unique_visitor_ids_dates[i], unique_visitor_ids_human_dates[i])
# unique_request_ids_human_dates.append(list())
for human in unique_visitor_ids_human_dates[i]:
append_unique_request_ids_for_date_and_visitor(db, date_constraint_, human, unique_request_ids_human_dates[i])
# print("\n\tuu", unique_visitor_ids_dates, "\n\tur",unique_request_ids_dates, "\n\tuuh", unique_visitor_ids_human_dates, "\n\turh", unique_request_ids_human_dates)
# pdebug("uui", unique_visitor_ids)
# pdebug("uuih", unique_visitor_ids_human)
# pdebug("uuid", unique_visitor_ids_dates)
# pdebug("uuidh", unique_visitor_ids_human_dates)
# pdebug("urid", unique_request_ids_dates)
# pdebug("uridh", unique_visitor_ids_human_dates)
# pdebug(f"human_visitor_precentage: len_list_list(visitor_ids)={len_list_list(unique_visitor_ids_dates)}, len_list_list(visitor_ids_human)={len_list_list(unique_visitor_ids_human_dates)}")
if get_humans:
try:
names[f"human_visitor_percentage{suffix}"] = round(100 * len_list_list(unique_visitor_ids_human_dates) / len_list_list(unique_visitor_ids_dates), 2)
except:
names[f"human_visitor_percentage{suffix}"] = -1.0
try:
names[f"human_request_percentage{suffix}"] = round(100 * len_list_list(unique_request_ids_human_dates) / len_list_list(unique_request_ids_dates), 2)
except:
names[f"human_request_percentage{suffix}"] = -1.0
names[f"visitor_count{suffix}"] = len_list_list(unique_visitor_ids_dates)
names[f"request_count{suffix}"] = len_list_list(unique_request_ids_dates)
if gen_img:
fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_dates], [len(request_ids) for request_ids in unique_request_ids_dates], xlabel="Date", ylabel1="Visitor count", label1="Unique visitors", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"], rotate_xlabel=-45, figsize=settings["plot_size_broad"])
if get_humans:
fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_human_dates], [len(request_ids) for request_ids in unique_request_ids_human_dates], label1="Unique visitors (human)", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots, rotate_xlabel=-45, figsize=settings["plot_size_broad"])
fig_daily.savefig(f"{img_dir}/{names[f'img_visitors_and_requests{suffix}']}", bbox_inches="tight")
# os & browser # os & browser
platform_ranking, browser_ranking, names[f"mobile_visitor_percentage{suffix}"] = get_platform_browser_mobile_rankings(db, unique_visitor_ids_human) browser_ranking = get_browser_ranking(db, whole_timespan_timestamps, only_human=False)
if gen_img: browser_ranking = make_ranking_relative(browser_ranking)
fig_os_rating = plot_ranking(platform_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_platforms, figsize=settings["plot_size_narrow"]) pdebug("visualize: browser ranking:", browser_ranking, lvl=3)
fig_os_rating.savefig(f"{img_dir}/{names[f'img_platform_ranking{suffix}']}", bbox_inches="tight") platform_ranking = get_platform_ranking(db, whole_timespan_timestamps, only_human=False)
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browser", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot_size_narrow"]) platform_ranking = make_ranking_relative(platform_ranking)
fig_browser_rating.savefig(f"{img_dir}/{names[f'img_browser_ranking{suffix}']}", bbox_inches="tight") pdebug("visualize: platform ranking:", platform_ranking, lvl=3)
if img_out_dir:
fig_os_rating = plot_ranking(platform_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_platforms, figsize=settings["plot-generation"]["size_narrow"])
savefig(f"ranking_platform_{suffix}", fig_os_rating)
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browser", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot-generation"]["size_narrow"])
savefig(f"ranking_browser_{suffix}", fig_browser_rating)
if data_out_dir:
export_ranking(f"ranking_platform_{suffix}", "platform", platform_ranking)
export_ranking(f"ranking_browser_{suffix}", "browser", browser_ranking)
# print("OS ranking", os_ranking)
# print("Browser ranking", browser_ranking)
# print("Mobile percentage", names["mobile_visitor_percentage"])
if settings["template_html"] and settings["html_out_path"]:
pdebug(f"visualize: writing to html: {settings['html_out_path']}")
with open(settings["template_html"], "r") as file: html_variables_str = dict_str(html_variables).replace('\n', '\n\t')
pdebug(f"visualize: html_variables:\n\t{html_variables_str}", lvl=2)
template_html: str|None = settings["html-generation"]["template_html"]
html_out_path: str|None = settings["html-generation"]["html_out_path"]
if template_html and html_out_path:
pdebug(f"visualize: generating from template '{template_html}' to '{html_out_path}'", lvl=2)
if not path.isfile(template_html):
error(f"Invalid template file path: '{template_html}'")
with open(template_html, "r") as file:
html = file.read() html = file.read()
for name, value in names.items(): for name, value in html_variables.items():
if "img" in name: if "img" in name:
value = f"{img_location}/{value}" value = f"{img_location}/{value}"
if type(value) == float: elif type(value) == float:
value = f"{value:.2f}" value = f"{value:.2f}"
html = html.replace(f"%{name}", str(value)) html = html.replace(f"%{name}", str(value))
with open(settings["html_out_path"], "w") as file: make_parent_dirs(html_out_path)
with open(html_out_path, "w") as file:
file.write(html) file.write(html)
else: else:
warning(f"Skipping html generation because either template_html or html_out_path is invalid: template_html='{settings['template_html']}', html_out_path='{settings['html_out_path']}'") pdebug(f"visualize: skipping html generation because either template_html or html_out_path is None: template_html='{template_html}', html_out_path='{html_out_path}'", lvl=1)

View File

@ -48,43 +48,44 @@
<body> <body>
<h1>Analytics for %server_name</h1> <h1>Analytics for %server_name</h1>
<div class=box> <div class=box>
<center> <div style="text-align: center">
<h2>Last %last_x_days days</h2> <h2>Last %last_x_days days</h2>
<hr> <hr>
<h3>Visitor and request count (per day)</h3> <h3>Visitor and request count (per day)</h3>
<img src="%img_visitors_and_requests_last_x_days" alt="Daily Statistics", title="Visitor and request count for the last %last_x_days days"> <img src="%img_history_visitor_request_last_x_days" alt="Daily Statistics", title="Visitor and request count for the last %last_x_days days">
<ul> <ul>
<li>visitor count: <b>%visitor_count_last_x_days</b>, from which <b>%human_visitor_percentage_last_x_days%</b> are human</li> <li>visitor count: <b>%visitor_count_last_x_days</b>, from which <b>%visitor_count_human_last_x_days</b> (<b>%human_visitor_percentage_last_x_days%</b>) are human</li>
<li>request count: <b>%request_count_last_x_days</b>, from which <b>%human_request_percentage_last_x_days%</b> came from human visitors </li> <li>request count: <b>%request_count_last_x_days</b>, from which <b>%request_count_human_last_x_days</b> (<b>%human_request_percentage_last_x_days%</b>) came from human visitors </li>
</ul> </ul>
<hr> <hr>
<h3>File access</h3> <h3>File access</h3>
<img src="%img_file_ranking_last_x_days" alt="File ranking for the last %last_x_days days", title="File ranking for the last %last_x_days days"> <img src="%img_ranking_route_last_x_days" alt="File ranking for the last %last_x_days days", title="File ranking for the last %last_x_days days">
<hr> <hr>
<h3>Platforms and browsers</h3> <h3>Platforms and browsers</h3>
<img class="small" src="%img_operating_system_ranking_last_x_days" alt="Operating system ranking for the last %last_x_days days", title="Operating system ranking for the last %last_x_days days"> <img class="small" src="%img_ranking_platform_last_x_days" alt="Operating system ranking for the last %last_x_days days", title="Operating system ranking for the last %last_x_days days">
<img class="small" src="%img_browser_ranking_last_x_days" alt="Browser ranking for the last %last_x_days days", title="Browser ranking for the last %last_x_days days"> <img class="small" src="%img_ranking_browser_last_x_days" alt="Browser ranking for the last %last_x_days days", title="Browser ranking for the last %last_x_days days">
<h4>Mobile visitors: %mobile_visitor_percentage_last_x_days%</h4> <h4>Mobile visitors: %mobile_visitor_percentage_last_x_days%</h4>
<hr> <hr>
<h3>Referrers</h3> <h3>Referrers</h3>
<img src="%img_referer_ranking_last_x_days" alt="Referer ranking for the last %last_x_days days", title="Referer ranking for the last %last_x_days days"> <img src="%img_ranking_referer_last_x_days" alt="Referer ranking for the last %last_x_days days", title="Referer ranking for the last %last_x_days days">
<hr> <hr>
<!-- <h3>GeoIP</h3> --> <h3>GeoIP</h3>
<!-- <img src="%img_countries_last_x_days" alt="Country ranking for the last %last_x_days days", title="Country ranking for the last %last_x_days days"> --> <img src="%img_ranking_country_last_x_days" alt="Country ranking for the last %last_x_days days", title="Country ranking for the last %last_x_days days">
<!-- <img src="%img_cities_last_x_days" alt="City ranking for the last %last_x_days days", title="City ranking for the last %last_x_days days"> --> <img src="%img_ranking_city_last_x_days" alt="City ranking for the last %last_x_days days", title="City ranking for the last %last_x_days days">
<!-- <hr> --> <hr>
</center>
</div> </div>
</div>
<div class=box> <div class=box>
<center> <div style="text-align: center">
<h2>Total (since %earliest_date)</h2> <h2>Total (since %earliest_date)</h2>
<hr> <hr>
<h3>Visitor and request count (per month)</h3> <h3>Visitor and request count (per month)</h3>
<img src="%img_visitors_and_requests_total" alt="Monthly Statistics", title="Visitor and request count"> <img src="%img_history_visitor_request_total" alt="Monthly Statistics", title="Visitor and request history">
<ul> <ul>
<li>Total visitor count: <b>%visitor_count_total</b>, from which <b>%human_visitor_percentage_total%</b> are human</li> <li>Total visitor count: <b>%visitor_count_total</b>, from which <b>%human_visitor_percentage_total%</b> are human</li>
<li>Total request count: <b>%request_count_total</b>, from which <b>%human_request_percentage_total%</b> came from human visitors </li> <li>Total request count: <b>%request_count_total</b>, from which <b>%human_request_percentage_total%</b> came from human visitors </li>
@ -92,24 +93,24 @@
<hr> <hr>
<h3>File access</h3> <h3>File access</h3>
<img src="%img_file_ranking_total" alt="File ranking", title="File ranking"> <img src="%img_ranking_route_total" alt="File ranking", title="File ranking">
<hr> <hr>
<h3>Platforms and browsers</h3> <h3>Platforms and browsers</h3>
<img class="small" src="%img_operating_system_ranking_total" alt="Operating system ranking", title="Operating system ranking"> <img class="small" src="%img_ranking_platform_total" alt="Operating system ranking", title="Operating system ranking">
<img class="small" src="%img_browser_ranking_total" alt="Browser ranking", title="Browser ranking"> <img class="small" src="%img_ranking_browser_total" alt="Browser ranking", title="Browser ranking">
<h4>Mobile visitors: %mobile_visitor_percentage_total%</h4> <h4>Mobile visitors: %mobile_visitor_percentage_total%</h4>
<hr> <hr>
<h3>Referrers</h3> <h3>Referrers</h3>
<img src="%img_referer_ranking_total" alt="Referer ranking", title="Referer ranking"> <img src="%img_ranking_referer_total" alt="Referer ranking", title="Referer ranking">
<hr> <hr>
<!-- <h3>GeoIP</h3> --> <h3>GeoIP</h3>
<!-- <img src="%img_countries_total" alt="Country ranking", title="Country ranking"> --> <img src="%img_ranking_country_total" alt="Country ranking", title="Country ranking">
<!-- <img src="%img_cities_total" alt="City ranking", title="City ranking"> --> <img src="%img_ranking_city_total" alt="City ranking", title="City ranking">
<!-- <hr> --> <hr>
</center> </div>
</div> </div>
<p>These analytics were generated by <a href="https://git.quintern.xyz/MatthiasQuintern/regina">regina %regina_version</a> at %generation_date</p> <p>These analytics were generated by <a href="https://git.quintern.xyz/MatthiasQuintern/regina">regina %regina_version</a> at %generation_date</p>
<!-- Uncomment if you use IP2Location database --> <!-- Uncomment if you use IP2Location database -->