refactored visualization
Rankings and statistics now use more SQL features for better performance; added data export; changed HTML variable names.
This commit is contained in:
parent cf1294882b
commit 4a97335b96
regina/data_visualization/history.py (new file, 53 lines added)
@@ -0,0 +1,53 @@
from regina.database import Database

def get_visitor_count_between(db: Database, timestamps: tuple[int, int], only_human=False):
    return db(f"""SELECT COUNT(visitor_id)
        FROM visitor AS v
        WHERE EXISTS (
            SELECT 1
            FROM request as r
            WHERE r.visitor_id = v.visitor_id
            AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
        )
        {'AND v.is_human = 1' if only_human else ''}""")[0][0]

def get_request_count_between(db: Database, timestamps: tuple[int, int], only_human=False):
    return db(f"""SELECT COUNT(r.request_id)
        FROM request AS r, visitor AS v
        WHERE r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
        {'AND v.is_human = 1' if only_human else ''}""")[0][0]


def get_new_visitor_count_between(db: Database, timestamps: tuple[int, int]):
    return db(f"""SELECT COUNT(*)
        FROM visitor AS v
        JOIN (
            SELECT visitor_id, MIN(time) AS first_request_time
            FROM request
            GROUP BY visitor_id
        ) AS r ON v.visitor_id = r.visitor_id
        WHERE r.first_request_time BETWEEN {timestamps[0]} AND {timestamps[1]}""")[0][0]

def get_request_from_new_visitor_count_between(db: Database, timestamps: tuple[int, int]):
    return db(f"""SELECT COUNT(*)
        FROM request AS r
        JOIN (
            SELECT visitor_id, MIN(time) AS first_request_time
            FROM request
            GROUP BY visitor_id
        ) AS v ON r.visitor_id = v.visitor_id
        WHERE v.first_request_time BETWEEN {timestamps[0]} AND {timestamps[1]}""")[0][0]


def get_mobile_visitor_count_between(db: Database, timestamps: tuple[int, int], only_human=True) -> float:
    return db(f"""SELECT COUNT(*)
        FROM visitor AS v
        WHERE EXISTS (
            SELECT 1
            FROM request as r
            WHERE r.visitor_id = v.visitor_id
            AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
        )
        {'AND v.is_human = 1' if only_human else ''}
        AND v.is_mobile = 1""")[0][0]
@@ -2,54 +2,68 @@ from re import fullmatch

from regina.database import Database
from regina.utility.globals import settings
from regina.utility.utility import pdebug, warning, missing_arg, is_blacklisted, is_whitelisted
from regina.utility.utility import pdebug, warning, is_blacklisted, is_whitelisted
from regina.utility.sql_util import sanitize
from regina.data_visualization.utility import is_valid_status, cleanup_referer


def get_route_ranking(db: Database, date_condition:str) -> list[tuple[int, str]]:
def get_route_ranking(db: Database, timestamps: tuple[int, int]) -> list[tuple[int, str]]:
    """
    :returns [(request_count, route name)]
    """
    ranking = []
    for (route_id, name) in db(f"SELECT route_id, name FROM route"):
        if is_blacklisted(name, settings["route_ranking_blacklist"]): continue
        if not is_whitelisted(name, settings["route_ranking_whitelist"]): continue
        if settings["route_ranking_ignore_404"]: # use only succesful routes
        if is_blacklisted(name, settings["rankings"]["route_blacklist"]): continue
        if not is_whitelisted(name, settings["rankings"]["route_whitelist"]): continue
        if settings["rankings"]["route_ignore_404"]: # use only succesful routes
            success = False
            for (status) in db(f"SELECT status FROM request WHERE route_id = {route_id}"):
            for (status, ) in db(f"SELECT status FROM request WHERE route_id = {route_id}"):
                if is_valid_status(status):
                    pdebug(f"get_route_ranking: success code {status} for route with route_id {route_id} and name {name}")
                    pdebug(f"get_route_ranking: success code {status} for route with route_id {route_id} and name {name}", lvl=4)
                    success = True
                    break
            if not success:
                pdebug(f"get_route_ranking: route with route_id {route_id} and name {name} has only requests resulting in error")
                pdebug(f"get_route_ranking: route with route_id {route_id} and name {name} has only requests resulting in error", lvl=3)
                continue
        db.execute(f"SELECT COUNT(*) FROM request WHERE route_id = {route_id} AND {date_condition}")
        db.execute(f"SELECT COUNT(*) FROM request WHERE route_id = {route_id} AND time BETWEEN {timestamps[0]} AND {timestamps[1]}")
        ranking.append((db.fetchone()[0], name))
    ranking.sort()
    return ranking


def get_ranking(db: Database, table: str, field_name: str, date_condition:str, whitelist_regex: str|list[str]|None=None, blacklist_regex: str|list[str]|None=None) -> list[tuple[int, str]]:
def route_ranking_group_routes(route_ranking: list[tuple[int, str]]):
    """
    1) get all the distinct entries for field_name after min_date_unix_time
    2) call get_name_function with the distinct entry
    3) skip if not fully matching regex whitelist
    4) skip if fully matching regex blacklist
    5) for every entry, get the count in table after min_date_unix_time
    6) sort by count in ascending order
    @returns [(count, name)]
    group the routes in the route ranking according the groups defined in the config section "route-groups"
    """
    ranking = {}
    for count, route in route_ranking:
        ingroup = False
        for group_name, group_regexp in settings["route-groups"].items():
            if fullmatch(group_regexp, route):
                if group_name in ranking:
                    ranking[group_name] += count
                else:
                    ranking[group_name] = count
                ingroup = True
        if not ingroup:
            ranking[route] = count
    ranking = [ (c, name) for name, c in ranking.items() ]
    ranking.sort()
    return ranking


def get_referer_ranking(db: Database, timestamps: tuple[int, int]) -> list[tuple[int, str]]:
    """
    @returns [(count, referer)]
    """
    ranking = []
    for (name) in db(f"SELECT DISTINCT {field_name} FROM {table} WHERE {date_condition}"):
        if is_blacklisted(name, blacklist_regex): continue
        if not is_whitelisted(name, whitelist_regex): continue
        db.execute(f"SELECT COUNT(*) FROM {table} WHERE {field_name} = '{name}' AND {date_condition}")
    for referer_id, name in db(f"SELECT referer_id, name FROM referer"):
        if is_blacklisted(name, settings["rankings"]["referer_blacklist"]): continue
        if not is_whitelisted(name, settings["rankings"]["referer_whitelist"]): continue
        db.execute(f"SELECT COUNT(*) FROM request WHERE referer_id = {referer_id} AND time BETWEEN {timestamps[0]} AND {timestamps[1]}")
        ranking.append((db.fetchone()[0], name))
    ranking.sort()
    return ranking


def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
    unique_referers = dict()
    for count, referer in referer_ranking:
@@ -64,88 +78,129 @@ def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
    referer_ranking.sort()


def get_city_and_country_ranking(db: Database, require_humans=True):
def get_city_ranking(db: Database, timestamps: tuple[int, int], add_country_code=True, only_human=True):
    """
    @returns [(count, "city (CO)")], [(count, country)]
    @returns [(count, city (Country Code))]
    """
    cities_dict = {}
    country_dict = {}

    sql_cmd = f"SELECT ci.name, co.code, co.name FROM country AS co, city as ci, visitor as v, ip_range as i WHERE v.ip_range_id = i.ip_range_id AND i.city_id = ci.city_id AND ci.country_id = co.country_id"
    if require_humans: sql_cmd += " AND v.is_human = 1"
    result = db(sql_cmd)

    for (city, country_code, country) in result:
        if city in cities_dict:
            cities_dict[city][0] += 1
        else:
            if is_blacklisted(city, settings["city_ranking_blacklist"]): continue
            if not is_whitelisted(city, settings["city_ranking_whitelist"]): continue
            cities_dict[city] = [1, country_code, country] # count, country code

        if country in country_dict:
            country_dict[country] += 1
        else:
            if is_blacklisted(country, settings["country_ranking_blacklist"]): continue
            if not is_whitelisted(country, settings["country_ranking_whitelist"]): continue
            country_dict[country] = 1 # count, country code

    city_ranking = [(v[0], f"{city} ({v[1]})") for city,v in cities_dict.items()]
    city_ranking.sort()
    country_ranking = [(count, country) for country,count in country_dict.items()]
    country_ranking.sort()
    return city_ranking, country_ranking
    ranking = []
    results = db(f"""SELECT co.code, ci.name,COUNT(v.visitor_id)
        FROM country as co, city as ci, visitor as v, ip_range as i
        WHERE ci.city_id = i.city_id
        AND co.country_id = ci.country_id
        AND i.ip_range_id = v.ip_range_id
        AND EXISTS(
            SELECT 1
            FROM request AS r
            WHERE r.visitor_id = v.visitor_id
            AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
        )
        {'AND v.is_human = 1' if only_human else ''}
        GROUP BY ci.name
        ORDER BY COUNT(v.visitor_id)
        """)
    for code, name, count in results:
        if is_blacklisted(name, settings["rankings"]["city_blacklist"]): continue
        if not is_whitelisted(name, settings["rankings"]["city_whitelist"]): continue
        if add_country_code:
            name = f"{name} ({code})"
        ranking.append((count, name))
    # for (city_id, name) in db(f"SELECT city_id, name FROM city"):
    #     if is_blacklisted(name, settings["rankings"]["city_blacklist"]): continue
    #     if not is_whitelisted(name, settings["rankings"]["city_whitelist"]): continue
    #     db.execute(f"""SELECT COUNT(v.visitor_id)
    #         FROM visitor AS v, ip_range AS i
    #         WHERE i.city_id = {city_id}
    #         AND i.ip_range_id = v.ip_range_id
    #         AND EXISTS(
    #             SELECT 1
    #             FROM request AS r
    #             WHERE r.visitor_id = v.visitor_id
    #             AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
    #         )
    #         {'AND v.is_human = 1' if only_human else ''}""")
    #     ranking.append((db.fetchone()[0], name))
    ranking.sort()
    return ranking
def get_platform_browser_mobile_rankings(db: Database, visitor_ids: list[int]) -> tuple[list[tuple[int, str]], list[tuple[int, str]], float]:
def get_country_ranking(db: Database, timestamps: tuple[int, int], only_human=True):
    """
    returns [(count, operating_system)], [(count, browser)], mobile_visitor_percentage
    @returns [(count, country)]
    """
    platform_ranking = {}
    platform_count = 0.0
    browser_ranking = {}
    browser_count = 0.0
    mobile_ranking = { True: 0.0, False: 0.0 }
    for visitor_id in visitor_ids:
        platform_id, browser_id, is_mobile = db(f"SELECT platform_id, browser_id, is_mobile FROM visitor WHERE visitor_id = {visitor_id}")[0]
        is_mobile = bool(is_mobile)
        if platform_id:
            if platform_id in platform_ranking: platform_ranking[platform_id] += 1
            else: platform_ranking[platform_id] = 1
            platform_count += 1
        if browser_id:
            if browser_id in browser_ranking: browser_ranking[browser_id] += 1
            else: browser_ranking[browser_id] = 1
            browser_count += 1
        if (platform_id or browser_id):
            mobile_ranking[is_mobile] += 1
    try:
        mobile_visitor_percentage = mobile_ranking[True] / (mobile_ranking[True] + mobile_ranking[False])
    except ZeroDivisionError:
        mobile_visitor_percentage = 0.0

    platform_ranking = [(c * 100/platform_count, db.get_name("platform", p_id)) for p_id, c in platform_ranking.items()]
    platform_ranking.sort()
    browser_ranking = [(c * 100/browser_count, db.get_name("browser", b_id)) for b_id, c in browser_ranking.items()]
    browser_ranking.sort()
    return platform_ranking, browser_ranking, mobile_visitor_percentage*100
    ranking = []
    # for (country_id, name) in db(f"SELECT country_id, name FROM country"):
    #     if is_blacklisted(name, settings["rankings"]["country_blacklist"]): continue
    #     if not is_whitelisted(name, settings["rankings"]["country_whitelist"]): continue
    #     db.execute(f"""SELECT COUNT(v.visitor_id)
    #         FROM visitor AS v, ip_range AS i, city AS ci
    #         WHERE ci.country_id = {country_id}
    #         AND ci.city_id = i.city_id
    #         AND i.ip_range_id = v.ip_range_id
    #         AND EXISTS(
    #             SELECT 1
    #             FROM request AS r
    #             WHERE r.visitor_id = v.visitor_id
    #             AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
    #         )
    #         {'AND v.is_human = 1' if only_human else ''}""")
    #     ranking.append((db.fetchone()[0], name))
    results = db(f"""SELECT co.name,COUNT(v.visitor_id)
        FROM country as co, city as ci, visitor as v, ip_range as i
        WHERE co.country_id = ci.country_id
        AND ci.city_id = i.city_id
        AND i.ip_range_id = v.ip_range_id
        AND EXISTS(
            SELECT 1
            FROM request AS r
            WHERE r.visitor_id = v.visitor_id
            AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
        )
        {'AND v.is_human = 1' if only_human else ''}
        GROUP BY co.name
        ORDER BY COUNT(v.visitor_id)
        """)
    for name, count in results:
        if is_blacklisted(name, settings["rankings"]["country_blacklist"]): continue
        if not is_whitelisted(name, settings["rankings"]["country_whitelist"]): continue
        ranking.append((count, name))
    ranking.sort()
    return ranking
# Store ranking in results class and dump with pickle
# class Results:
#     def __init__(self, timespan_name,
#                  r_routes: list[tuple[int, str]],
#                  r_referrers: list[tuple[int, str]],
#                  r_platforms: list[tuple[int, str]],
#                  r_browsers: list[tuple[int, str]],
#                  r_cities: list[tuple[int, str]],
#                  r_countries: list[tuple[int, str]],
#                  ):
#         self.r_routes = r_routes
#         self.r_referrers= r_referrers
#         self.r_platforms= r_platforms
#         self.r_browsers = r_browsers
#         self.r_cities = r_cities
#         self.r_countries= r_countries
def _get_platform_or_browser_ranking(db: Database, timestamps: tuple[int, int], table: str, only_human=False):
    ranking = []
    for (table_id, name) in db(f"SELECT {table}_id, name FROM {table}"):
        # if is_blacklisted(name, settings["rankings"][f"{table}_blacklist"]): continue
        # if not is_whitelisted(name, settings["rankings"][f"{table}_whitelist"]): continue
        if name == "None": continue
        db.execute(f"""SELECT COUNT(v.visitor_id)
            FROM visitor AS v, {table} AS t
            WHERE v.{table}_id = {table_id}
            AND EXISTS(
                SELECT 1
                FROM request AS r
                WHERE r.visitor_id = v.visitor_id
                AND r.time BETWEEN {timestamps[0]} AND {timestamps[1]}
            )
            {'AND v.is_human = 1' if only_human else ''}""")
        ranking.append((db.fetchone()[0], name))
    ranking.sort()
    return ranking

def get_platform_ranking(db: Database, timestamps: tuple[int, int], only_human=False):
    return _get_platform_or_browser_ranking(db, timestamps, "platform", only_human=only_human)

def get_browser_ranking(db: Database, timestamps: tuple[int, int], only_human=False):
    return _get_platform_or_browser_ranking(db, timestamps, "browser", only_human=only_human)


def make_ranking_relative(ranking: list[tuple[int, str]]) -> list[tuple[float, str]]:
    total_count = sum([ c for c, _ in ranking ])
    if total_count == 0:
        warning(f"make_ranking_relative: Can not make ranking relative, total_count is 0")
        return [ (float(c), name) for c, name in ranking ]
    rel_ranking = [ (100.0*c/total_count, name) for c, name in ranking ]
    return rel_ranking
@@ -2,17 +2,21 @@ from re import fullmatch

from regina.database import Database
from regina.utility.globals import settings
from regina.utility.utility import pdebug, warning, missing_arg
from regina.utility.utility import pdebug, warning
from regina.utility.sql_util import sanitize, sql_tablesize

# re_uri_protocol = f"(https?)://"
re_uri_protocol = f"(https?://)?"
re_uri_ipv4 = r"(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?)"
re_uri_ipv4 = r"(?:\d{1,3}\.?){4}"
# re_uri_ipv6 = ""
re_uri_domain = r"(?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})"
re_uri_route = r"(?:/(.*))?"
re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_route})"
re_uri_domain = r"(?:[^/:]+)"
re_uri_port = r"(?::\d+)?"
re_uri_route = r"(?:/.*)?"
re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_port})({re_uri_route})"
# (https?://)?((?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})|(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?))((?:/(.*))?)

re_domain = r"[^/:]+\.[a-z]{2,}"

def cleanup_referer(referer: str) -> str:
    """
    split the referer uri into its parts and reassemeble them depending on settings
@@ -21,90 +25,40 @@ def cleanup_referer(referer: str) -> str:
    if not m:
        warning(f"cleanup_referer: Could not match referer '{referer}'")
        return referer
    # pdebug(f"cleanup_referer: {referer} - {m.groups()}")
    protocol = m.groups()[0]
    subdomains = m.groups()[2]
    if not subdomains: subdomains = ""
    domain = m.groups()[1].replace(subdomains, "")
    route = m.groups()[3]
    pdebug(f"cleanup_referer: {referer} - {m.groups()}", lvl=4)
    protocol, domain, port, route = m.groups()
    if not protocol: protocol = ""
    if not port: port = ""

    if fullmatch(re_domain, domain): # no ip address
        parts = domain.split(".")
        if len(parts) < 2:
            warning(f"cleanup_referer: Domain has not enough parts: '{domain}'")
        tld = parts[-1]
        referer = parts[-2]
        subdomains = ""
        for sd in parts[:-2]:
            subdomains += f"{sd}."
        if not settings["rankings"]["referer_ignore_tld"]: referer += "." + tld
        if not settings["rankings"]["referer_ignore_subdomain"]: referer = subdomains + referer
    else:
        referer = domain
    if settings["referer_ranking_ignore_tld"]:
        if len(domain.split(".")) == 2: # if domain.tld
            referer = domain.split(".")[0]
    if not settings["referer_ranking_ignore_subdomain"]: referer = subdomains + referer
    if not settings["referer_ranking_ignore_protocol"]: referer = protocol + referer
    if not settings["referer_ranking_ignore_route"]: referer += route
    if not settings["rankings"]["referer_ignore_protocol"]: referer = protocol + referer
    if not settings["rankings"]["referer_ignore_port"]: referer += port
    if not settings["rankings"]["referer_ignore_route"]: referer += route
    # pdebug(f"cleanup_referer: cleaned up: {referer}")
    return referer



def get_where_date_str(at_date=None, min_date=None, max_date=None):
    """
    get a condition string that sets a condition on the time
    """
    # dates in unix time
    s = ""
    if at_date is not None:
        if isinstance(at_date, str):
            s += f"DATE(time, 'unixepoch') = '{sanitize(at_date)}' AND "
        elif isinstance(at_date, int|float):
            s += f"time = {int(at_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
    if min_date is not None:
        if isinstance(min_date, str):
            s += f"DATE(time, 'unixepoch') >= '{sanitize(min_date)}' AND "
        elif isinstance(min_date, int|float):
            s += f"time >= {int(min_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
    if max_date is not None:
        if isinstance(max_date, str):
            s += f"DATE(time, 'unixepoch') <= '{sanitize(max_date)}' AND "
        elif isinstance(max_date, int|float):
            s += f"time <= {int(max_date)} AND "
        else:
            print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
    if s == "":
        print(f"WARNING: get_where_date_str: no date_str generated. Returning 'time > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
        return "time > 0"
    return s.removesuffix(" AND ")

def is_valid_status(status: int):
    if status >= 400: return False
    if settings["status_300_is_success"] and status >= 300: return True
    if settings["data-collection"]["status_300_is_success"] and status >= 300: return True
    return status < 300

#
# GETTERS
#
def get_unique_visitor_ids_for_date(db: Database, date:str) -> list[int]:
    return [ visitor_id[0] for visitor_id in db(f"SELECT DISTINCT visitor_id FROM request WHERE {date}") ]

def append_human_visitors(db: Database, unique_visitor_ids, unique_visitor_ids_human: list):
    """
    for visitor in unique_visitor_ids:
        if human -> append to unique_visitor_ids_human
    """
    for visitor_id in unique_visitor_ids:
        db.execute(f"SELECT is_human FROM visitor WHERE visitor_id = {visitor_id}")
        if db.fetchone()[0] == 1:
            unique_visitor_ids_human.append(visitor_id)

def len_list_list(l: list[list]):
    size = 0
    for i in range(len(l)):
        size += len(l[i])
    return size

def get_unique_request_ids_for_date(db: Database, date_constraint:str):
    return [ request_id[0] for request_id in db(f"SELECT DISTINCT request_id FROM request WHERE {date_constraint}")]

def append_unique_request_ids_for_date_and_visitor(db: Database, date_constraint:str, visitor_id: int, unique_request_ids_human: list):
    """append all unique requests for visitor_id at date_constraint to unique_request_ids_human"""
    for request_id in db(f"SELECT DISTINCT request_id FROM request WHERE {date_constraint} AND visitor_id = {visitor_id}"):
        unique_request_ids_human.append(request_id[0])

# get number of requests per day
def get_request_count_for_date(db: Database, date_constraint:str) -> int:
    db.execute(f"SELECT COUNT(*) FROM request WHERE {date_constraint}")
    return db.fetchone()[0]

def get_unique_visitor_count(db: Database) -> int:
    return sql_tablesize(db.cur, "visitor")
@@ -1,20 +1,18 @@
# from sys import path
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
import sqlite3 as sql
from sys import exit
from re import fullmatch
import matplotlib.pyplot as plt
from os.path import isdir
from pickle import dump
from os import path, makedirs
from datetime import datetime as dt

from numpy import empty
# local
from regina.database import Database
from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
from regina.utility.utility import pdebug, warning, missing_arg
from regina.utility.sql_util import get_date_constraint, sanitize
from regina.utility.utility import pdebug, warning, error, make_parent_dirs, dict_str
from regina.utility.globals import settings
from regina.data_visualization.utility import cleanup_referer, get_where_date_str, get_unique_visitor_ids_for_date, get_unique_request_ids_for_date, append_human_visitors, append_unique_request_ids_for_date_and_visitor
from regina.data_visualization.ranking import get_city_and_country_ranking, get_platform_browser_mobile_rankings, get_ranking, cleanup_referer_ranking, get_route_ranking
from regina.data_visualization.utility import len_list_list
from regina.data_visualization.ranking import get_referer_ranking, cleanup_referer_ranking, get_route_ranking, route_ranking_group_routes, get_browser_ranking, get_platform_ranking, get_city_ranking, get_country_ranking, make_ranking_relative
import regina.data_visualization.history as h

"""
visualize information from the databse
@@ -53,12 +51,14 @@ color_settings_platforms = {
    palette["blue"]: ["Windows"],
}


def len_list_list(l: list[list]):
    size = 0
    for i in range(len(l)):
        size += len(l[i])
    return size
color_settings_history = {
    "visitors": "#000050",
    "visitors_human": "#3366ff",
    "visitors_new": "#66ccff",
    "requests": "#770000",
    "requests_human": "#ff3500",
    "requests_new": "#ff9999",
}


#
@@ -86,18 +86,18 @@ def add_labels_at_top_of_bar(xdata, ydata, max_y_val, ax, bar_plot):
    for idx,rect in enumerate(bar_plot):
        ax.text(rect.get_x() + rect.get_width()/2, ydata[idx] - y_offset, round(ydata[idx], 1), ha='center', bbox=dict(facecolor='white', alpha=0.8))

def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None):
def plot_ranking(ranking: list[tuple[int or float, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None):
    """
    make a bar plot of the ranking
    """
    # pdebug(f"plot_ranking: ranking={ranking}")
    if not fig:
        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
        fig = plt.figure(figsize=figsize, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    # create new axis if none is given
    ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
    # fill x y data
    if len(ranking) > settings["file_ranking_plot_max_files"]:
        start_index = len(ranking) - settings["file_ranking_plot_max_files"]
    if len(ranking) > settings["rankings"]["route_plot_max_routes"]:
        start_index = len(ranking) - settings["rankings"]["route_plot_max_routes"]
    else: start_index = 0
    x_names = []
    y_counts = []
@@ -120,14 +120,14 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",

    if len(y_counts) > 0:
        add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar)
        if settings["plot_add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar)
        if settings["plot-generation"]["add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar)
    # ax.ylabel(y_counts)
    return fig


# def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue", rotate_xlabel=0):
#     if not fig:
#         fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
#         fig = plt.figure(figsize=None, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
#     if not ax:
#         ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
#     else:
@@ -139,29 +139,39 @@ def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="",
#     if label: ax.legend()
#     return fig, ax

def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major", rotate_xlabel=0, figsize=None):
    if not fig:
        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
    if not (ax1 and ax2):
        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
        ax2 = ax1.twinx()
        ax2.set_ylabel(ylabel2)
    ax1.tick_params(axis="x", rotation=90)
    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)

class Plot2Y:
    def __init__(self, xlabel, ylabel_left, ylabel_right, grid="major", rotate_xlabel=0, figsize=None):
        self.fig, self.ax1 = plt.subplots(figsize=figsize, dpi=settings["plot-generation"]["dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
        self.ax1.set_xlabel(xlabel=xlabel) #, ylabel=ylabel_left)
        self.ax1.set_ylabel(ylabel=ylabel_left) #, ylabel=ylabel_left)
        self.ax2 = self.ax1.twinx()
        self.ax2.set_ylabel(ylabel_right)
        self.ax1.tick_params(axis="x", rotation=90)
        self.plots = None
        if grid == "major" or grid == "minor" or grid == "both":
            if grid == "minor" or "both":
                self.ax1.minorticks_on()
            self.ax1.grid(visible=True, which=grid, linestyle="-", color="#888")

    def _plot(self, ax, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
        plot = ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
    # ax1.set_xticks(ax1.get_xticks())
    # ax1.set_xticklabels(xdata, rotation=rotate_xlabel, rotation_mode="anchor")
    # if label1 or label2: ax1.legend()
    if plots: plots += plot1 + plot2
    else: plots = plot1 + plot2
    plt.legend(plots, [ l.get_label() for l in plots])
        if self.plots: self.plots += plot
        else: self.plots = plot
        plt.legend(self.plots, [ l.get_label() for l in self.plots ])

    if grid == "major" or grid == "minor" or grid == "both":
        if grid == "minor" or "both":
            ax1.minorticks_on()
        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")

    return fig, ax1, ax2, plots
    def plot_left(self, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
        self._plot(self.ax1, xdata, ydata, label, linestyle, marker, color)

    def plot_right(self, xdata, ydata, label="", linestyle="-", marker="", color="blue"):
        self._plot(self.ax2, xdata, ydata, label, linestyle, marker, color)

    def get_fig(self):
        return self.fig


#
@@ -172,194 +182,259 @@ def visualize(db: Database):
|
||||
This assumes sanity checks have been done
|
||||
"""
|
||||
pdebug("visualizing...")
|
||||
if not settings["db"]: missing_arg("db")
|
||||
if not settings["server_name"]: missing_arg("server_name")
|
||||
|
||||
img_dir = settings["img_dir"]
|
||||
pdebug("img_dir:", img_dir)
|
||||
img_filetype = settings["img_filetype"]
|
||||
if isdir(img_dir) and img_filetype:
|
||||
gen_img = True
|
||||
else:
|
||||
print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
|
||||
gen_img = False
|
||||
def make_dir_if_not_None(d):
|
||||
if d:
|
||||
if not path.isdir(d):
|
||||
makedirs(d)
|
||||
|
||||
img_location = settings["img_location"]
|
||||
names = {
|
||||
# paths
|
||||
"img_route_ranking_last_x_days": f"ranking_routes_last_x_days.{img_filetype}",
|
||||
"img_referer_ranking_last_x_days": f"ranking_referers_last_x_days.{img_filetype}",
|
||||
"img_countries_last_x_days": f"ranking_countries_last_x_days.{img_filetype}",
|
||||
"img_cities_last_x_days": f"ranking_cities_last_x_days.{img_filetype}",
|
||||
"img_browser_ranking_last_x_days": f"ranking_browsers_last_x_days.{img_filetype}",
|
||||
"img_platform_ranking_last_x_days": f"ranking_platforms_last_x_days.{img_filetype}",
|
||||
"img_visitors_and_requests_last_x_days": f"visitor_request_count_daily_last_x_days.{img_filetype}",
|
||||
# plot generation
|
||||
img_out_dir = settings["plot-generation"]["img_out_dir"]
|
||||
make_dir_if_not_None(img_out_dir)
|
||||
img_filetype = settings["plot-generation"]["filetype"]
|
||||
img_location = settings["html-generation"]["img_location"]
|
||||
pdebug(f"visualize: img_out_dir='{img_out_dir}', filetype='{img_filetype}', {img_location}='{img_location}'", lvl=2)
|
||||
if not img_out_dir:
|
||||
pdebug(f"visualize: Not generating images since img_out_dir is None", lvl=1)
|
||||
|
||||
"img_route_ranking_total": f"ranking_routes_total.{img_filetype}",
|
||||
"img_referer_ranking_total": f"ranking_referers_total.{img_filetype}",
|
||||
"img_countries_total": f"ranking_countries_total.{img_filetype}",
|
||||
"img_cities_total": f"ranking_cities_total.{img_filetype}",
|
||||
"img_browser_ranking_total": f"ranking_browsers_total.{img_filetype}",
|
||||
"img_platform_ranking_total": f"ranking_platforms_total.{img_filetype}",
|
||||
"img_visitors_and_requests_total": f"visitor_request_count_daily_total.{img_filetype}",
|
||||
# data export
|
||||
data_out_dir = settings["data-export"]["data_out_dir"]
|
||||
make_dir_if_not_None(data_out_dir)
|
||||
data_filetype = settings["data-export"]["filetype"]
|
||||
pdebug(f"visualize: data_out_dir='{data_out_dir}', filetype='{data_filetype}'", lvl=2)
|
||||
if not data_out_dir:
|
||||
pdebug(f"visualize: Not exporting data since data_out_dir is None", lvl=1)
|
||||
|
||||
if not data_out_dir and not img_out_dir:
|
||||
warning(f"data_out_dir and img_out_dir are both None. No data will be exported and no plots will be generated!")
|
||||
|
||||
html_variables = {
|
||||
# values
|
||||
"mobile_visitor_percentage_total": 0.0,
|
||||
"mobile_visitor_percentage_last_x_days": 0.0,
|
||||
"visitor_count_last_x_days": 0,
|
||||
"visitor_count_total": 0,
|
||||
"request_count_last_x_days": 0,
|
||||
"request_count_total": 0,
|
||||
"human_visitor_percentage_last_x_days": 0.0,
|
||||
"human_visitor_percentage_total": 0.0,
|
||||
"human_request_percentage_last_x_days": 0.0,
|
||||
"human_request_percentage_total": 0.0,
|
||||
"visitor_count_last_x_days": "NaN",
|
||||
"visitor_count_total": "NaN",
|
||||
"request_count_last_x_days": "NaN",
|
||||
"request_count_total": "NaN",
|
||||
"visitor_count_human_last_x_days": "NaN",
|
||||
"visitor_count_human_total": "NaN",
|
||||
"request_count_human_last_x_days": "NaN",
|
||||
"request_count_human_total": "NaN",
|
||||
"human_visitor_percentage_last_x_days": "NaN",
|
||||
"human_visitor_percentage_total": "NaN",
|
||||
"human_request_percentage_last_x_days": "NaN",
|
||||
"human_request_percentage_total": "NaN",
|
||||
"mobile_visitor_percentage_total": "NaN",
|
||||
"mobile_visitor_percentage_last_x_days": "NaN",
|
||||
# general
|
||||
"regina_version": settings["version"],
|
||||
"server_name": settings["server_name"],
|
||||
"last_x_days": settings["last_x_days"], # must be after all the things with last_x_days!
|
||||
"regina_version": settings["regina"]["version"],
|
||||
"server_name": settings["regina"]["server_name"],
|
||||
"last_x_days": settings["data-visualization"]["last_x_days"],
|
||||
"earliest_date": "1990-1-1",
|
||||
"generation_date": "1990-1-1 0:0:0",
|
||||
}
|
||||
|
||||
db = Database(database_path=settings["db"])
|
||||
for suffix in ["last_x_days", "total"]:
|
||||
# add all plot paths as variables: img_plot_suffix -> plot_suffix.filetype
|
||||
# not adding img_location or img_out_dir since these names are needed for both
|
||||
html_variables.update((f"img_{plot_}_{suffix}", f"{plot_}_{suffix}.{img_filetype}") for plot_ in ["ranking_platform", "ranking_browser", "ranking_country", "ranking_city", "ranking_referer", "ranking_route", "history_visitor_request"])
|
||||
|
||||
get_humans_visitors = settings["data-visualization"]["history_track_human_visitors"]
|
||||
get_new_visitors = settings["data-visualization"]["history_track_new_visitors"]
|
||||
|
||||
get_humans = settings["get_human_percentage"]
|
||||
# pdebug(f"visualize: settings {settings}")
|
||||
# DATE STRINGS
|
||||
earliest_date = db.get_earliest_date()
|
||||
names["earliest_date"] = dt.fromtimestamp(earliest_date).strftime("%Y-%m-%d")
|
||||
names["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
# LAST_X_DAYS
|
||||
# last_x_days_min_date: latest_date - last_x_days
|
||||
secs_per_day = 86400
|
||||
last_x_days_min_date = db.get_latest_date() - settings["last_x_days"] * secs_per_day
|
||||
last_x_days_constraint = get_where_date_str(min_date=last_x_days_min_date)
|
||||
last_x_days = db.get_days_where(last_x_days_constraint)
|
||||
last_x_days_contraints = [get_where_date_str(at_date=day) for day in last_x_days]
|
||||
earliest_timestamp = db.get_earliest_timestamp()
|
||||
html_variables["earliest_date"] = dt.fromtimestamp(earliest_timestamp).strftime("%Y-%m-%d")
|
||||
html_variables["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
# ALL DATES
|
||||
all_time_constraint = get_where_date_str(min_date=0)
|
||||
todos: list[tuple[str, tuple[int, int], list[str], list[str], list[tuple[int, int]]]] = [] # suffix, whole_time_timestamps, history_date_constraints, history_date_names, history_date_timestamps
|
||||
|
||||
now_stamp = int(dt.now().timestamp())
|
||||
total: bool = settings["data-visualization"]["total"]
|
||||
if total:
|
||||
all_time_timestamps = (0, now_stamp)
|
||||
# all months in yyyy-mm format
|
||||
months_all_time = db.get_months_where(all_time_constraint)
|
||||
month_names = db.get_months_where(get_date_constraint(min_date=0))
|
||||
month_timestamps = []
|
||||
# sqlite constrict to month string
|
||||
months_strs = []
|
||||
for year_month in months_all_time:
|
||||
month_constraints = []
|
||||
for year_month in month_names:
|
||||
year, month = year_month.split("-")
|
||||
# first day of the month
|
||||
min_date = dt(int(year), int(month), 1).timestamp()
|
||||
# timestamp of first day of the month
|
||||
min_date = int(dt(int(year), int(month), 1).timestamp())
|
||||
month = (int(month) % 12) + 1 # + 1 month
|
||||
year = int(year)
|
||||
if month == 1: year += 1
|
||||
# first day of the next month - 1 sec
|
||||
max_date = dt(year, month, 1).timestamp() - 1
|
||||
months_strs.append(get_where_date_str(min_date=min_date, max_date=max_date))
|
||||
if month == 1: year += 1
|
||||
max_date = int(dt(year, month, 1).timestamp()) - 1
|
||||
month_constraints.append(get_date_constraint(min_date=min_date, max_date=max_date))
|
||||
month_timestamps.append((min_date, max_date))
|
||||
todos.append(("total", all_time_timestamps, month_constraints, month_names, month_timestamps))
|
||||
|
||||
for i in range(2):
|
||||
suffix = ["_total", "_last_x_days"][i]
|
||||
date_constraint = [all_time_constraint, last_x_days_constraint][i]
|
||||
date_names = [months_all_time, last_x_days][i]
|
||||
date_constraints = [months_strs, last_x_days_contraints][i]
|
||||
assert(len(date_names) == len(date_constraints))
|
||||
last_x_days: int = settings["data-visualization"]["last_x_days"]
|
||||
if last_x_days > 0:
|
||||
secs_per_day = 86400
|
||||
last_x_days_min_date = db.get_latest_timestamp() - last_x_days * secs_per_day
|
||||
last_x_days_timestamps = (last_x_days_min_date, now_stamp)
|
||||
last_x_days_constraint = get_date_constraint(min_date=last_x_days_min_date)
|
||||
days = db.get_days_where(last_x_days_constraint) # yyyy-mm-dd
|
||||
day_constrains = [ get_date_constraint(at_date=day) for day in days ]
|
||||
day_timestamps = []
|
||||
for day in days:
|
||||
year, month, day = day.split("-")
|
||||
min_date = int(dt(int(year), int(month), int(day)).timestamp())
|
||||
max_date = min_date + secs_per_day
|
||||
day_timestamps.append((min_date, max_date))
|
||||
|
||||
# FILES
|
||||
todos.append(("last_x_days", last_x_days_timestamps, day_constrains, days, day_timestamps))
|
||||
|
||||
def export_ranking(name: str, column_name: str, ranking: list[tuple[int or float, str]]):
|
||||
filename = f"{data_out_dir}/{name}.{data_filetype}"
|
||||
if data_filetype == "pkl":
|
||||
pdebug(f"visualize: Exporting {name} as pickle to '{filename}'", lvl=2)
|
||||
with open(filename, "wb") as file:
|
||||
dump(ranking, file)
|
||||
elif data_filetype == "csv":
|
||||
pdebug(f"visualize: Exporting {name} as csv to '{filename}'", lvl=2)
|
||||
s = f'"{name}"\n'
|
||||
s += f'"count","{column_name}"\n'
|
||||
for count, item in ranking:
|
||||
s += f'{count},"{item}"\n'
|
||||
s = s.strip("\n")
|
||||
with open(filename, "w") as file:
|
||||
file.write(s)
|
||||
else:
|
||||
error(f"visualize: Unsupported data filetype: '{data_filetype}'")
|
||||
|
||||
def savefig(name: str, figure):
|
||||
filename = f"{img_out_dir}/{name}.{img_filetype}"
|
||||
pdebug(f"visualize: Saving plot for {name} as '{filename}'")
|
||||
figure.savefig(filename, bbox_inches="tight") # bboximg_inches="tight"
|
||||
|
||||
|
||||
pdebug(f"visualize: total={total}, last_x_days={last_x_days}", lvl=3)
|
||||
for suffix, whole_timespan_timestamps, single_date_constraints, single_date_names, single_date_timestamps in todos:
|
||||
assert(len(single_date_names) == len(single_date_constraints))
|
||||
|
||||
# STATISTICS
|
||||
visitor_count = h.get_visitor_count_between(db, whole_timespan_timestamps)
|
||||
request_count = h.get_request_count_between(db, whole_timespan_timestamps)
|
||||
html_variables[f"visitor_count_{suffix}"] = visitor_count
|
||||
html_variables[f"request_count_{suffix}"] = request_count
|
||||
|
||||
if get_humans_visitors:
|
||||
visitor_count_human = h.get_visitor_count_between(db, whole_timespan_timestamps, only_human=True)
|
||||
request_count_human = h.get_request_count_between(db, whole_timespan_timestamps, only_human=True)
|
||||
html_variables[f"visitor_count_human_{suffix}"] = visitor_count_human
|
||||
html_variables[f"request_count_human_{suffix}"] = request_count_human
|
||||
try: html_variables[f"human_visitor_percentage_{suffix}"] = 100.0 * visitor_count_human / visitor_count
|
||||
except ZeroDivisionError: pass
|
||||
try: html_variables[f"human_request_percentage_{suffix}"] = 100.0 * request_count_human / request_count
|
||||
except ZeroDivisionError: pass
|
||||
try: html_variables[f"mobile_visitor_percentage_{suffix}"] = 100.0 * h.get_mobile_visitor_count_between(db, whole_timespan_timestamps, only_human=True) / visitor_count_human
|
||||
except ZeroDivisionError: pass
|
||||
|
||||
# HISTORY
|
||||
date_count = len(single_date_constraints)
|
||||
visitor_count_dates = [ h.get_visitor_count_between(db, single_date_timestamps[i], only_human=False) for i in range(date_count) ]
|
||||
request_count_dates = [ h.get_request_count_between(db, single_date_timestamps[i], only_human=False) for i in range(date_count) ]
|
||||
|
||||
visitor_count_human_dates = [ h.get_visitor_count_between(db, single_date_timestamps[i], only_human=True) for i in range(date_count) ]
|
||||
request_count_human_dates = [ h.get_request_count_between(db, single_date_timestamps[i], only_human=True) for i in range(date_count) ]
|
||||
|
||||
visitor_count_new_dates = [ h.get_new_visitor_count_between(db, single_date_timestamps[i]) for i in range(date_count) ]
|
||||
request_count_new_dates = [ h.get_request_from_new_visitor_count_between(db, single_date_timestamps[i]) for i in range(date_count) ]
|
||||
|
||||
if img_out_dir:
|
||||
plt_history = Plot2Y(xlabel="Date", ylabel_left="Visitor count", ylabel_right="Request count", rotate_xlabel=-45, figsize=settings["plot-generation"]["size_broad"])
|
||||
# visitors, plot on correct order
|
||||
plt_history.plot_left(single_date_names, visitor_count_dates, label="Unique visitors", color=color_settings_history["visitors"])
|
||||
if get_humans_visitors:
|
||||
plt_history.plot_left(single_date_names, visitor_count_human_dates, label="Unique visitors (human)", color=color_settings_history["visitors_human"])
|
||||
if get_new_visitors:
|
||||
plt_history.plot_left(single_date_names, visitor_count_new_dates, label="Unique visitors (new)", color=color_settings_history["visitors_new"])
|
||||
# requests
|
||||
plt_history.plot_right(single_date_names, request_count_dates, label="Unique requests", color=color_settings_history["requests"])
|
||||
if get_humans_visitors:
|
||||
plt_history.plot_left(single_date_names, request_count_human_dates, label="Unique requests (human)", color=color_settings_history["requests_human"])
|
||||
if get_new_visitors:
|
||||
plt_history.plot_left(single_date_names, request_count_new_dates, label="Unique requests (new)", color=color_settings_history["requests_new"])
|
||||
|
||||
savefig(f"history_visitor_request_{suffix}", plt_history.get_fig())
|
||||
# if data_out_dir: # TODO export history
|
||||
# s = ""
|
||||
|
||||
# ROUTES
|
||||
# TODO handle groups
|
||||
file_ranking = get_route_ranking(db, date_constraint)
|
||||
if gen_img:
|
||||
fig_file_ranking = plot_ranking(file_ranking, xlabel="Route Name", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot_size_broad"])
|
||||
fig_file_ranking.savefig(f"{img_dir}/{names[f'img_route_ranking{suffix}']}", bbox_inches="tight")
|
||||
route_ranking = get_route_ranking(db, whole_timespan_timestamps)
|
||||
route_ranking = route_ranking_group_routes(route_ranking)
|
||||
pdebug("visualize: route ranking", route_ranking, lvl=3)
|
||||
if img_out_dir:
|
||||
fig_file_ranking = plot_ranking(route_ranking, xlabel="Route", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot-generation"]["size_broad"])
|
||||
savefig(f"ranking_route_{suffix}", fig_file_ranking)
|
||||
if data_out_dir:
|
||||
export_ranking(f"ranking_route_{suffix}", "route", route_ranking)
|
||||
|
||||
|
||||
# REFERER
|
||||
referer_ranking = get_ranking(db, "request", "referer", date_constraint, settings["referer_ranking_whitelist"], settings["referer_ranking_whitelist"])
|
||||
pdebug("Referer ranking", referer_ranking)
|
||||
referer_ranking = get_referer_ranking(db, whole_timespan_timestamps)
|
||||
cleanup_referer_ranking(referer_ranking)
|
||||
if gen_img:
|
||||
fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
|
||||
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_referer_ranking{suffix}']}", bbox_inches="tight")
|
||||
pdebug("visualize: referer ranking", referer_ranking, lvl=3)
|
||||
if img_out_dir:
|
||||
fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
|
||||
savefig(f"ranking_referer_{suffix}", fig_referer_ranking)
|
||||
if data_out_dir:
|
||||
export_ranking(f"ranking_referer_{suffix}", "referer", referer_ranking)
|
||||
|
||||
# GEOIP
|
||||
if settings["do_geoip_rankings"]:
|
||||
city_ranking, country_ranking = get_city_and_country_ranking(db, require_humans=settings["geoip_only_humans"])
|
||||
pdebug("Country ranking:", country_ranking)
|
||||
pdebug("City ranking:", city_ranking)
|
||||
if gen_img:
|
||||
fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
|
||||
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_countries{suffix}']}", bbox_inches="tight")
|
||||
|
||||
fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
|
||||
fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_cities{suffix}']}", bbox_inches="tight")
|
||||
|
||||
|
||||
# USER
|
||||
# visitor_agent_ranking = get_visitor_agent_ranking(cur, date_str)
|
||||
# for the time span
|
||||
unique_visitor_ids = get_unique_visitor_ids_for_date(db, date_constraint)
|
||||
unique_visitor_ids_human = []
|
||||
append_human_visitors(db, unique_visitor_ids, unique_visitor_ids_human)
|
||||
# for each date
|
||||
date_count = len(date_constraints)
|
||||
unique_visitor_ids_dates: list[list[int]] = []
|
||||
unique_request_ids_dates: list[list[int]] = []
|
||||
unique_visitor_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
|
||||
unique_request_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
|
||||
for i in range(date_count):
|
||||
date_constraint_ = date_constraints[i]
|
||||
unique_visitor_ids_dates.append(get_unique_visitor_ids_for_date(db, date_constraint_))
|
||||
unique_request_ids_dates.append(get_unique_request_ids_for_date(db, date_constraint_))
|
||||
if get_humans:
|
||||
# empty_list = []
|
||||
# unique_visitor_ids_human_dates.append(empty_list)
|
||||
append_human_visitors(db, unique_visitor_ids_dates[i], unique_visitor_ids_human_dates[i])
|
||||
# unique_request_ids_human_dates.append(list())
|
||||
for human in unique_visitor_ids_human_dates[i]:
|
||||
append_unique_request_ids_for_date_and_visitor(db, date_constraint_, human, unique_request_ids_human_dates[i])
|
||||
# print("\n\tuu", unique_visitor_ids_dates, "\n\tur",unique_request_ids_dates, "\n\tuuh", unique_visitor_ids_human_dates, "\n\turh", unique_request_ids_human_dates)
|
||||
# pdebug("uui", unique_visitor_ids)
|
||||
# pdebug("uuih", unique_visitor_ids_human)
|
||||
# pdebug("uuid", unique_visitor_ids_dates)
|
||||
# pdebug("uuidh", unique_visitor_ids_human_dates)
|
||||
# pdebug("urid", unique_request_ids_dates)
|
||||
# pdebug("uridh", unique_visitor_ids_human_dates)
|
||||
# pdebug(f"human_visitor_precentage: len_list_list(visitor_ids)={len_list_list(unique_visitor_ids_dates)}, len_list_list(visitor_ids_human)={len_list_list(unique_visitor_ids_human_dates)}")
|
||||
if get_humans:
|
||||
try:
|
||||
names[f"human_visitor_percentage{suffix}"] = round(100 * len_list_list(unique_visitor_ids_human_dates) / len_list_list(unique_visitor_ids_dates), 2)
|
||||
except:
|
||||
names[f"human_visitor_percentage{suffix}"] = -1.0
|
||||
try:
|
||||
names[f"human_request_percentage{suffix}"] = round(100 * len_list_list(unique_request_ids_human_dates) / len_list_list(unique_request_ids_dates), 2)
|
||||
except:
|
||||
names[f"human_request_percentage{suffix}"] = -1.0
|
||||
names[f"visitor_count{suffix}"] = len_list_list(unique_visitor_ids_dates)
|
||||
names[f"request_count{suffix}"] = len_list_list(unique_request_ids_dates)
|
||||
if gen_img:
|
||||
fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_dates], [len(request_ids) for request_ids in unique_request_ids_dates], xlabel="Date", ylabel1="Visitor count", label1="Unique visitors", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"], rotate_xlabel=-45, figsize=settings["plot_size_broad"])
|
||||
if get_humans:
|
||||
fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_human_dates], [len(request_ids) for request_ids in unique_request_ids_human_dates], label1="Unique visitors (human)", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots, rotate_xlabel=-45, figsize=settings["plot_size_broad"])
|
||||
fig_daily.savefig(f"{img_dir}/{names[f'img_visitors_and_requests{suffix}']}", bbox_inches="tight")
|
||||
if settings["data-collection"]["get_visitor_location"]:
|
||||
country_ranking = get_country_ranking(db, whole_timespan_timestamps, only_human=settings["rankings"]["geoip_only_humans"])
|
||||
pdebug("visualize: country ranking:", country_ranking, lvl=3)
|
||||
city_ranking = get_city_ranking(db, whole_timespan_timestamps, add_country_code=settings["rankings"]["city_add_country_code"], only_human=settings["rankings"]["geoip_only_humans"])
|
||||
pdebug("visualize: city ranking:", city_ranking, lvl=3)
|
||||
if img_out_dir:
|
||||
fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
|
||||
savefig(f"ranking_country_{suffix}", fig_referer_ranking)
|
||||
fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot-generation"]["size_broad"])
|
||||
savefig(f"ranking_city_{suffix}", fig_referer_ranking)
|
||||
if data_out_dir:
|
||||
export_ranking(f"ranking_country_{suffix}", "country", country_ranking)
|
||||
export_ranking(f"ranking_city_{suffix}", "city", city_ranking)
|
||||
|
||||
# os & browser
|
||||
platform_ranking, browser_ranking, names[f"mobile_visitor_percentage{suffix}"] = get_platform_browser_mobile_rankings(db, unique_visitor_ids_human)
|
||||
if gen_img:
|
||||
fig_os_rating = plot_ranking(platform_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_platforms, figsize=settings["plot_size_narrow"])
|
||||
fig_os_rating.savefig(f"{img_dir}/{names[f'img_platform_ranking{suffix}']}", bbox_inches="tight")
|
||||
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browser", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot_size_narrow"])
|
||||
fig_browser_rating.savefig(f"{img_dir}/{names[f'img_browser_ranking{suffix}']}", bbox_inches="tight")
|
||||
browser_ranking = get_browser_ranking(db, whole_timespan_timestamps, only_human=False)
|
||||
browser_ranking = make_ranking_relative(browser_ranking)
|
||||
pdebug("visualize: browser ranking:", browser_ranking, lvl=3)
|
||||
platform_ranking = get_platform_ranking(db, whole_timespan_timestamps, only_human=False)
|
||||
platform_ranking = make_ranking_relative(platform_ranking)
|
||||
pdebug("visualize: platform ranking:", platform_ranking, lvl=3)
|
||||
if img_out_dir:
|
||||
fig_os_rating = plot_ranking(platform_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_platforms, figsize=settings["plot-generation"]["size_narrow"])
|
||||
savefig(f"ranking_platform_{suffix}", fig_os_rating)
|
||||
fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browser", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot-generation"]["size_narrow"])
|
||||
savefig(f"ranking_browser_{suffix}", fig_browser_rating)
|
||||
if data_out_dir:
|
||||
export_ranking(f"ranking_platform_{suffix}", "platform", platform_ranking)
|
||||
export_ranking(f"ranking_browser_{suffix}", "browser", browser_ranking)
|
||||
|
||||
# print("OS ranking", os_ranking)
|
||||
# print("Browser ranking", browser_ranking)
|
||||
# print("Mobile percentage", names["mobile_visitor_percentage"])
|
||||
if settings["template_html"] and settings["html_out_path"]:
|
||||
pdebug(f"visualize: writing to html: {settings['html_out_path']}")
|
||||
|
||||
with open(settings["template_html"], "r") as file:
|
||||
html_variables_str = dict_str(html_variables).replace('\n', '\n\t')
|
||||
pdebug(f"visualize: html_variables:\n\t{html_variables_str}", lvl=2)
|
||||
|
||||
template_html: str|None = settings["html-generation"]["template_html"]
|
||||
html_out_path: str|None = settings["html-generation"]["html_out_path"]
|
||||
if template_html and html_out_path:
|
||||
pdebug(f"visualize: generating from template '{template_html}' to '{html_out_path}'", lvl=2)
|
||||
if not path.isfile(template_html):
|
||||
error(f"Invalid template file path: '{template_html}'")
|
||||
with open(template_html, "r") as file:
|
||||
html = file.read()
|
||||
for name, value in names.items():
|
||||
for name, value in html_variables.items():
|
||||
if "img" in name:
|
||||
value = f"{img_location}/{value}"
|
||||
if type(value) == float:
|
||||
elif type(value) == float:
|
||||
value = f"{value:.2f}"
|
||||
html = html.replace(f"%{name}", str(value))
|
||||
with open(settings["html_out_path"], "w") as file:
|
||||
make_parent_dirs(html_out_path)
|
||||
with open(html_out_path, "w") as file:
|
||||
file.write(html)
|
||||
else:
|
||||
warning(f"Skipping html generation because either template_html or html_out_path is invalid: template_html='{settings['template_html']}', html_out_path='{settings['html_out_path']}'")
|
||||
pdebug(f"visualize: skipping html generation because either template_html or html_out_path is None: template_html='{template_html}', html_out_path='{html_out_path}'", lvl=1)
|
||||
|
@@ -48,43 +48,44 @@
<body>
<h1>Analytics for %server_name</h1>
<div class=box>
<center>
<div style="text-align: center">
<h2>Last %last_x_days days</h2>
<hr>
<h3>Visitor and request count (per month)</h3>
<img src="%img_visitors_and_requests_last_x_days" alt="Daily Statistics", title="Visitor and request count for the last %last_x_days days">
<img src="%img_history_visitor_request_last_x_days" alt="Daily Statistics", title="Visitor and request count for the last %last_x_days days">
<ul>
<li>visitor count: <b>%visitor_count_last_x_days</b>, from which <b>%human_visitor_percentage_last_x_days%</b> are human</li>
<li>request count: <b>%request_count_last_x_days</b>, from which <b>%human_request_percentage_last_x_days%</b> came from human visitors </li>
<li>visitor count: <b>%visitor_count_last_x_days</b>, from which <b>%visitor_count_human_last_x_days</b> (<b>%human_visitor_percentage_last_x_days%</b>) are human</li>
<li>request count: <b>%request_count_last_x_days</b>, from which <b>%request_count_human_last_x_days</b> (<b>%human_request_percentage_last_x_days%</b>) came from human visitors </li>
</ul>
<hr>

<h3>File access</h3>
<img src="%img_file_ranking_last_x_days" alt="File ranking for the last %last_x_days days", title="File ranking for the last %last_x_days days">
<img src="%img_ranking_route_last_x_days" alt="File ranking for the last %last_x_days days", title="File ranking for the last %last_x_days days">
<hr>

<h3>Platforms and browsers</h3>
<img class="small" src="%img_operating_system_ranking_last_x_days" alt="Operating system ranking for the last %last_x_days days", title="Operating system ranking for the last %last_x_days days">
<img class="small" src="%img_browser_ranking_last_x_days" alt="Browser ranking for the last %last_x_days days", title="Browser ranking for the last %last_x_days days">
<img class="small" src="%img_ranking_platform_last_x_days" alt="Operating system ranking for the last %last_x_days days", title="Operating system ranking for the last %last_x_days days">
<img class="small" src="%img_ranking_browser_last_x_days" alt="Browser ranking for the last %last_x_days days", title="Browser ranking for the last %last_x_days days">
<h4>Mobile visitors: %mobile_visitor_percentage_last_x_days%</h4>
<hr>

<h3>Referrers</h3>
<img src="%img_referer_ranking_last_x_days" alt="Referer ranking for the last %last_x_days days", title="Referer ranking for the last %last_x_days days">
<img src="%img_ranking_referer_last_x_days" alt="Referer ranking for the last %last_x_days days", title="Referer ranking for the last %last_x_days days">
<hr>

<!-- <h3>GeoIP</h3> -->
<!-- <img src="%img_countries_last_x_days" alt="Country ranking for the last %last_x_days days", title="Country ranking for the last %last_x_days days"> -->
<!-- <img src="%img_cities_last_x_days" alt="City ranking for the last %last_x_days days", title="City ranking for the last %last_x_days days"> -->
<!-- <hr> -->
</center>
<h3>GeoIP</h3>
<img src="%img_ranking_country_last_x_days" alt="Country ranking for the last %last_x_days days", title="Country ranking for the last %last_x_days days">
<img src="%img_ranking_city_last_x_days" alt="City ranking for the last %last_x_days days", title="City ranking for the last %last_x_days days">
<hr>
</div>
</div>

<div class=box>
<center>
<div style="text-align: center">
<h2>Total (since %earliest_date)</h2>
<hr>
<h3>Visitor and request count (per month)</h3>
<img src="%img_visitors_and_requests_total" alt="Monthly Statistics", title="Visitor and request count">
<img src="%img_history_visitor_request_total" alt="Monthly Statistics", title="Visitor and request history">
<ul>
<li>Total visitor count: <b>%visitor_count_total</b>, from which <b>%human_visitor_percentage_total%</b> are human</li>
<li>Total request count: <b>%request_count_total</b>, from which <b>%human_request_percentage_total%</b> came from human visitors </li>
@@ -92,24 +93,24 @@
<hr>

<h3>File access</h3>
<img src="%img_file_ranking_total" alt="File ranking", title="File ranking">
<img src="%img_ranking_route_total" alt="File ranking", title="File ranking">
<hr>

<h3>Platforms and browsers</h3>
<img class="small" src="%img_operating_system_ranking_total" alt="Operating system ranking", title="Operating system ranking">
<img class="small" src="%img_browser_ranking_total" alt="Browser ranking", title="Browser ranking">
<img class="small" src="%img_ranking_platform_total" alt="Operating system ranking", title="Operating system ranking">
<img class="small" src="%img_ranking_browser_total" alt="Browser ranking", title="Browser ranking">
<h4>Mobile visitors: %mobile_visitor_percentage_total%</h4>
<hr>

<h3>Referrers</h3>
<img src="%img_referer_ranking_total" alt="Referer ranking", title="Referer ranking">
<img src="%img_ranking_referer_total" alt="Referer ranking", title="Referer ranking">
<hr>

<!-- <h3>GeoIP</h3> -->
<!-- <img src="%img_countries_total" alt="Country ranking", title="Country ranking"> -->
<!-- <img src="%img_cities_total" alt="City ranking", title="City ranking"> -->
<!-- <hr> -->
</center>
<h3>GeoIP</h3>
<img src="%img_ranking_country_total" alt="Country ranking", title="Country ranking">
<img src="%img_ranking_city_total" alt="City ranking", title="City ranking">
<hr>
</div>
</div>
<p>These analytics were generated by <a href="https://git.quintern.xyz/MatthiasQuintern/regina">regina %regina_version</a> at %generation_date</p>
<!-- Uncomment if you use IP2Location database -->