path to IP-COUNTRY-REGION-CITY database in csv format
+ --visualize generate the visualization website
+ --collect fill the database from the nginx access log
"""
print(helpstring)
@@ -68,16 +68,20 @@ def main():
collect = False
visualize_ = False
log_file = ""
+ geoip_city_csv = ""
# parse args
i = 1
while i in range(1, len(argv)):
- if argv[i] == "--config":
+ if argv[i] in ["--config", "-c"]:
if len(argv) > i + 1: config_file = argv[i+1]
else: missing_arg_val(argv[i])
- if argv[i] == "--log-file":
+ elif argv[i] == "--log-file":
if len(argv) > i + 1: log_file = argv[i+1]
else: missing_arg_val(argv[i])
- elif argv[i] == "--help":
+ if argv[i] == "--update-geoip":
+ if len(argv) > i + 1: geoip_city_csv = argv[i+1]
+ else: missing_arg_val(argv[i])
+ elif argv[i] in ["--help", "-h"]:
help()
exit(0)
elif argv[i] == "--collect":
@@ -87,11 +91,11 @@ def main():
else:
pass
i += 1
- if not collect and not visualize_:
- missing_arg("--visualize or --collect")
+ if not (collect or visualize_ or geoip_city_csv):
+ missing_arg("--visualize or --collect or --update-geoip")
if not config_file:
- missing_arg("--config_file")
+ missing_arg("--config")
if not isfile(config_file):
error(f"Not a file: '{config_file}'")
read_settings_file(config_file, settings)
@@ -107,19 +111,33 @@ def main():
if isinstance(settings["locs_and_dirs"], str):
settings["locs_and_dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs_and_dirs"].split(",") ]
+ if not isfile(config_file):
+ error(f"Not a file: '{config_file}'")
+
+ if not isfile(settings["db"]):
+ create_db(settings["db"], settings["filegroups"], settings["locs_and_dirs"], settings["auto_group_filetypes"])
+
+ if geoip_city_csv:
+ if not isfile(geoip_city_csv):
+ error(f"Not a file: '{geoip_city_csv}'")
+ conn = sql.connect(settings['db'], isolation_level=None) # required for vacuum
+ cur = conn.cursor()
+ update_geoip_tables(cur, geoip_city_csv)
+ # update users
+ for user_id in range(sql_tablesize(cur, t_user)):
+ update_ip_range_id(cur, user_id)
+ cur.close()
+ conn.commit()
+ conn.close()
if collect:
pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}' and logfile '{settings['access_log']}'")
- if not isfile(settings["db"]):
- create_db(settings["db"], settings["filegroups"], settings["locs_and_dirs"], settings["auto_group_filetypes"])
requests = parse_log(settings["access_log"])
add_requests_to_db(requests, settings["db"])
- elif visualize_:
+ if visualize_:
pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}'")
if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
visualize(settings)
- else:
- error("Either --collect --visualize has to be provided")
if __name__ == '__main__':
main()
diff --git a/regina/utility/globals.py b/regina/utility/globals.py
index 6e6ecb0..8b9970a 100644
--- a/regina/utility/globals.py
+++ b/regina/utility/globals.py
@@ -5,7 +5,7 @@ version = "1.0"
# default settings, these are overwriteable through a config file
settings = {
# GENERAL
- "server_name": "",
+ "server_name": "default_sever",
# DATA COLLECTION
"access_log": "",
"db": "",
@@ -15,12 +15,16 @@ settings = {
"request_location_regex_blacklist": "",
"request_is_same_on_same_day": True, # mutiple requests from same user to same file at same day are counted as 1
"unique_user_is_ip_address": False,
- "user_get_country": True,
+ "get_cities_for_countries": [""], # list of country codes for which the ip address ranges need to be collected at city level, not country level
# VISUALIZATION
"get_human_percentage": False,
"human_needs_success": True, # a human must have at least 1 successful request (status < 300)
"status_300_is_success": False, # 300 codes are success
+ "do_geoip_rankings": False,
+ "geoip_only_humans": True,
+ "city_ranking_regex_blacklist": "",
+ "country_ranking_regex_blacklist": "",
# "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
"file_ranking_regex_whitelist": r".*\.(html)",
"file_ranking_ignore_error_files": False, # skip files that only had unsuccessful requests (status < 300)
@@ -34,12 +38,16 @@ settings = {
# "plot_figsize": (60, 40),
"plot_dpi": 300,
"plot_add_count_label": True,
+ "plot_size_broad": (10, 5),
+ "plot_size_narrow": (6.5, 5),
"img_dir": "",
"img_location": "",
"img_filetype": "svg",
"template_html": "",
"html_out_path": "",
"last_x_days": 30,
+ # regina
+ "debug": False
}
# these oses and browser can be detected:
diff --git a/regina/utility/settings_manager.py b/regina/utility/settings_manager.py
index cb821d6..92c0300 100644
--- a/regina/utility/settings_manager.py
+++ b/regina/utility/settings_manager.py
@@ -4,14 +4,24 @@ def get_bool(bool_str: str, fallback=False):
elif bool_str in ["false", "False"]: return False
return fallback
+def get_iterable(s, original_iterable, require_same_length=False):
+ val_type = str
+ if len(original_iterable) > 0: val_type = type(original_iterable[0])
+ new_iter = type(original_iterable)(val_type(v.strip(" ")) for v in s.split(","))
+ if require_same_length and len(original_iterable) != len(new_iter):
+ raise Exception(f"{new_iter} does not have the same length as {original_iterable}")
+ return new_iter
+
+
def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True, allow_new_keys=False, convert_to_type=True):
+ ignore_invalid_lines = False
lines = []
with open(filepath, "r") as file:
lines = file.readlines()
for i in range(len(lines)):
line = lines[i].strip("\n ")
- if line.startswith("#"): continue
+ if line.startswith("#") or len(line) == 0: continue
vals = line.split("=")
if not len(vals) == 2:
if ignore_invalid_lines: continue
@@ -23,11 +33,23 @@ def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True,
if convert_to_type and not isinstance(settings[vals[0]], str|list|None):
if isinstance(settings[vals[0]], bool):
settings[vals[0]] = get_bool(vals[1].strip(" "), fallback=settings[vals[0]])
- continue
- try:
- settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
- except Exception as e:
- if not ignore_invalid_lines: raise e
- else: continue
+ elif isinstance(settings[vals[0]], tuple):
+ try:
+ settings[vals[0]] = get_iterable(vals[1], settings[vals[0]], require_same_length=True)
+ except Exception as e:
+ if not ignore_invalid_lines: raise e
+ else: continue
+ elif isinstance(settings[vals[0]], list):
+ try:
+ settings[vals[0]] = get_iterable(vals[1], settings[vals[0]], require_same_length=False)
+ except Exception as e:
+ if not ignore_invalid_lines: raise e
+ else: continue
+ else:
+ try:
+ settings[vals[0]] = type(settings[vals[0]])(vals[1].strip(" "))
+ except Exception as e:
+ if not ignore_invalid_lines: raise e
+ else: continue
else:
settings[vals[0]] = vals[1].strip(" ")
diff --git a/regina/utility/utility.py b/regina/utility/utility.py
index bd81d04..6788174 100644
--- a/regina/utility/utility.py
+++ b/regina/utility/utility.py
@@ -2,13 +2,14 @@
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
from sys import exit
+from regina.utility.globals import settings
+
"""
Various utitity
"""
-DEBUG = False
def pdebug(*args, **keys):
- if DEBUG: print(*args, **keys)
+ if settings["debug"]: print(*args, **keys)
def warning(*w, **k):
print("Warning:", *w, **k)
diff --git a/template.html b/template.html
index 0b0e232..623760b 100644
--- a/template.html
+++ b/template.html
@@ -5,11 +5,11 @@
- Analytics for %server-name
+ Analytics for %server_name
- Analytics for %server-name
+ Analytics for %server_name
Last %last_x_days days
@@ -23,17 +23,22 @@
File access
-
+
Platforms and browsers
-
-
+
+
Mobile users: %mobile_user_percentage_last_x_days%
Referrers
-
+
+
+
+ GeoIP
+
+
@@ -62,8 +67,15 @@
Referrers
+
+ GeoIP
+
+
+
These analytics were generated by regina %regina_version at %generation_date
+
+ This site includes IP2Location LITE data available from https://lite.ip2location.com