regina/regina/main.py
2023-05-05 13:14:52 +02:00

155 lines
5.5 KiB
Python

# from sys import path
# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
# __package__="."
from sys import argv, exit
from os.path import isfile
import sqlite3 as sql
if __name__ == "__main__":
if __package__ is None:
# make relative imports work as described here: https://peps.python.org/pep-0366/#proposed-change
__package__ = "regina"
import sys
from os import path
filepath = path.realpath(path.abspath(__file__))
sys.path.insert(0, path.dirname(path.dirname(filepath)))
from .db_operation.collect import parse_log, add_requests_to_db, update_ip_range_id
from .db_operation.database import create_db, update_geoip_tables, t_visitor
from .db_operation.visualize import visualize
from .utility.settings_manager import read_settings_file
from .utility.globals import settings, version
from .utility.utility import pmessage
from .utility.sql_util import sql_tablesize
"""
start regina, launch either collect or visualize
TODO:
- optionen:
- unique visitor = ip address
- max requests/time
- unique request datums unabhängig
X fix datum im visitor and request count plot
X fix datum monat is 1 zu wenig
X fix ms edge nicht dabei
- für letzten Tag: uhrzeit - requests/visitors plot
- checken warum last x days und total counts abweichen
- länder aus ip addresse
- "manuelle" datenbank beabeitung in cli:
- visitor + alle seine requests löschen
- visitor agents:
X android vor linux suchen, oder linux durch X11 ersetzen
- alles was bot drin hat als bot betrachten
- wenn datenbankgröße zum problem wird:
- referrer table die die schon zusammengelegten referrer enthält, request verlinkt nur mit id
- selbes für platforms und browsers
- test:
- human detection
X referer cleanup
X geoip
- schöne log nachrichten für die cron mail
- testing!
"""
def help():
helpstring = """Command line options:
--config <path> path to a config file that specifies all the other parameters: param = value, where value has the same formatting as on the command line
--update-geoip <path> path to IP-COUNTRY-REGION-CITY database in csv format
--visualize generate the visualization website
--collect fill the database from the nginx access log
--log-file <path> use alternate logfile
"""
print(helpstring)
def missing_arg_val(arg):
print("Missing argument for", arg)
exit(1)
def missing_arg(arg):
print("Missing ", arg)
exit(1)
def error(arg):
print("Error:", arg)
exit(1)
def main():
config_file = ""
collect = False
visualize_ = False
log_file = ""
geoip_city_csv = ""
# parse args
i = 1
while i in range(1, len(argv)):
if argv[i] in ["--config", "-c"]:
if len(argv) > i + 1: config_file = argv[i+1]
else: missing_arg_val(argv[i])
elif argv[i] == "--log-file":
if len(argv) > i + 1: log_file = argv[i+1]
else: missing_arg_val(argv[i])
if argv[i] == "--update-geoip":
if len(argv) > i + 1: geoip_city_csv = argv[i+1]
else: missing_arg_val(argv[i])
elif argv[i] in ["--help", "-h"]:
help()
exit(0)
elif argv[i] == "--collect":
collect = True
elif argv[i] == "--visualize":
visualize_ = True
else:
pass
i += 1
if not (collect or visualize_ or geoip_city_csv):
missing_arg("--visualize or --collect or --update-geoip")
if not config_file:
missing_arg("--config")
if not isfile(config_file):
error(f"Not a file: '{config_file}'")
read_settings_file(config_file, settings)
settings["version"] = version
if log_file: settings["access_log"] = log_file
if not settings["server_name"]: missing_arg("server-name")
if not settings["access_log"]: missing_arg("log")
if not settings["db"]: missing_arg("db")
if isinstance(settings["auto_group_filetypes"], str):
settings["auto_group_filetypes"] = settings["auto_group_filetypes"].split(",")
if isinstance(settings["locs_and_dirs"], str):
settings["locs_and_dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs_and_dirs"].split(",") ]
if not isfile(config_file):
error(f"Not a file: '{config_file}'")
if not isfile(settings["db"]):
create_db(settings["db"], settings["filegroups"], settings["locs_and_dirs"], settings["auto_group_filetypes"])
if geoip_city_csv:
if not isfile(geoip_city_csv):
error(f"Not a file: '{geoip_city_csv}'")
conn = sql.connect(settings['db'], isolation_level=None) # required vor vacuum
cur = conn.cursor()
update_geoip_tables(cur, geoip_city_csv)
# update visitors
for visitor_id in range(sql_tablesize(cur, t_visitor)):
update_ip_range_id(cur, visitor_id)
cur.close()
conn.commit()
conn.close()
if collect:
pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}' and logfile '{settings['access_log']}'")
requests = parse_log(settings["access_log"])
add_requests_to_db(requests, settings["db"])
if visualize_:
pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}'")
if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
visualize(settings)
if __name__ == '__main__':
main()