diff --git a/README.md b/README.md
index 1003e2c..0fe43a0 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,13 @@ sudo chmod +x /usr/share/zsh/site-functions/_regina
 ```
 
 # Changelog
+## 2.0
+- Refactored database code
+- New database format: 
+    - Removed filegroups table
+    - Put referrer, browser and platform in own table to reduce size of the database
+- 
+
 ## 1.0
 - Initial release
 
diff --git a/regina/__init__.py b/regina/__init__.py
index 0c15a23..2de2a73 100644
--- a/regina/__init__.py
+++ b/regina/__init__.py
@@ -1,4 +1,5 @@
 """Gather analytics from nginx access logs and visualize them through generated images and a generated html"""
 # __package__ = 'regina'
 
-from regina.db_operation import database, visualize, collect
+from regina.data_collection import parse_log
+from regina import database
diff --git a/regina/db_operation/collect.py b/regina/data_collection/parse_log.py
similarity index 75%
rename from regina/db_operation/collect.py
rename to regina/data_collection/parse_log.py
index de79710..5bc6e3c 100644
--- a/regina/db_operation/collect.py
+++ b/regina/data_collection/parse_log.py
@@ -1,7 +1,5 @@
-import sqlite3 as sql
 from re import fullmatch, match
-from regina.db_operation.database import t_request, t_visitor, t_file, t_filegroup, t_ip_range, database_tables, get_filegroup, ip_range_id
-from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
+from regina.data_collection.request import Request
 from regina.utility.utility import pdebug, warning, pmessage
 
 """
@@ -18,12 +16,12 @@ re_http_referer = r'"([^"]*)"'
 re_http_visitor_agent = r'"([^"]*)"'
 re_log_format: str = f'({re_remote_addr}) - ({re_remote_visitor}) ({re_time_local}) ({re_request}) ({re_status}) ({re_body_bytes_sent}) {re_http_referer} {re_http_visitor_agent}'
 
-def parse_log(logfile:str) -> list[Request]:
+def parse_log(logfile_path:str) -> list[Request]:
     """
     create Request objects from each line in the logfile
     """
     requests = []
-    with open(logfile, "r") as file:
+    with open(logfile_path, "r") as file:
         lines = file.readlines()
     for line in lines:
         m = match(re_log_format, line)
@@ -37,7 +35,7 @@ def parse_log(logfile:str) -> list[Request]:
             warning(f"parse_log: len('{m.groups()[3]}'.split(' ')) is {len(request_)} and not 3")
             continue
         requests.append(Request(ip_address=g[0], time_local=g[2],
-                                request_type=request_[0], request_file=request_[1], request_protocol=request_[2],
-                                status=g[4], bytes_sent=g[5], referer=g[6], visitor_agent=g[7]))
+                                request_type=request_[0], request_route=request_[1], request_protocol=request_[2],
+                                status=g[4], bytes_sent=g[5], referer=g[6], user_agent=g[7]))
     return requests
 
diff --git a/regina/db_operation/request.py b/regina/data_collection/request.py
similarity index 75%
rename from regina/db_operation/request.py
rename to regina/data_collection/request.py
index 261e0bc..6453069 100644
--- a/regina/db_operation/request.py
+++ b/regina/data_collection/request.py
@@ -3,14 +3,14 @@ from time import mktime
 from re import fullmatch, match
 from datetime import datetime as dt
 
-from .utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
-from .utility.utility import pdebug, warning, pmessage
-from .utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
+from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_max
+from regina.utility.utility import pdebug, warning, pmessage
+from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
 
 months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aut", "Sep", "Oct", "Nov", "Dec"]
 
 class Request:
-    def __init__(self, ip_address="", time_local="", request_type="", request_file="", request_protocol="", status="", bytes_sent="", referer="", visitor_agent=""):
+    def __init__(self, ip_address="", time_local="", request_type="", request_route="", request_protocol="", status="", bytes_sent="", referer="", user_agent=""):
         self.ip_address = int(IPv4Address(sanitize(ip_address)))
         self.time_local = 0
         # turn [20/Nov/2022:00:47:36 +0100] to unix time
@@ -29,21 +29,21 @@ class Request:
         else:
             warning(f"Request:__init__: Could not match time: '{time_local}'")
         self.request_type = sanitize(request_type)
-        self.request_route = sanitize(request_file)
+        self.request_route = sanitize(request_route)
         self.request_protocol = sanitize(request_protocol)
         self.status = sanitize(status)
         self.bytes_sent = sanitize(bytes_sent)
         self.referer = sanitize(referer)
-        self.visitor_agent = sanitize(visitor_agent)
+        self.user_agent = sanitize(user_agent)
 
     def __repr__(self):
-        return f"{self.ip_address} - {self.time_local} - {self.request_route} - {self.visitor_agent} - {self.status}"
+        return f"{self.ip_address} - {self.time_local} - {self.request_route} - {self.user_agent} - {self.status}"
 
     def get_platform(self):
         # for groups in findall(re_visitor_agent, visitor_agent):
         operating_system = ""
         for os in visitor_agent_operating_systems:
-            if os in self.visitor_agent:
+            if os in self.user_agent:
                 operating_system = os
                 break
         return operating_system
@@ -51,12 +51,12 @@ class Request:
     def get_browser(self):
         browser = ""
         for br in visitor_agent_browsers:
-            if br in self.visitor_agent:
+            if br in self.user_agent:
                 browser = br
                 break
         return browser
 
     def get_mobile(self):
-        return "Mobi" in self.visitor_agent
+        return "Mobi" in self.user_agent
 
 
diff --git a/regina/data_visualization/__init__.py b/regina/data_visualization/__init__.py
new file mode 100644
index 0000000..e4e8354
--- /dev/null
+++ b/regina/data_visualization/__init__.py
@@ -0,0 +1 @@
+"""Visualization utility for regina"""
diff --git a/regina/data_visualization/ranking.py b/regina/data_visualization/ranking.py
new file mode 100644
index 0000000..273957c
--- /dev/null
+++ b/regina/data_visualization/ranking.py
@@ -0,0 +1,151 @@
+from re import fullmatch
+
+from regina.database import Database
+from regina.utility.globals import settings
+from regina.utility.utility import pdebug, warning, missing_arg, is_blacklisted, is_whitelisted
+from regina.data_visualization.utility import is_valid_status, cleanup_referer
+
+
+def get_route_ranking(db: Database, date_condition:str) -> list[tuple[int, str]]:
+    """Count the requests per route that satisfy date_condition (an SQL condition), honoring the route_ranking_* settings.
+    :returns [(request_count, route name)] sorted in ascending order
+    """
+    ranking = []
+    for (route_id, name) in db("SELECT route_id, name FROM route"):
+        if     is_blacklisted(name, settings["route_ranking_blacklist"]): continue
+        if not is_whitelisted(name, settings["route_ranking_whitelist"]): continue
+        if settings["route_ranking_ignore_404"]:  # use only successful routes (checked over all dates, not only date_condition)
+            success = False
+            for (status,) in db(f"SELECT status FROM request WHERE route_id = {route_id}"):  # unpack the one-column row
+                if is_valid_status(status):
+                    pdebug(f"get_route_ranking: success code {status} for route with route_id {route_id} and name {name}")
+                    success = True
+                    break
+            if not success:
+                pdebug(f"get_route_ranking: route with route_id {route_id} and name {name} has only requests resulting in error")
+                continue
+        db.execute(f"SELECT COUNT(*) FROM request WHERE route_id = {route_id} AND {date_condition}")
+        ranking.append((db.fetchone()[0], name))
+    ranking.sort()
+    return ranking
+
+
+def get_ranking(db: Database, table: str, field_name: str, date_condition:str, whitelist_regex: str|list[str]|None=None, blacklist_regex: str|list[str]|None=None) -> list[tuple[int, str]]:
+    """
+    1) get all the distinct entries for field_name in table that satisfy date_condition
+    2) skip the entry if is_blacklisted accepts it for blacklist_regex
+    3) skip the entry if is_whitelisted rejects it for whitelist_regex
+    4) for every remaining entry, count the rows in table that satisfy date_condition
+    5) sort by count in ascending order
+    Note: table, field_name, date_condition and the entry itself are inserted into the SQL text verbatim.
+    @returns [(count, name)]
+    """
+    ranking = []
+    for (name,) in db(f"SELECT DISTINCT {field_name} FROM {table} WHERE {date_condition}"):  # unpack the one-column row
+        if     is_blacklisted(name, blacklist_regex): continue
+        if not is_whitelisted(name, whitelist_regex): continue
+        db.execute(f"SELECT COUNT(*) FROM {table} WHERE {field_name} = '{name}' AND {date_condition}")
+        ranking.append((db.fetchone()[0], name))
+    ranking.sort()
+    return ranking
+
+
+def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
+    """
+    Normalize every referer with cleanup_referer and add up the counts of entries that end up identical.
+    The list is rewritten in place and left sorted in ascending order; nothing is returned.
+    """
+    merged_counts = {}
+    for hits, raw_referer in referer_ranking:
+        cleaned = cleanup_referer(raw_referer)
+        merged_counts[cleaned] = merged_counts.get(cleaned, 0) + hits
+    referer_ranking.clear()
+    referer_ranking.extend((hits, cleaned) for cleaned, hits in merged_counts.items())
+    referer_ranking.sort()
+
+
+def get_city_and_country_ranking(db: Database, require_humans=True):
+    """
+    Count the rows of visitor joined with ip_range, city and country per city and per country (only is_human = 1 if require_humans).
+    A visitor rejected by the city black-/whitelist is still counted for its country (and vice versa).
+    @returns [(count, "city (CO)")], [(count, country)], both sorted in ascending order
+    """
+    cities_dict = {}
+    country_dict = {}
+
+    sql_cmd = f"SELECT ci.name, co.code, co.name FROM country AS co, city as ci, visitor as v, ip_range as i WHERE v.ip_range_id = i.ip_range_id AND i.city_id = ci.city_id AND ci.country_id = co.country_id"
+    if require_humans: sql_cmd += " AND v.is_human = 1"
+    result = db(sql_cmd)
+
+    for (city, country_code, country) in result:
+        if city in cities_dict:
+            cities_dict[city][0] += 1
+        elif (not is_blacklisted(city, settings["city_ranking_blacklist"])
+                and is_whitelisted(city, settings["city_ranking_whitelist"])):  # no `continue`: the country below must still be counted
+            cities_dict[city] = [1, country_code, country]  # count, country code, country name
+
+        if country in country_dict:
+            country_dict[country] += 1
+        elif (not is_blacklisted(country, settings["country_ranking_blacklist"])
+                and is_whitelisted(country, settings["country_ranking_whitelist"])):
+            country_dict[country] = 1  # count
+
+    city_ranking = [(v[0], f"{city} ({v[1]})") for city,v in cities_dict.items()]
+    city_ranking.sort()
+    country_ranking = [(count, country) for country,count in country_dict.items()]
+    country_ranking.sort()
+    return city_ranking, country_ranking
+
+
+def get_platform_browser_mobile_rankings(db: Database, visitor_ids: list[int]) -> tuple[list[tuple[int, str]], list[tuple[int, str]], float]:
+    """
+    Tally the platform, browser and mobile usage of the given visitors (the visitor table is queried once per id).
+    Visitors without platform_id / browser_id are left out of the respective ranking; the mobile share is
+    computed over the visitors that have at least one of the two.
+    returns [(percentage, platform name)], [(percentage, browser name)], mobile_visitor_percentage
+    all percentages are scaled to 0-100 and both rankings are sorted in ascending order
+    """
+    platform_hits = {}
+    browser_hits = {}
+    mobile_hits = 0.0
+    classified_visitors = 0.0  # visitors with a known platform or browser
+    for visitor_id in visitor_ids:
+        row = db(f"SELECT platform_id, browser_id, is_mobile FROM visitor WHERE visitor_id = {visitor_id}")[0]
+        platform_id, browser_id, is_mobile = row
+        if platform_id:
+            platform_hits[platform_id] = platform_hits.get(platform_id, 0) + 1
+        if browser_id:
+            browser_hits[browser_id] = browser_hits.get(browser_id, 0) + 1
+        if platform_id or browser_id:
+            classified_visitors += 1
+            if is_mobile: mobile_hits += 1
+    # without any classified visitor the share is defined as 0 instead of dividing by zero
+    mobile_visitor_percentage = mobile_hits / classified_visitors if classified_visitors else 0.0
+
+    platform_count = float(sum(platform_hits.values()))
+    platform_ranking = [(hits * 100/platform_count, db.get_name("platform", p_id)) for p_id, hits in platform_hits.items()]
+    platform_ranking.sort()
+    browser_count = float(sum(browser_hits.values()))
+    browser_ranking = [(hits * 100/browser_count, db.get_name("browser", b_id)) for b_id, hits in browser_hits.items()]
+    browser_ranking.sort()
+    return platform_ranking, browser_ranking, mobile_visitor_percentage*100
+
+
+# Store ranking in results class and dump with pickle
+# class Results:
+#     def __init__(self, timespan_name,
+#                  r_routes:	    list[tuple[int, str]],
+#                  r_referrers:	list[tuple[int, str]],
+#                  r_platforms:	list[tuple[int, str]],
+#                  r_browsers:	list[tuple[int, str]],
+#                  r_cities:	    list[tuple[int, str]],
+#                  r_countries:	list[tuple[int, str]],
+#                  ):
+#         self.r_routes   = r_routes
+#         self.r_referrers= r_referrers
+#         self.r_platforms= r_platforms
+#         self.r_browsers = r_browsers
+#         self.r_cities   = r_cities
+#         self.r_countries= r_countries
+
+
diff --git a/regina/data_visualization/utility.py b/regina/data_visualization/utility.py
new file mode 100644
index 0000000..efa78a3
--- /dev/null
+++ b/regina/data_visualization/utility.py
@@ -0,0 +1,110 @@
+from re import fullmatch
+
+from regina.database import Database
+from regina.utility.globals import settings
+from regina.utility.utility import pdebug, warning, missing_arg
+
+# re_uri_protocol = f"(https?)://"
+re_uri_protocol = f"(https?://)?"  # optional "http://" or "https://" prefix, captured as a whole
+re_uri_ipv4 = r"(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?)"  # four runs of 1-3 digits, each optionally followed by ".", plus an optional ":port"
+# re_uri_ipv6 = ""
+re_uri_domain = r"(?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})"  # optional captured subdomain part, then "name.tld" with a TLD of 2+ letters
+re_uri_route = r"(?:/(.*))?"  # optional "/" followed by the rest of the uri (captured without the "/")
+re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_route})"  # groups: 1 protocol, 2 host, 3 subdomains, 4 route with "/", 5 route without "/"
+# (https?://)?((?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})|(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?))((?:/(.*))?)
+
+def cleanup_referer(referer: str) -> str:
+    """
+    split the referer uri into its parts and reassemble them depending on the referer_ranking_ignore_* settings
+    returns the referer unchanged (after a warning) if it does not fully match re_uri_full
+    """
+    m = fullmatch(re_uri_full, referer)
+    if not m:
+        warning(f"cleanup_referer: Could not match referer '{referer}'")
+        return referer
+    # optional groups are None when they did not take part in the match; use "" so the parts can be concatenated
+    protocol = m.groups()[0] or ""
+    subdomains = m.groups()[2] or ""
+    domain = m.groups()[1].replace(subdomains, "")
+    route = m.groups()[3]
+
+    # start from the bare domain and re-attach every part that is not ignored
+    referer = domain
+    if settings["referer_ranking_ignore_tld"]:
+        if len(domain.split(".")) == 2:  # if domain.tld
+            referer = domain.split(".")[0]
+    if not settings["referer_ranking_ignore_subdomain"]: referer = subdomains + referer
+    if not settings["referer_ranking_ignore_protocol"]: referer = protocol + referer
+    if not settings["referer_ranking_ignore_route"]: referer += route
+    return referer
+
+
+
+def get_where_date_str(at_date=None, min_date=None, max_date=None):
+    """get a condition string that sets a condition on the time; returns 'time > 0' if no condition could be generated
+    str dates are sanitized and compared against DATE(time, 'unixepoch'), int/float dates against the raw unix time
+    """
+    from regina.utility.sql_util import sanitize  # this module does not import sanitize at the top level
+    s = ""
+    if at_date is not None:
+        if isinstance(at_date, str):
+            s += f"DATE(time, 'unixepoch') = '{sanitize(at_date)}' AND "
+        elif isinstance(at_date, int|float):
+            s += f"time = {int(at_date)} AND "
+        else:
+            print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
+    if min_date is not None:
+        if isinstance(min_date, str):
+            s += f"DATE(time, 'unixepoch') >= '{sanitize(min_date)}' AND "
+        elif isinstance(min_date, int|float):
+            s += f"time >= {int(min_date)} AND "
+        else:
+            print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
+    if max_date is not None:
+        if isinstance(max_date, str):
+            s += f"DATE(time, 'unixepoch') <= '{sanitize(max_date)}' AND "
+        elif isinstance(max_date, int|float):
+            s += f"time <= {int(max_date)} AND "
+        else:
+            print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
+    if s == "":
+        print(f"WARNING: get_where_date_str: no date_str generated. Returning 'time > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
+        return "time > 0"
+    return s.removesuffix(" AND ")
+
+def is_valid_status(status: int):
+    """True for status codes below 300, and also for codes below 400 if settings["status_300_is_success"] is set"""
+    first_failing_code = 400 if settings["status_300_is_success"] else 300
+    return status < first_failing_code
+
+#
+# GETTERS
+#
+def get_unique_visitor_ids_for_date(db: Database, date:str) -> list[int]:  # date: SQL condition on the request table
+    return [row[0] for row in db(f"SELECT DISTINCT visitor_id FROM request WHERE {date}")]
+
+def append_human_visitors(db: Database, unique_visitor_ids, unique_visitor_ids_human: list):
+    """
+    look up is_human for every id in unique_visitor_ids (one query per visitor) and append the ids
+    whose flag equals 1 to unique_visitor_ids_human, which is modified in place
+    """
+    for candidate_id in unique_visitor_ids:
+        db.execute(f"SELECT is_human FROM visitor WHERE visitor_id = {candidate_id}")
+        is_human_flag = db.fetchone()[0]
+        if is_human_flag == 1: unique_visitor_ids_human.append(candidate_id)
+
+def get_unique_request_ids_for_date(db: Database, date_constraint:str):  # date_constraint: SQL condition on the request table
+    return [row[0] for row in db(f"SELECT DISTINCT request_id FROM request WHERE {date_constraint}")]
+
+def append_unique_request_ids_for_date_and_visitor(db: Database, date_constraint:str, visitor_id: int, unique_request_ids_human: list):
+    """extend unique_request_ids_human in place with the distinct request ids of visitor_id that satisfy date_constraint"""
+    matching_rows = db(f"SELECT DISTINCT request_id FROM request WHERE {date_constraint} AND visitor_id = {visitor_id}")
+    unique_request_ids_human.extend(row[0] for row in matching_rows)
+
+# get the number of rows in the request table that satisfy the SQL condition date_constraint
+def get_request_count_for_date(db: Database, date_constraint:str) -> int:
+    db.execute(f"SELECT COUNT(*) FROM request WHERE {date_constraint}")
+    return db.fetchone()[0]
+
+def get_unique_visitor_count(db: Database) -> int:  # number of rows in the visitor table
+    return db("SELECT COUNT(*) FROM visitor")[0][0]  # direct query: sql_tablesize is not imported in this module
diff --git a/regina/data_visualization/visualize.py b/regina/data_visualization/visualize.py
new file mode 100644
index 0000000..96e295f
--- /dev/null
+++ b/regina/data_visualization/visualize.py
@@ -0,0 +1,365 @@
+# from sys import path
+# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
+import sqlite3 as sql
+from sys import exit
+from re import fullmatch
+import matplotlib.pyplot as plt
+from os.path import isdir
+from datetime import datetime as dt
+
+from numpy import empty
+# local
+from regina.database import Database
+from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
+from regina.utility.utility import pdebug, warning, missing_arg
+from regina.utility.globals import settings
+from regina.data_visualization.utility import cleanup_referer, get_where_date_str, get_unique_visitor_ids_for_date, get_unique_request_ids_for_date, append_human_visitors, append_unique_request_ids_for_date_and_visitor
+from regina.data_visualization.ranking import get_city_and_country_ranking, get_platform_browser_mobile_rankings, get_ranking, cleanup_referer_ranking, get_route_ranking
+
+"""
+visualize information from the database
+"""
+
+palette = {  # named hex colors shared by the plots
+    "red": "#ee4035",
+    "orange": "#f37736",
+    "yellow": "#fdf458",
+    "green": "#7bc043",
+    "blue": "#0392cf",
+    "purple": "#b044a0",
+}
+color_settings_filetypes = {  # color -> file extensions; plot_ranking matches the text after the last "." of a bar's name
+    palette["red"]: ["html", "php"],
+    palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"],
+    palette["yellow"]: ["css"],
+    "grey": ["txt"]
+}
+color_settings_alternate = list(palette.values())  # plain list of colors; plot_ranking cycles through a list by bar index
+
+color_settings_browsers = {  # color -> browser names
+    palette["red"]: ["Safari"],
+    palette["orange"]: ["Firefox"],
+    palette["yellow"]: ["Chrome"],
+    "grey": ["Edge"],
+    palette["green"]: ["Chromium"],
+    palette["purple"]: ["Brave"]
+}
+color_settings_platforms = {  # color -> platform names
+    palette["red"]: ["Mac"],
+    palette["green"]: ["Android"],
+    "grey": ["iPhone", "iPad"],
+    palette["yellow"]: ["Linux"],
+    palette["purple"]: ["BSD"],
+    palette["blue"]: ["Windows"],
+}
+
+
+def len_list_list(l: list[list]):
+    """
+    total number of elements in all the inner lists of l
+    """
+    return sum(len(inner) for inner in l)
+
+
+#
+# PLOTTING
+#
+def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot):
+    """
+    Write labels[i] as vertical text at the i-th bar: just above the bar, or near the bottom
+    of the plot (i.e. inside the bar) if the bar is taller than 60 % of max_y_val
+    """
+    tall_bar_limit = 0.6 * max_y_val
+    padding = 0.025 * max_y_val
+    for label_index, bar in enumerate(bar_plot):
+        bar_top = bar.get_height()
+        text_base = 0.05 * max_y_val if bar_top > tall_bar_limit else bar_top
+        x_center = bar.get_x() + bar.get_width()/2.
+        ax.text(x_center, text_base + padding, labels[label_index], ha='center', va='bottom', rotation=90)
+
+def add_labels_at_top_of_bar(xdata, ydata, max_y_val, ax, bar_plot):
+    """
+    write ydata[i], rounded to one decimal, in a white box 0.05 * max_y_val below ydata[i] at the i-th bar (xdata is unused)
+    """
+    y_shift = 0.05 * max_y_val
+    for i, bar in enumerate(bar_plot):
+        x_center = bar.get_x() + bar.get_width()/2
+        ax.text(x_center, ydata[i] - y_shift, round(ydata[i], 1), ha='center', bbox=dict(facecolor='white', alpha=0.8))
+
+def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None):
+    """
+    make a bar plot of the ranking
+    Only the last settings["file_ranking_plot_max_files"] entries are plotted. The last count is passed on
+    as the maximum bar height for the labels, so the ranking should be sorted in ascending order.
+    color_settings: a dict {color: [suffixes]} colors a bar by the text after the last "." of its name,
+                    a non-empty list is cycled through by bar index, anything else falls back to palette["blue"]
+    returns the figure (a new one is created if fig is not given)
+    """
+    if not fig:
+        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
+    # the bars always get a new axis on the figure
+    ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
+    # plot at most file_ranking_plot_max_files entries, taken from the end of the ranking
+    start_index = max(0, len(ranking) - settings["file_ranking_plot_max_files"])
+    x_names = []
+    y_counts = []
+    colors = []
+    for i in range(start_index, len(ranking)):
+        count, name = ranking[i][0], ranking[i][1]
+        x_names.append(name)
+        y_counts.append(count)
+        ft = name.split(".")[-1]
+        color = palette["blue"]
+        if isinstance(color_settings, dict):
+            for key, val in color_settings.items():
+                if ft in val: color = key
+        elif isinstance(color_settings, list) and color_settings:
+            # an empty list (the default) must not be indexed: the modulo by its length would divide by zero
+            color = color_settings[(i - start_index) % len(color_settings)]
+        colors.append(color)
+    bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
+
+    if len(y_counts) > 0:
+        add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar)
+        if settings["plot_add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar)
+    return fig
+
+
+# def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue", rotate_xlabel=0):
+#     if not fig:
+#         fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
+#     if not ax:
+#         ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
+#     else:
+#         ax = ax.twinx()
+#         ax.set_ylabel(ylabel)
+#         # ax.tick_params(axis="y", labelcolor="r")
+#     ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
+#     plt.xticks(rotation=rotate_xlabel)
+#     if label: ax.legend()
+#     return fig, ax
+
+def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major", rotate_xlabel=0, figsize=None):
+    """plot ydata1 and ydata2 over xdata on two y axes sharing the x axis (ax2 is created as a twin of ax1 unless both axes are given)
+    returns fig, ax1, ax2, plots; a plots list passed in is extended with the two new lines and used for the legend
+    """
+    if not fig:
+        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
+    if not (ax1 and ax2):
+        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
+        ax2 = ax1.twinx()
+        ax2.set_ylabel(ylabel2)
+    ax1.tick_params(axis="x", rotation=90)  # x tick labels are always vertical; rotate_xlabel is currently unused
+    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
+    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
+    if plots: plots += plot1 + plot2
+    else: plots = plot1 + plot2
+    plt.legend(plots, [ l.get_label() for l in plots])
+
+    if grid in ("major", "minor", "both"):
+        if grid in ("minor", "both"):  # was `grid == "minor" or "both"`, which is always true
+            ax1.minorticks_on()
+        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")
+
+    return fig, ax1, ax2, plots
+
+
+#
+# MAIN
+#
+def visualize(db: Database):
+    """
+    This assumes sanity checks have been done
+    """
+    pdebug("visualizing...")
+    if not settings["db"]:          missing_arg("db")
+    if not settings["server_name"]: missing_arg("server_name")
+
+    img_dir = settings["img_dir"]
+    pdebug("img_dir:", img_dir)
+    img_filetype = settings["img_filetype"]
+    if isdir(img_dir) and img_filetype:
+        gen_img = True
+    else:
+        print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
+        gen_img = False
+
+    img_location = settings["img_location"]
+    names = {
+        # paths
+        "img_route_ranking_last_x_days": f"ranking_routes_last_x_days.{img_filetype}",
+        "img_referer_ranking_last_x_days": f"ranking_referers_last_x_days.{img_filetype}",
+        "img_countries_last_x_days": f"ranking_countries_last_x_days.{img_filetype}",
+        "img_cities_last_x_days": f"ranking_cities_last_x_days.{img_filetype}",
+        "img_browser_ranking_last_x_days": f"ranking_browsers_last_x_days.{img_filetype}",
+        "img_platform_ranking_last_x_days": f"ranking_platforms_last_x_days.{img_filetype}",
+        "img_visitors_and_requests_last_x_days": f"visitor_request_count_daily_last_x_days.{img_filetype}",
+
+        "img_route_ranking_total": f"ranking_routes_total.{img_filetype}",
+        "img_referer_ranking_total": f"ranking_referers_total.{img_filetype}",
+        "img_countries_total": f"ranking_countries_total.{img_filetype}",
+        "img_cities_total": f"ranking_cities_total.{img_filetype}",
+        "img_browser_ranking_total": f"ranking_browsers_total.{img_filetype}",
+        "img_platform_ranking_total": f"ranking_platforms_total.{img_filetype}",
+        "img_visitors_and_requests_total": f"visitor_request_count_daily_total.{img_filetype}",
+        # values
+        "mobile_visitor_percentage_total": 0.0,
+        "mobile_visitor_percentage_last_x_days": 0.0,
+        "visitor_count_last_x_days": 0,
+        "visitor_count_total": 0,
+        "request_count_last_x_days": 0,
+        "request_count_total": 0,
+        "human_visitor_percentage_last_x_days": 0.0,
+        "human_visitor_percentage_total": 0.0,
+        "human_request_percentage_last_x_days": 0.0,
+        "human_request_percentage_total": 0.0,
+        # general
+        "regina_version": settings["version"],
+        "server_name": settings["server_name"],
+        "last_x_days": settings["last_x_days"],  # must be after all the things with last_x_days!
+        "earliest_date": "1990-1-1",
+        "generation_date": "1990-1-1 0:0:0",
+    }
+
+    db = Database(database_path=settings["db"])
+
+    get_humans = settings["get_human_percentage"]
+    # pdebug(f"visualize: settings {settings}")
+    # DATE STRINGS
+    earliest_date = db.get_earliest_date()
+    names["earliest_date"] = dt.fromtimestamp(earliest_date).strftime("%Y-%m-%d")
+    names["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
+    # LAST_X_DAYS
+    # last_x_days_min_date: latest_date - last_x_days
+    secs_per_day = 86400
+    last_x_days_min_date = db.get_latest_date() - settings["last_x_days"] * secs_per_day
+    last_x_days_constraint = get_where_date_str(min_date=last_x_days_min_date)
+    last_x_days = db.get_days_where(last_x_days_constraint)
+    last_x_days_contraints = [get_where_date_str(at_date=day) for day in last_x_days]
+
+    # ALL DATES
+    all_time_constraint = get_where_date_str(min_date=0)
+    # all months in yyyy-mm format
+    months_all_time = db.get_months_where(all_time_constraint)
+    # sqlite constrict to month string
+    months_strs = []
+    for year_month in months_all_time:
+        year, month = year_month.split("-")
+        # first day of the month
+        min_date  = dt(int(year), int(month), 1).timestamp()
+        month = (int(month) % 12) + 1  # + 1 month
+        year = int(year)
+        if month == 1: year += 1
+        # first day of the next month - 1 sec
+        max_date = dt(year, month, 1).timestamp() - 1
+        months_strs.append(get_where_date_str(min_date=min_date, max_date=max_date))
+
+    for i in range(2):
+        suffix = ["_total", "_last_x_days"][i]
+        date_constraint = [all_time_constraint, last_x_days_constraint][i]
+        date_names = [months_all_time, last_x_days][i]
+        date_constraints = [months_strs, last_x_days_contraints][i]
+        assert(len(date_names) == len(date_constraints))
+
+        # FILES
+        # TODO handle groups
+        file_ranking = get_route_ranking(db, date_constraint)
+        if gen_img:
+            fig_file_ranking = plot_ranking(file_ranking, xlabel="Route Name", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot_size_broad"])
+            fig_file_ranking.savefig(f"{img_dir}/{names[f'img_route_ranking{suffix}']}", bbox_inches="tight")
+
+        # REFERER
+        referer_ranking = get_ranking(db, "request", "referer", date_constraint, settings["referer_ranking_whitelist"], settings["referer_ranking_whitelist"])
+        pdebug("Referer ranking", referer_ranking)
+        cleanup_referer_ranking(referer_ranking)
+        if gen_img:
+            fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
+            fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_referer_ranking{suffix}']}", bbox_inches="tight")
+
+        # GEOIP
+        if settings["do_geoip_rankings"]:
+            city_ranking, country_ranking = get_city_and_country_ranking(db, require_humans=settings["geoip_only_humans"])
+            pdebug("Country ranking:", country_ranking)
+            pdebug("City ranking:", city_ranking)
+            if gen_img:
+                fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
+                fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_countries{suffix}']}", bbox_inches="tight")
+
+                fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
+                fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_cities{suffix}']}", bbox_inches="tight")
+
+
+        # USER
+        # visitor_agent_ranking = get_visitor_agent_ranking(cur, date_str)
+        # for the time span
+        unique_visitor_ids = get_unique_visitor_ids_for_date(db, date_constraint)
+        unique_visitor_ids_human = []
+        append_human_visitors(db, unique_visitor_ids, unique_visitor_ids_human)
+        # for each date
+        date_count = len(date_constraints)
+        unique_visitor_ids_dates: list[list[int]] = []
+        unique_request_ids_dates: list[list[int]] = []
+        unique_visitor_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
+        unique_request_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
+        for i in range(date_count):
+            date_constraint_ = date_constraints[i]
+            unique_visitor_ids_dates.append(get_unique_visitor_ids_for_date(db, date_constraint_))
+            unique_request_ids_dates.append(get_unique_request_ids_for_date(db, date_constraint_))
+            if get_humans:
+                # empty_list = []
+                # unique_visitor_ids_human_dates.append(empty_list)
+                append_human_visitors(db, unique_visitor_ids_dates[i], unique_visitor_ids_human_dates[i])
+                # unique_request_ids_human_dates.append(list())
+                for human in unique_visitor_ids_human_dates[i]:
+                    append_unique_request_ids_for_date_and_visitor(db, date_constraint_, human, unique_request_ids_human_dates[i])
+        # print("\n\tuu", unique_visitor_ids_dates, "\n\tur",unique_request_ids_dates, "\n\tuuh", unique_visitor_ids_human_dates, "\n\turh", unique_request_ids_human_dates)
+        # pdebug("uui",   unique_visitor_ids)
+        # pdebug("uuih",  unique_visitor_ids_human)
+        # pdebug("uuid",  unique_visitor_ids_dates)
+        # pdebug("uuidh", unique_visitor_ids_human_dates)
+        # pdebug("urid",  unique_request_ids_dates)
+        # pdebug("uridh", unique_visitor_ids_human_dates)
+        # pdebug(f"human_visitor_precentage: len_list_list(visitor_ids)={len_list_list(unique_visitor_ids_dates)}, len_list_list(visitor_ids_human)={len_list_list(unique_visitor_ids_human_dates)}")
+        if get_humans:
+            try:
+                names[f"human_visitor_percentage{suffix}"] = round(100 * len_list_list(unique_visitor_ids_human_dates) / len_list_list(unique_visitor_ids_dates), 2)
+            except:
+                names[f"human_visitor_percentage{suffix}"] = -1.0
+            try:
+                names[f"human_request_percentage{suffix}"] = round(100 * len_list_list(unique_request_ids_human_dates) / len_list_list(unique_request_ids_dates), 2)
+            except:
+                names[f"human_request_percentage{suffix}"] = -1.0
+        names[f"visitor_count{suffix}"] = len_list_list(unique_visitor_ids_dates)
+        names[f"request_count{suffix}"] = len_list_list(unique_request_ids_dates)
+        if gen_img:
+            fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_dates], [len(request_ids) for request_ids in unique_request_ids_dates], xlabel="Date", ylabel1="Visitor count", label1="Unique visitors", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"], rotate_xlabel=-45, figsize=settings["plot_size_broad"])
+            if get_humans:
+                fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_human_dates], [len(request_ids) for request_ids in unique_request_ids_human_dates], label1="Unique visitors (human)", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots, rotate_xlabel=-45, figsize=settings["plot_size_broad"])
+            fig_daily.savefig(f"{img_dir}/{names[f'img_visitors_and_requests{suffix}']}", bbox_inches="tight")
+
+        # os & browser
+        platform_ranking, browser_ranking, names[f"mobile_visitor_percentage{suffix}"] = get_platform_browser_mobile_rankings(db, unique_visitor_ids_human)
+        if gen_img:
+            fig_os_rating = plot_ranking(platform_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_platforms, figsize=settings["plot_size_narrow"])
+            fig_os_rating.savefig(f"{img_dir}/{names[f'img_platform_ranking{suffix}']}", bbox_inches="tight")
+            fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browser", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot_size_narrow"])
+            fig_browser_rating.savefig(f"{img_dir}/{names[f'img_browser_ranking{suffix}']}", bbox_inches="tight")
+
+    # print("OS ranking", os_ranking)
+    # print("Browser ranking", browser_ranking)
+    # print("Mobile percentage", names["mobile_visitor_percentage"])
+    if settings["template_html"] and settings["html_out_path"]:
+        pdebug(f"visualize: writing to html: {settings['html_out_path']}")
+
+        with open(settings["template_html"], "r") as file:
+            html = file.read()
+        for name, value in names.items():
+            if "img" in name:
+                value = f"{img_location}/{value}"
+            if type(value) == float:
+                value = f"{value:.2f}"
+            html = html.replace(f"%{name}", str(value))
+        with open(settings["html_out_path"], "w") as file:
+            file.write(html)
+    else:
+        warning(f"Skipping html generation because either template_html or html_out_path is invalid: template_html='{settings['template_html']}', html_out_path='{settings['html_out_path']}'")
diff --git a/regina/db_operation/database.py b/regina/database.py
similarity index 87%
rename from regina/db_operation/database.py
rename to regina/database.py
index a4dacab..f62a1a7 100644
--- a/regina/db_operation/database.py
+++ b/regina/database.py
@@ -12,15 +12,14 @@ if __name__ == "__main__":  # make relative imports work as described here: http
         import sys
         from os import path
         filepath = path.realpath(path.abspath(__file__))
-        print(path.dirname(path.dirname(path.dirname(filepath))))
-        sys.path.insert(0, path.dirname(path.dirname(path.dirname(filepath))))
+        sys.path.insert(0, path.dirname(path.dirname(filepath)))
 
 # local
-from .utility.sql_util import replace_null, sanitize, sql_select, sql_exists
-from .utility.utility import pdebug, get_filepath, warning, pmessage
-from .utility.globals import settings
-from .db_operation.request import Request
-from .utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
+from regina.utility.sql_util import replace_null, sanitize, sql_select, sql_exists
+from regina.utility.utility import pdebug, get_filepath, warning, pmessage, is_blacklisted, is_whitelisted
+from regina.utility.globals import settings
+from regina.data_collection.request import Request
+from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings
 
 """
 create reginas database as shown in the uml diagram database.uxf
@@ -36,13 +35,17 @@ class Database:
             pdebug(f"Database.__init__: Creating database at {database_path}")
             with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
                 create_db = file.read()
-            self.cur.execute(create_db)
+            self.cur.executescript(create_db)
             self.conn.commit()
 
     def __call__(self, s):
         """execute a command and return fetchall()"""
         self.cur.execute(s)
         return self.cur.fetchall()
+    def execute(self, s):
+        self.cur.execute(s)
+    def fetchone(self):
+        return self.cur.fetchone()
 
     #
     # VISITOR
@@ -160,9 +163,10 @@ class Database:
     def add_requests(self, requests: list[Request]):
         added_requests = 0
         # check the new visitors later
-        request_blacklist = settings["request_location_regex_blacklist"]
         new_visitors = []
         for i in range(len(requests)):
+            if     is_blacklisted(requests[i].request_route, settings["request_route_blacklist"]): continue
+            if not is_whitelisted(requests[i].request_route, settings["request_route_whitelist"]): continue
             visitor = self.add_request(requests[i])
             if visitor:
                 new_visitors.append(visitor)
@@ -267,12 +271,15 @@ class Database:
         assert(type(city_id_val) == int)
         return city_id_val
 
+
     def update_geoip_tables(self, geoip_city_csv_path: str):
         """
         update the geoip data with the contents of the geoip_city_csv file
 
         Make sure to update the visitor.ip_range_id column for all visitors.
-        In case something changed, they might point to a different city. (won't fix)
+        In case something changed, they might point to a different city.
+
+        TODO: update the visitor.ip_range_id column to match the (potentially) new city ip range
         """
         # indices for the csv
         FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5
@@ -331,5 +338,43 @@ class Database:
             if combine_range_country_id >= 0:  # last range , append
                 add_range(combine_range_low, combine_range_high, f"City in {combine_range_country_name}", f"Region in {combine_range_country_name}", combine_range_country_id)
 
+
+    #
+    # REQUEST
+    #
+    # TIME/DATE
+    def get_earliest_date(self) -> int:
+        """return the earliest time as unixepoch"""
+        date = self(f"SELECT MIN(time) FROM request")[0][0]
+        if not isinstance(date, int): return 0
+        else: return date
+
+    def get_latest_date(self) -> int:
+        """return the latest time as unixepoch"""
+        date = self(f"SELECT MAX(time) FROM request")[0][0]
+        if not isinstance(date, int): return 0
+        else: return date
+
+    def get_months_where(self, date_constraint:str) -> list[str]:
+        """get a list of all dates in yyyy-mm format
+        @param date_constraint: sqlite constraint, passed on unchanged to get_days_where
+        """
+        dates = self.get_days_where(date_constraint)
+        date_dict = {}
+        for date in dates:
+            date_without_day = date[0:date.rfind('-')]
+            date_dict[date_without_day] = 0
+        return list(date_dict.keys())
+
+    def get_days_where(self, date_constraint:str) -> list[str]:
+        """get a list of all dates in yyyy-mm-dd format
+        @param date_constraint: sqlite constraint, inserted verbatim after WHERE in the query on the request table
+        """
+        days = [ date[0] for date in self(f"SELECT DISTINCT DATE(time, 'unixepoch') FROM request WHERE {date_constraint}") ]  # fetchall returns tuples (date, ) 
+        days.sort()
+        return days
+
+
+
 if __name__ == '__main__':
     db = Database("test.db")
diff --git a/regina/db_operation/__init__.py b/regina/db_operation/__init__.py
deleted file mode 100644
index 0185ded..0000000
--- a/regina/db_operation/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""Gather analytics from nginx access logs and visualize them through generated images and a generated html"""
-# __package__ = 'regina'
-import regina.utility
-
-from importlib import resources
-# ip2nation_db_path = resources.path("regina", "ip2nation.db")
diff --git a/regina/db_operation/visualize.py b/regina/db_operation/visualize.py
deleted file mode 100644
index 92e47bf..0000000
--- a/regina/db_operation/visualize.py
+++ /dev/null
@@ -1,666 +0,0 @@
-# from sys import path
-# print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
-import sqlite3 as sql
-from sys import exit
-from re import fullmatch
-import matplotlib.pyplot as plt
-from os.path import isdir
-from datetime import datetime as dt
-
-from numpy import empty
-# local
-from regina.db_operation.database import Database, t_request, t_visitor, t_file, t_filegroup, t_ip_range, t_city, t_country
-from regina.utility.sql_util import sanitize, sql_select, sql_exists, sql_insert, sql_tablesize, sql_get_count_where
-from regina.utility.utility import pdebug, warning, missing_arg
-from regina.utility.globals import settings
-
-"""
-visualize information from the databse
-"""
-
-palette = {
-    "red": "#ee4035",
-    "orange": "#f37736",
-    "yellow": "#fdf458",
-    "green": "#7bc043",
-    "blue": "#0392cf",
-    "purple": "#b044a0",
-}
-color_settings_filetypes = {
-    palette["red"]: ["html"],
-    palette["green"]: ["jpg", "png", "jpeg", "gif", "svg", "webp"],
-    palette["yellow"]: ["css"],
-    "grey": ["txt"]
-}
-color_settings_alternate = list(palette.values())
-
-color_settings_browsers = {
-    palette["red"]: ["Safari"],
-    palette["orange"]: ["Firefox"],
-    palette["yellow"]: ["Chrome"],
-    "grey": ["Edge"],
-    palette["green"]: ["Chromium"],
-    palette["purple"]: ["Brave"]
-}
-color_settings_operating_systems = {
-    palette["red"]: ["Mac"],
-    palette["green"]: ["Android"],
-    "grey": ["iPhone", "iPad"],
-    palette["yellow"]: ["Linux"],
-    palette["purple"]: ["BSD"],
-    palette["blue"]: ["Windows"],
-}
-
-
-def len_list_list(l: list[list]):
-    size = 0
-    for i in range(len(l)):
-        size += len(l[i])
-    return size
-
-def valid_status(status: int):
-    if status >= 400: return False
-    if settings["status_300_is_success"] and status >= 300: return True
-    return status < 300
-
-#
-# FILTERS
-#
-def get_os_browser_mobile_rankings(db: Database, visitor_ids: list[int]):
-    """
-    returns [(count, operating_system)], [(count, browser)], mobile_visitor_percentage
-    """
-    os_ranking = {}
-    os_count = 0.0
-    browser_ranking = {}
-    browser_count = 0.0
-    mobile_ranking = { True: 0.0, False: 0.0 }
-    for visitor_id in visitor_ids:
-        os, browser, mobile = db(f"SELECT platform,browser,mobile FROM {t_visitor} WHERE visitor_id = {visitor_id}")[0]
-        mobile = bool(mobile)
-        if os:
-            if os in os_ranking: os_ranking[os] += 1
-            else: os_ranking[os] = 1
-            os_count += 1
-        if browser:
-            if browser in browser_ranking: browser_ranking[browser] += 1
-            else: browser_ranking[browser] = 1
-            browser_count += 1
-        if (os or browser):
-            mobile_ranking[mobile] += 1
-    try:
-        mobile_visitor_percentage = mobile_ranking[True] / (mobile_ranking[True] + mobile_ranking[False])
-    except ZeroDivisionError:
-        mobile_visitor_percentage = 0.0
-
-    os_ranking =  [(c * 100/os_count, n) for n, c in os_ranking.items()]
-    os_ranking.sort()
-    browser_ranking = [(c * 100/browser_count, n) for n, c in browser_ranking.items()]
-    browser_ranking.sort()
-    return os_ranking, browser_ranking, mobile_visitor_percentage*100
-
-#
-# GETTERS
-#
-def get_where_date_str(at_date=None, min_date=None, max_date=None):
-    # dates in unix time
-    s = ""
-    if at_date is not None:
-        if isinstance(at_date, str):
-            s += f"DATE(date, 'unixepoch') = '{sanitize(at_date)}' AND "
-        elif isinstance(at_date, int|float):
-            s += f"date = {int(at_date)} AND "
-        else:
-            print(f"WARNING: get_where_date_str: Invalid type of argument at_date: {type(at_date)}")
-    if min_date is not None:
-        if isinstance(min_date, str):
-            s += f"DATE(date, 'unixepoch') >= '{sanitize(min_date)}' AND "
-        elif isinstance(min_date, int|float):
-            s += f"date >= {int(min_date)} AND "
-        else:
-            print(f"WARNING: get_where_date_str: Invalid type of argument min_date: {type(min_date)}")
-    if max_date is not None:
-        if isinstance(max_date, str):
-            s += f"DATE(date, 'unixepoch') <= '{sanitize(max_date)}' AND "
-        elif isinstance(max_date, int|float):
-            s += f"date <= {int(max_date)} AND "
-        else:
-            print(f"WARNING: get_where_date_str: Invalid type of argument max_date: {type(max_date)}")
-    if s == "":
-        print(f"WARNING: get_where_date_str: no date_str generated. Returning 'date > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
-        return "date > 0"
-    return s.removesuffix(" AND ")
-
-
-# get the earliest date
-def get_earliest_date(db: Database) -> int:
-    """return the earliest time as unixepoch"""
-    date = db(f"SELECT MIN(date) FROM {t_request}")[0][0]
-    if not isinstance(date, int): return 0
-    else: return date
-
-# get the latest date
-def get_latest_date(db: Database) -> int:
-    """return the latest time as unixepoch"""
-    date = db(f"SELECT MAX(date) FROM {t_request}")[0][0]
-    if not isinstance(date, int): return 0
-    else: return date
-
-# get all dates
-# the date:str parameter in all these function must be a sqlite constraint
-def get_days(db: Database, date:str) -> list[str]:
-    """get a list of all dates in yyyy-mm-dd format"""
-    days = [ date[0] for date in db(f"SELECT DISTINCT DATE(date, 'unixepoch') FROM {t_request} WHERE {date}")]  # fetchall returns tuples (date, ) 
-    days.sort()
-    return days
-
-def get_months(db: Database, date:str) -> list[str]:
-    """get a list of all dates in yyyy-mm format"""
-    dates = get_days(db, date)
-    date_dict = {}
-    for date in dates:
-        date_without_day = date[0:date.rfind('-')]
-        date_dict[date_without_day] = 0
-    return list(date_dict.keys())
-
-
-def get_visitor_agent(db: Database, visitor_id: int):
-    return sql_select(db.cur, t_visitor, [("visitor_id", visitor_id)])[0][2]
-
-def get_unique_visitor_ids_for_date(db: Database, date:str) -> list[int]:
-    return [ visitor_id[0] for visitor_id in db(f"SELECT DISTINCT visitor_id FROM {t_request} WHERE {date}") ]
-
-def get_human_visitors(db: Database, unique_visitor_ids, unique_visitor_ids_human: list):
-    """
-    check if they have a known platform AND browser
-    check if at least one request did not result in an error (http status >= 400)
-    """
-    for visitor_id in unique_visitor_ids:
-        cur.execute(f"SELECT is_human FROM {t_visitor} WHERE visitor_id = {visitor_id}")
-        # if not visitor
-        if cur.fetchone()[0] == 0:
-            # pdebug(f"get_human_visitors: {visitor_id}, is_human is 0")
-            continue
-        else:
-            # pdebug(f"get_human_visitors: {visitor_id}, is_human is non-zero")
-            pass
-
-        # visitor is human
-        unique_visitor_ids_human.append(visitor_id)
-    # pdebug("get_human_visitors: (2)", unique_visitor_ids_human)
-
-def get_unique_request_ids_for_date(db: Database, date:str):
-    cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date}")
-    return [ request_id[0] for request_id in cur.fetchall()]
-
-def get_unique_request_ids_for_date_and_visitor(db: Database, date:str, visitor_id: int, unique_request_ids_human: list):
-    cur.execute(f"SELECT DISTINCT request_id FROM {t_request} WHERE {date} AND visitor_id = {visitor_id}")
-    # all unique requests for visitor_id
-    for request_id in cur.fetchall():
-        unique_request_ids_human.append(request_id[0])
-
-# get number of requests per day
-def get_request_count_for_date(db: Database, date:str) -> int:
-    cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE {date}")
-    return cur.fetchone()[0]
-
-def get_unique_visitor_count(db: Database) -> int:
-    return sql_tablesize(cur, t_visitor)
-
-
-
-#
-# RANKINGS
-#
-def get_file_ranking(db: Database, date:str) -> list[tuple[int, str]]:
-    global settings
-    """
-    :returns [(request_count, groupname)]
-    """
-    ranking = []
-    cur.execute(f"SELECT group_id, groupname FROM {t_filegroup}")
-    for group in cur.fetchall():
-        group_id = group[0]
-        # filename = sql_select(cur, t_file, [("group_id", group)])
-        # if len(filename) == 0: continue
-        # filename = filename[0][0]
-        filename = group[1]
-        if settings["file_ranking_regex_whitelist"]:  # if file in whitelist
-            if not fullmatch(settings["file_ranking_regex_whitelist"], filename):
-                pdebug(f"get_file_ranking: file with group_id {group_id} is not in whitelist")
-                continue
-        if settings["file_ranking_ignore_error_files"]:  # if request to file was successful
-            success = False
-            cur.execute(f"SELECT status FROM {t_request} WHERE group_id = {group_id}")
-            for status in cur.fetchall():
-                if valid_status(status[0]):
-                    pdebug(f"get_file_ranking: success code {status[0]} for file with group_id {group_id} and groupname {filename}")
-                    success = True
-                    break
-            if not success:
-                pdebug(f"get_file_ranking: file with group_id {group_id} and groupname {filename} has only requests resulting in error")
-                continue
-
-
-        # ranking.append((sql_get_count_where(cur, t_request, [("group_id", group)]), filename))
-        cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE group_id = {group_id} AND {date}")
-        ranking.append((cur.fetchone()[0], filename))
-    ranking.sort()
-    # print(ranking)
-    return ranking
-
-def get_visitor_agent_ranking(db: Database, date:str) -> list[tuple[int, str]]:
-    """
-    :returns [(request_count, visitor_agent)]
-    """
-    ranking = []
-    cur.execute(f"SELECT DISTINCT visitor_id FROM {t_request} WHERE {date}")
-    for visitor_id in cur.fetchall():
-        visitor_id = visitor_id[0]
-        visitor_agent = sql_select(cur, t_visitor, [("visitor_id", visitor_id)])
-        if len(visitor_agent) == 0: continue
-        visitor_agent = visitor_agent[0][2]
-        if settings["visitor_agent_ranking_regex_whitelist"]:
-            if not fullmatch(settings["visitor_agent_ranking_regex_whitelist"], visitor_agent):
-                continue
-        # ranking.append((sql_get_count_where(cur, t_request, [("group_id", group)]), filename))
-        cur.execute(f"SELECT COUNT(*) FROM {t_request} WHERE visitor_id = {visitor_id} AND {date}")
-        ranking.append((cur.fetchone()[0], visitor_agent))
-    ranking.sort()
-    # print(ranking)
-    return ranking
-
-def get_request_ranking(field_name: str, table: str, whitelist_regex: str, db: Database, date_condition:str) -> list[tuple[int, str]]:
-    """
-    1) get all the distinct entries for field_name after min_date_unix_time
-    2) call get_name_function with the distinct entry
-    3) for every entry, get the count in table after min_date_unix_time
-    3) sort by count in ascending order
-    :returns [(request_count, name)]
-    """
-    ranking = []
-    cur.execute(f"SELECT DISTINCT {field_name} FROM {table} WHERE {date_condition}")
-    for name in cur.fetchall():
-        name = name[0]
-        if whitelist_regex:
-            if not fullmatch(whitelist_regex, name):
-                continue
-        # ranking.append((sql_get_count_where(cur, t_request, [("group_id", group)]), filename))
-        cur.execute(f"SELECT COUNT(*) FROM {table} WHERE {field_name} = '{name}' AND {date_condition}")
-        ranking.append((cur.fetchone()[0], name))
-    ranking.sort()
-    # print(ranking)
-    return ranking
-
-# re_uri_protocol = f"(https?)://"
-re_uri_protocol = f"(https?://)?"
-re_uri_ipv4 = r"(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?)"
-# re_uri_ipv6 = ""
-re_uri_domain = r"(?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})"
-re_uri_location = r"(?:/(.*))?"
-re_uri_full = f"{re_uri_protocol}({re_uri_domain}|{re_uri_ipv4})({re_uri_location})"
-# (https?://)?((?:([^/]+\.)*[^/]+\.[a-zA-Z]{2,})|(?:(?:(?:\d{1,3}\.?){4})(?::\d+)?))((?:/(.*))?)
-
-def cleanup_referer(referer: str) -> str:
-    """
-    split the referer uri into its parts and reassemeble them depending on settings
-    """
-    m = fullmatch(re_uri_full, referer)
-    if not m:
-        warning(f"cleanup_referer: Could not match referer '{referer}'")
-        return referer
-    # pdebug(f"cleanup_referer: {referer} - {m.groups()}")
-    protocol = m.groups()[0]
-    subdomains = m.groups()[2]
-    if not subdomains: subdomains = ""
-    domain = m.groups()[1].replace(subdomains, "")
-    location = m.groups()[3]
-
-    referer = domain
-    if settings["referer_ranking_ignore_tld"]:
-        if len(domain.split(".")) == 2:  # if domain.tld
-            referer = domain.split(".")[0]
-    if not settings["referer_ranking_ignore_subdomain"]: referer = subdomains + referer
-    if not settings["referer_ranking_ignore_protocol"]: referer = protocol + referer
-    if not settings["referer_ranking_ignore_location"]: referer += location
-    # pdebug(f"cleanup_referer: cleaned up: {referer}")
-    return referer
-
-def cleanup_referer_ranking(referer_ranking: list[tuple[int, str]]):
-    unique_referers = dict()
-    for count, referer in referer_ranking:
-        referer = cleanup_referer(referer)
-        if referer in unique_referers:
-            unique_referers[referer] += count
-        else:
-            unique_referers[referer] = count
-    referer_ranking.clear()
-    for referer, count in unique_referers.items():
-        referer_ranking.append((count, referer))
-    referer_ranking.sort()
-
-def get_city_and_country_ranking(cur:sql.Cursor, require_humans=True, regex_city_blacklist="", regex_country_blacklist=""):
-    sql_cmd = f"SELECT ci.name, c.code, c.name FROM {t_country} AS c, {t_city} as ci, {t_visitor} as u, {t_ip_range} as i WHERE u.ip_range_id = i.ip_range_id AND i.city_id = ci.city_id AND ci.country_id = c.country_id"
-    if require_humans: sql_cmd += " AND u.is_human = 1"
-    cur.execute(sql_cmd)
-    pdebug(f"get_city_and_country_ranking: require_humans={require_humans}, regex_city_blacklist='{regex_city_blacklist}', regex_country_blacklist='{regex_country_blacklist}'")
-    cities = cur.fetchall()
-    cities_dict = {}
-    country_dict = {}
-    pdebug(f"get_city_and_country_ranking: found {len(cities)} ip_ranges")
-
-    validate_city_cmd = lambda _ : True
-    validate_country_cmd = lambda _ : True
-    if len(regex_city_blacklist) > 0: validate_city_cmd = lambda city : fullmatch(regex_city_blacklist, city) is None
-    if len(regex_country_blacklist) > 0 : validate_country_cmd = lambda country : fullmatch(regex_country_blacklist, country) is None
-    for i in range(len(cities)):
-        if cities[i][0] in cities_dict:
-            cities_dict[cities[i][0]][0] += 1
-        else:
-            if validate_city_cmd(cities[i][0]):
-                cities_dict[cities[i][0]] = [1, cities[i][1], cities[i][2]]  # count, country code
-        if cities[i][2] in country_dict:
-            country_dict[cities[i][2]] += 1
-        else:
-            if validate_country_cmd(cities[i][2]):
-                country_dict[cities[i][2]] = 1  # count, country code
-    city_ranking = [(v[0], f"{k} ({v[1]})") for k,v in cities_dict.items()]
-    city_ranking.sort()
-    country_ranking = [(v, k) for k,v in country_dict.items()]
-    country_ranking.sort()
-    return city_ranking, country_ranking
-
-#
-# PLOTTING
-#
-# add value labels
-def add_vertikal_labels_in_bar_plot(labels, max_y_val, ax, bar_plot):
-    # pdebug("add_vertikal_labels_in_bar_plot:", labels)
-    for idx,rect in enumerate(bar_plot):
-        height = rect.get_height()
-        if height > 0.6 * max_y_val:  # if the bar is large, put label in the bar
-            height = 0.05 * max_y_val
-        ax.text(rect.get_x() + rect.get_width()/2., height + 0.025 * max_y_val,
-                labels[idx],
-                ha='center', va='bottom', rotation=90)
-# add count labels
-def add_labels_at_top_of_bar(xdata, ydata, max_y_val, ax, bar_plot):
-    # pdebug("add_labels_at_top_of_bar:", xdata, ydata)
-    y_offset = 0.05 * max_y_val
-    for idx,rect in enumerate(bar_plot):
-        ax.text(rect.get_x() + rect.get_width()/2, ydata[idx] - y_offset, round(ydata[idx], 1), ha='center', bbox=dict(facecolor='white', alpha=0.8))
-
-def plot_ranking(ranking: list[tuple[int, str]], fig=None, xlabel="", ylabel="", color_settings:dict|list=[], figsize=None):
-    """
-    make a bar plot of the most requested files
-    """
-    # pdebug(f"plot_ranking: ranking={ranking}")
-    if not fig:
-        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
-    # create new axis if none is given
-    ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
-    # fill x y data
-    if len(ranking) > settings["file_ranking_plot_max_files"]:
-        start_index = len(ranking) - settings["file_ranking_plot_max_files"]
-    else: start_index = 0
-    x_names = []
-    y_counts = []
-    colors = []
-    for i in range(start_index, len(ranking)):
-        x_names.append(ranking[i][1])
-        y_counts.append(ranking[i][0])
-        ft = ranking[i][1].split(".")[-1]
-        color = palette["blue"]
-        # if not color_settings: color = palette["blue"]
-        if isinstance(color_settings, dict):
-            for key, val in color_settings.items():
-                if ft in val: color = key
-            if not color: color = palette["blue"]
-        elif isinstance(color_settings, list):
-            # print(color_settings, (i - start_index) % len(color_settings))
-            color = color_settings[(i - start_index) % len(color_settings)]
-        colors.append(color)
-    bar = ax.bar(x_names, y_counts, tick_label="", color=colors)
-
-    if len(y_counts) > 0:
-        add_vertikal_labels_in_bar_plot(x_names, y_counts[-1], ax, bar)
-        if settings["plot_add_count_label"]: add_labels_at_top_of_bar(x_names, y_counts, y_counts[-1], ax, bar)
-    # ax.ylabel(y_counts)
-    return fig
-
-
-# def plot(xdata, ydata, fig=None, ax=None, xlabel="", ylabel="", label="", linestyle='-', marker="", color="blue", rotate_xlabel=0):
-#     if not fig:
-#         fig = plt.figure(figsize=None, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
-#     if not ax:
-#         ax = fig.add_subplot(xlabel=xlabel, ylabel=ylabel)
-#     else:
-#         ax = ax.twinx()
-#         ax.set_ylabel(ylabel)
-#         # ax.tick_params(axis="y", labelcolor="r")
-#     ax.plot(xdata, ydata, marker=marker, label=label, linestyle=linestyle, color=color)
-#     plt.xticks(rotation=rotate_xlabel)
-#     if label: ax.legend()
-#     return fig, ax
-
-def plot2y(xdata, ydata1, ydata2, fig=None, ax1=None, ax2=None, plots=None, xlabel="", ylabel1="", ylabel2="", label1="", label2="", linestyle='-', marker="", color1="blue", color2="orange", grid="major", rotate_xlabel=0, figsize=None):
-    if not fig:
-        fig = plt.figure(figsize=figsize, dpi=settings["plot_dpi"], linewidth=1.0, frameon=True, subplotpars=None, layout=None)
-    if not (ax1 and ax2):
-        ax1 = fig.add_subplot(xlabel=xlabel, ylabel=ylabel1)
-        ax2 = ax1.twinx()
-        ax2.set_ylabel(ylabel2)
-    ax1.tick_params(axis="x", rotation=90)
-    plot1 = ax1.plot(xdata, ydata1, marker=marker, label=label1, linestyle=linestyle, color=color1)
-    plot2 = ax2.plot(xdata, ydata2, marker=marker, label=label2, linestyle=linestyle, color=color2)
-    # ax1.set_xticks(ax1.get_xticks())
-    # ax1.set_xticklabels(xdata, rotation=rotate_xlabel, rotation_mode="anchor")
-    # if label1 or label2: ax1.legend()
-    if plots: plots += plot1 + plot2
-    else: plots = plot1 + plot2
-    plt.legend(plots, [ l.get_label() for l in plots])
-
-    if grid == "major" or grid == "minor" or grid == "both":
-        if grid == "minor" or "both":
-            ax1.minorticks_on()
-        ax1.grid(visible=True, which=grid, linestyle="-", color="#888")
-
-    return fig, ax1, ax2, plots
-
-
-#
-# MAIN
-#
-
-def visualize(loaded_settings: dict):
-    pdebug("visualizing...")
-    global settings
-    settings = loaded_settings
-    if not settings["db"]: missing_arg("db")
-    if not settings["server_name"]: missing_arg("server_name")
-
-    img_dir = settings["img_dir"]
-    pdebug("img_dir:", img_dir)
-    img_filetype = settings["img_filetype"]
-    img_location = settings["img_location"]
-    names = {
-        # paths
-        "img_file_ranking_last_x_days": f"ranking_files_last_x_days.{img_filetype}",
-        "img_referer_ranking_last_x_days": f"ranking_referers_last_x_days.{img_filetype}",
-        "img_countries_last_x_days": f"ranking_countries_last_x_days.{img_filetype}",
-        "img_cities_last_x_days": f"ranking_cities_last_x_days.{img_filetype}",
-        "img_browser_ranking_last_x_days": f"ranking_browsers_last_x_days.{img_filetype}",
-        "img_operating_system_ranking_last_x_days": f"ranking_operating_systems_last_x_days.{img_filetype}",
-        "img_visitors_and_requests_last_x_days": f"visitor_request_count_daily_last_x_days.{img_filetype}",
-
-        "img_file_ranking_total": f"ranking_files_total.{img_filetype}",
-        "img_referer_ranking_total": f"ranking_referers_total.{img_filetype}",
-        "img_countries_total": f"ranking_countries_total.{img_filetype}",
-        "img_cities_total": f"ranking_cities_total.{img_filetype}",
-        "img_browser_ranking_total": f"ranking_browsers_total.{img_filetype}",
-        "img_operating_system_ranking_total": f"ranking_operating_systems_total.{img_filetype}",
-        "img_visitors_and_requests_total": f"visitor_request_count_daily_total.{img_filetype}",
-        # values
-        "mobile_visitor_percentage_total": 0.0,
-        "mobile_visitor_percentage_last_x_days": 0.0,
-        "visitor_count_last_x_days": 0,
-        "visitor_count_total": 0,
-        "request_count_last_x_days": 0,
-        "request_count_total": 0,
-        "human_visitor_percentage_last_x_days": 0.0,
-        "human_visitor_percentage_total": 0.0,
-        "human_request_percentage_last_x_days": 0.0,
-        "human_request_percentage_total": 0.0,
-        # general
-        "regina_version": settings["version"],
-        "server_name": settings["server_name"],
-        "last_x_days": settings["last_x_days"],  # must be after all the things with last_x_days!
-        "earliest_date": "1990-1-1",
-        "generation_date": "1990-1-1 0:0:0",
-    }
-
-    conn = sql.connect(settings["db"])
-    if isdir(img_dir) and img_filetype:
-        gen_img = True
-    else:
-        print(f"Warning: Not generating images since at least one required variable is invalid: img_dir='{img_dir}', img_filetype='{img_filetype}'")
-        gen_img = False
-    cur = conn.cursor()
-
-    get_humans = settings["get_human_percentage"]
-    # pdebug(f"visualize: settings {settings}")
-    # DATE STRINGS
-    earliest_date = get_earliest_date(cur)
-    names["earliest_date"] = dt.fromtimestamp(earliest_date).strftime("%Y-%m-%d")
-    names["generation_date"] = dt.now().strftime("%Y-%m-%d %H:%M:%S")
-    # LAST_X_DAYS
-    # last_x_days_min_date: latest_date - last_x_days
-    secs_per_day = 86400
-    last_x_days_min_date = get_latest_date(cur) - settings["last_x_days"] * secs_per_day
-    last_x_days_str = get_where_date_str(min_date=last_x_days_min_date)
-    days = get_days(cur, last_x_days_str)
-    days_strs = [get_where_date_str(at_date=day) for day in days]
-
-    # ALL DATES
-    all_time_str = get_where_date_str(min_date=0)
-    # all months in yyyy-mm format
-    months_all_time = get_months(cur, all_time_str)
-    # sqlite constrict to month string
-    months_strs = []
-    for year_month in months_all_time:
-        year, month = year_month.split("-")
-        # first day of the month
-        min_date  = dt(int(year), int(month), 1).timestamp()
-        month = (int(month) % 12) + 1  # + 1 month
-        year = int(year)
-        if month == 1: year += 1
-        # first day of the next month - 1 sec
-        max_date = dt(year, month, 1).timestamp() - 1
-        months_strs.append(get_where_date_str(min_date=min_date, max_date=max_date))
-
-    for i in range(2):
-        suffix = ["_total", "_last_x_days"][i]
-        date_str = [all_time_str, last_x_days_str][i]
-        date_names = [months_all_time, days][i]
-        date_strs = [months_strs, days_strs][i]
-        assert(len(date_names) == len(date_strs))
-
-        # FILES
-        file_ranking = get_file_ranking(cur, date_str)
-        if gen_img:
-            fig_file_ranking = plot_ranking(file_ranking, xlabel="Filename/Filegroup", ylabel="Number of requests", color_settings=color_settings_filetypes, figsize=settings["plot_size_broad"])
-            fig_file_ranking.savefig(f"{img_dir}/{names[f'img_file_ranking{suffix}']}", bbox_inches="tight")
-
-        # REFERER
-        referer_ranking = get_request_ranking("referer", t_request, settings["referer_ranking_regex_whitelist"], cur, date_str)
-        pdebug("Referer ranking", referer_ranking)
-        cleanup_referer_ranking(referer_ranking)
-        if gen_img:
-            fig_referer_ranking = plot_ranking(referer_ranking, xlabel="HTTP Referer", ylabel="Number of requests", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
-            fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_referer_ranking{suffix}']}", bbox_inches="tight")
-
-        # GEOIP
-        if settings["do_geoip_rankings"]:
-            city_ranking, country_ranking = get_city_and_country_ranking(cur, require_humans=settings["geoip_only_humans"], regex_city_blacklist=settings["city_ranking_regex_blacklist"], regex_country_blacklist=settings["country_ranking_regex_blacklist"])
-            pdebug("Country ranking:", country_ranking)
-            pdebug("City ranking:", city_ranking)
-            if gen_img:
-                fig_referer_ranking = plot_ranking(country_ranking, xlabel="Country", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
-                fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_countries{suffix}']}", bbox_inches="tight")
-
-                fig_referer_ranking = plot_ranking(city_ranking, xlabel="City", ylabel="Number of visitors", color_settings=color_settings_alternate, figsize=settings["plot_size_broad"])
-                fig_referer_ranking.savefig(f"{img_dir}/{names[f'img_cities{suffix}']}", bbox_inches="tight")
-
-
-        # USER
-        # visitor_agent_ranking = get_visitor_agent_ranking(cur, date_str)
-        # for the time span
-        unique_visitor_ids = get_unique_visitor_ids_for_date(cur, date_str)
-        unique_visitor_ids_human = []
-        get_human_visitors(cur, unique_visitor_ids, unique_visitor_ids_human)
-        # for each date
-        date_count = len(date_strs)
-        unique_visitor_ids_dates: list[list[int]] = []
-        unique_request_ids_dates: list[list[int]] = []
-        unique_visitor_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
-        unique_request_ids_human_dates: list[list[int]] = [[] for _ in range(date_count)]
-        for i in range(date_count):
-            date_str_ = date_strs[i]
-            unique_visitor_ids_dates.append(get_unique_visitor_ids_for_date(cur, date_str_))
-            unique_request_ids_dates.append(get_unique_request_ids_for_date(cur, date_str_))
-            if get_humans:
-                # empty_list = []
-                # unique_visitor_ids_human_dates.append(empty_list)
-                get_human_visitors(cur, unique_visitor_ids_dates[i], unique_visitor_ids_human_dates[i])
-                # unique_request_ids_human_dates.append(list())
-                for human in unique_visitor_ids_human_dates[i]:
-                    get_unique_request_ids_for_date_and_visitor(cur, date_str_, human, unique_request_ids_human_dates[i])
-        # print("\n\tuu", unique_visitor_ids_dates, "\n\tur",unique_request_ids_dates, "\n\tuuh", unique_visitor_ids_human_dates, "\n\turh", unique_request_ids_human_dates)
-        # pdebug("uui",   unique_visitor_ids)
-        # pdebug("uuih",  unique_visitor_ids_human)
-        # pdebug("uuid",  unique_visitor_ids_dates)
-        # pdebug("uuidh", unique_visitor_ids_human_dates)
-        # pdebug("urid",  unique_request_ids_dates)
-        # pdebug("uridh", unique_visitor_ids_human_dates)
-        # pdebug(f"human_visitor_precentage: len_list_list(visitor_ids)={len_list_list(unique_visitor_ids_dates)}, len_list_list(visitor_ids_human)={len_list_list(unique_visitor_ids_human_dates)}")
-        if get_humans:
-            try:
-                names[f"human_visitor_percentage{suffix}"] = round(100 * len_list_list(unique_visitor_ids_human_dates) / len_list_list(unique_visitor_ids_dates), 2)
-            except:
-                names[f"human_visitor_percentage{suffix}"] = -1.0
-            try:
-                names[f"human_request_percentage{suffix}"] = round(100 * len_list_list(unique_request_ids_human_dates) / len_list_list(unique_request_ids_dates), 2)
-            except:
-                names[f"human_request_percentage{suffix}"] = -1.0
-        names[f"visitor_count{suffix}"] = len_list_list(unique_visitor_ids_dates)
-        names[f"request_count{suffix}"] = len_list_list(unique_request_ids_dates)
-        if gen_img:
-            fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_dates], [len(request_ids) for request_ids in unique_request_ids_dates], xlabel="Date", ylabel1="Visitor count", label1="Unique visitors", ylabel2="Request count", label2="Unique requests", color1=palette["red"], color2=palette["blue"], rotate_xlabel=-45, figsize=settings["plot_size_broad"])
-            if get_humans:
-                fig_daily, ax1, ax2, plots = plot2y(date_names, [len(visitor_ids) for visitor_ids in unique_visitor_ids_human_dates], [len(request_ids) for request_ids in unique_request_ids_human_dates], label1="Unique visitors (human)", label2="Unique requests (human)", color1=palette["orange"], color2=palette["green"], fig=fig_daily, ax1=ax1, ax2=ax2, plots=plots, rotate_xlabel=-45, figsize=settings["plot_size_broad"])
-            fig_daily.savefig(f"{img_dir}/{names[f'img_visitors_and_requests{suffix}']}", bbox_inches="tight")
-
-        # os & browser
-        os_ranking, browser_ranking, names[f"mobile_visitor_percentage{suffix}"] = get_os_browser_mobile_rankings(cur, unique_visitor_ids_human)
-        if gen_img:
-            fig_os_rating = plot_ranking(os_ranking, xlabel="Platform", ylabel="Share [%]", color_settings=color_settings_operating_systems, figsize=settings["plot_size_narrow"])
-            fig_os_rating.savefig(f"{img_dir}/{names[f'img_operating_system_ranking{suffix}']}", bbox_inches="tight")
-            fig_browser_rating = plot_ranking(browser_ranking, xlabel="Browsers", ylabel="Share [%]", color_settings=color_settings_browsers, figsize=settings["plot_size_narrow"])
-            fig_browser_rating.savefig(f"{img_dir}/{names[f'img_browser_ranking{suffix}']}", bbox_inches="tight")
-
-    # print("OS ranking", os_ranking)
-    # print("Browser ranking", browser_ranking)
-    # print("Mobile percentage", names["mobile_visitor_percentage"])
-    if settings["template_html"] and settings["html_out_path"]:
-        pdebug(f"visualize: writing to html: {settings['html_out_path']}")
-
-        with open(settings["template_html"], "r") as file:
-            html = file.read()
-        for name, value in names.items():
-            if "img" in name:
-                value = f"{img_location}/{value}"
-            html = html.replace(f"%{name}", str(value))
-        with open(settings["html_out_path"], "w") as file:
-            file.write(html)
-    else:
-        warning(f"Skipping html generation because either template_html or html_out_path is invalid: template_html='{settings['template_html']}', html_out_path='{settings['html_out_path']}'")
diff --git a/regina/default.cfg b/regina/default.cfg
new file mode 100644
index 0000000..738a095
--- /dev/null
+++ b/regina/default.cfg
@@ -0,0 +1,155 @@
+# ************************************* REGINA CONFIGURATION **************************************
+#                       .__
+# _______   ____   ____ |__| ____ _____
+# \_  __ \_/ __ \ / ___\|  |/    \\__  \
+# |  | \/\  ___// /_/  >  |   |  \/ __ \_
+# |__|    \___  >___  /|__|___|  (____  /
+#             \/_____/         \/     \/
+# ************************************************************************************************* 
+[ regina ]
+# name of the server or website
+# will be available as variable for the generated website as %server_name
+# string
+server_name = 
+
+# database path. if not specified, use xdg-data-home/regina/<server-name> 
+# eg: /home/my_user/regina/my_website.db
+# 
+# path or empty
+database =
+
+[ data-collection ]
+# path to the nginx access log to parse
+# eg: /var/log/nginx/access.log
+# path (read permissions)
+access_log =
+
+# FILE GROUPING
+# nginx locations and their root directory: location:directory,location:directory,...
+# eg: /:/www/my_website,/error:/www/error
+locs_and_dirs = 
+# filetypes that should be grouped (comma separated)
+# eg: png,jpg,jpeg,gif,svg,css,ico,pdf,txt
+auto_group_filetypes = 
+# group certain files
+# eg: home:index.html,home.html;images:image1.png,image2.png
+# PATHS
+[ data-visualization ]
+# template html input
+# eg: /home/my_visitor/.regina/template.html
+# path (read permissions)
+template_html = 
+# output for the generated html
+# eg: /www/analytics/statistics.html
+# path (write permissions)
+html_out_path = 
+
+# output directory for the generated plots
+# WARNING: you have to create the directory yourself, regina will not create it
+# eg: /www/analytics/images
+# path (directory with write permissions)
+img_out_dir = 
+
+# nginx location for the generated images, its root must be img_out_dir
+# eg: images
+img_location = 
+#
+# if the root for your server is /www/analytics and html_out_path is /www/analytics/analytics.html,
+# use img_out_dir = /www/analytics/images and img_location = /images
+[ route_groups ]
+images = 
+    *.gif
+    *.jpeg
+    *.jpg
+    *.png
+    *.svg
+
+# HUMAN DETECTION
+# whether a request with 30x http status counts as success
+status_300_is_success = False
+# if False, unique visitor is (ip-address - visitor agent) pair, if True only ip address
+unique_visitor_is_ip_address = False
+# whether a visitor needs to make at least 1 successful request to be a human
+human_needs_success = True
+
+# don't collect requests to locations that fully match this
+# eg: /analytics.*
+request_location_regex_blacklist =
+
+[ geoip ]
+# GEOIP
+get_visitor_location = False
+# this option is only relevant when --update-geoip is used
+# list of capitalized ISO 3166-1 alpha-2 country codes for which the location needs to be resolved at city level, not country level
+# for EU, use: get_cities_for_countries = AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE
+get_cities_for_countries =  
+
+# hash_ip_address = False
+
+
+# ***************************************** VISUALIZATION ***************************************** 
+# these settings can be changed at any point in time as they only affect the visualization of the data
+# ************************************************************************************************* 
+[ visualization ]
+
+# separate visitors into all and humans 
+# True/False
+get_human_percentage = True
+
+# GEOIP
+# generate a country and city ranking
+# True/False
+do_geoip_rankings = False
+
+# only use humans for geoip rankings
+# True/False
+geoip_only_humans = True
+
+# eg exclude unknown cities: City in .*
+# regex
+city_ranking_regex_blacklist = City in .*
+
+# regex
+country_ranking_regex_blacklist =
+
+# ignore the protocol in referers, so https://url.com = http://url.com -> url.com
+referer_ranking_ignore_protocol = True
+
+# ignore the subdomains in referers, so foo.url.com = bar.url.com -> url.com
+referer_ranking_ignore_subdomain = False
+
+# ignore the location in referers, so url.com/foo = url.com/bar -> url.com
+referer_ranking_ignore_location = True
+
+# regex expression as whitelist for referer ranking, minus means empty
+# eg exclude empty referers: ^[^\-].*
+referer_ranking_regex_whitelist = ^[^\-].*
+
+# regex expression as whitelist for file ranking
+# eg .*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif)) to only show these files
+# regex
+route_ranking_regex_whitelist =
+
+# maximum number of route (group)s on the file ranking
+# int
+route_ranking_plot_max_routes = 20
+
+# whether to ignore non existing files in the ranking
+# True/False
+route_ranking_ignore_error_files = True
+
+# int
+plot_dpi = 300
+
+# affects visitor/request count plot, geoip rankings, file ranking and referer ranking
+plot_size_broad = 14, 5
+
+# affects platform and browser ranking
+plot_size_narrow = 7, 5
+
+
+# ******************************************** REGINA ********************************************* 
+# these settings affect the behavior of regina
+# ************************************************************************************************* 
+# print lots! of debug messages to help you find problems
+debug = False
diff --git a/regina/generated-default.cfg b/regina/generated-default.cfg
new file mode 100644
index 0000000..3cb6213
--- /dev/null
+++ b/regina/generated-default.cfg
@@ -0,0 +1,166 @@
+# ************************************* REGINA CONFIGURATION **************************************
+#                           .__
+#     _______   ____   ____ |__| ____ _____
+#     \_  __ \_/ __ \ / ___\|  |/    \\__  \
+#     |  | \/\  ___// /_/  >  |   |  \/ __ \_
+#     |__|    \___  >___  /|__|___|  (____  /
+#                 \/_____/         \/     \/
+#     *************************************************************************************************
+
+# Common Settings
+[ regina ]
+# name (not url) of the server or website
+# will be available as variable for the generated html as %server_name
+# type: string
+# server_name = my_website
+server_name = 
+
+# database path
+# type: file (read, write permissions)
+# database = /home/my_user/regina/my_website.db
+database = 
+
+# path to the nginx access log to parse
+# type: file (read permissions)
+# access_log = /var/log/nginx/access.log
+access_log = 
+
+
+# The template and generated file do not actually have to be HTML files, you can change them to whatever you want
+[ html-generation ]
+# type: True/False
+generate_html = True
+
+# template html input
+# type: file (read permissions)
+# template_html = /home/my_visitor/.regina/template.html
+template_html = 
+
+# output for the generated html
+# type: file (write permissions)
+# html_out_path = /www/analytics/statistics.html
+html_out_path = 
+
+# output directory for the generated plots
+# type: directory (write permissions)
+# img_out_dir = /www/analytics/images
+img_out_dir = 
+
+# nginx location for the generated images (this has to map to img_out_dir)
+# type: eg: images
+# img_location = /images
+img_location = 
+
+
+# These settings affect the data collection. If changed, they will affect how the database is being filled in the future.
+[ data-collection ]
+# whether a unique visitor is only identified by IP address
+# type: True/False
+unique_visitor_is_ip_address = 
+
+# whether a visitor needs at least one successful request to be a human
+# type: True/False
+human_needs_success = True
+
+# whether a request with 30x HTTP status counts as successful request
+# type: True/False
+status_300_is_success = True
+
+# delete all ip addresses after the collection is done
+# type: True/False
+delete_ip_addresses = True
+
+# don't collect requests to locations that match this regex
+# type: regexp, None, int or string
+# request_location_blacklist = /analytics.*
+request_location_blacklist = 
+
+# whether to get visitor location information
+# type: True/False
+get_visitor_location = 
+
+# whether to generate country and city rankings using GeoIP (requires GeoIP Database)
+# type: True/False
+do_geoip_rankings = 
+
+# countries for which the GeoIP needs to be resolved at city level
+# type: list of capitalized ISO 3166-1 alpha-2 country codes
+# get_cities_for_countries = AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE
+get_cities_for_countries = 
+
+# whether to use only humans for GeoIP rankings (requires GeoIP Database)
+# type: True/False
+geoip_only_humans = True
+
+
+[ rankings ]
+# Explanation for blacklists and whitelists:
+#     If a blacklist is given: values that fully match the blacklist are excluded
+#     If a whitelist is given: values that do not fully match the whitelist are excluded
+#     Both are optional: you can provide one, none or both
+
+# type: regexp or None
+# city_ranking_blacklist = City in .*
+city_ranking_blacklist = 
+
+# type: regexp or None
+city_ranking_whitelist = 
+
+# type: regexp or None
+country_ranking_blacklist = 
+
+# type: regexp or None
+country_ranking_whitelist = 
+
+# type: regexp or None
+# route_ranking_blacklist = .*\.((css)|(txt))
+route_ranking_blacklist = 
+
+# type: regexp or None
+# route_ranking_whitelist = .*\.((php)|(html)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))
+route_ranking_whitelist = 
+
+# maximum number of entries in route ranking
+# type: int
+route_ranking_plot_max_routes = 20
+
+# whether to ignore non-existing routes in ranking
+# type: True/False
+route_ranking_ignore_404 = True
+
+# type: regexp or None
+# referer_ranking_blacklist = Example: exclude '-' (nginx sets this when there is no referer)
+referer_ranking_blacklist = -
+
+# type: regexp or None
+referer_ranking_whitelist = 
+
+# whether to ignore protocol in referer ranking (if True: https://domain.com == http://domain.com -> domain.com)
+# type: True/False
+referer_ranking_ignore_protocol = True
+
+# whether to ignore subdomains in referer ranking (if True: sub.domain.com == another.sub2.domain.com -> domain.com)
+# type: True/False
+referer_ranking_ignore_subdomain = 
+
+# whether to ignore route in referer ranking (if True: domain.com/route1 == domain.com/route2 -> domain.com)
+# type: True/False
+referer_ranking_ignore_route = True
+
+
+[ plots ]
+# DPI for plots
+# type: int
+plot_dpi = 300
+
+# plot size for broad plots: width, height
+# type: int, int
+plot_size_broad = 14, 5
+
+# plot size for narrow plots: width, height
+# type: int, int
+plot_size_narrow = 7, 5
+
+# *************************************************************************************************
+#     https://git.quintern.xyz/MatthiasQuintern/regina
+#     *************************************************************************************************
\ No newline at end of file
diff --git a/regina/main.py b/regina/main.py
index fe28c24..486a6a5 100644
--- a/regina/main.py
+++ b/regina/main.py
@@ -5,18 +5,19 @@ from sys import argv, exit
 from os.path import isfile
 import sqlite3 as sql
 
-if __name__ == "__main__":
+import argparse
+
+if __name__ == "__main__":  # make relative imports work as described here: https://peps.python.org/pep-0366/#proposed-change
     if __package__ is None:
-        # make relative imports work as described here: https://peps.python.org/pep-0366/#proposed-change
         __package__ = "regina"
         import sys
         from os import path
         filepath = path.realpath(path.abspath(__file__))
         sys.path.insert(0, path.dirname(path.dirname(filepath)))
 
-from .db_operation.collect import parse_log, add_requests_to_db, update_ip_range_id
-from .db_operation.database import create_db, update_geoip_tables, t_visitor
-from .db_operation.visualize import visualize
+from .data_collection.parse_log import parse_log
+from .database import Database
+from .data_visualization import visualize
 from .utility.settings_manager import read_settings_file
 from .utility.globals import settings, version
 from .utility.utility import pmessage
@@ -74,81 +75,56 @@ def error(arg):
     print("Error:", arg)
     exit(1)
 
-def main():
-    config_file = ""
-    collect = False
-    visualize_ = False
-    log_file = ""
-    geoip_city_csv = ""
-    # parse args
-    i = 1
-    while i in range(1, len(argv)):
-        if argv[i] in ["--config", "-c"]:
-            if len(argv) > i + 1: config_file = argv[i+1]
-            else: missing_arg_val(argv[i])
-        elif argv[i] == "--log-file":
-            if len(argv) > i + 1: log_file = argv[i+1]
-            else: missing_arg_val(argv[i])
-        if argv[i] == "--update-geoip":
-            if len(argv) > i + 1: geoip_city_csv = argv[i+1]
-            else: missing_arg_val(argv[i])
-        elif argv[i] in ["--help", "-h"]:
-            help()
-            exit(0)
-        elif argv[i] == "--collect":
-            collect = True
-        elif argv[i] == "--visualize":
-            visualize_ = True
-        else:
-            pass
-        i += 1
-    if not (collect or visualize_ or geoip_city_csv):
-        missing_arg("--visualize or --collect or --update-geoip")
 
-    if not config_file:
-        missing_arg("--config")
-    if not isfile(config_file):
-        error(f"Not a file: '{config_file}'")
-    read_settings_file(config_file, settings)
+def main2():  # CLI entry point: parse arguments, load the config file, then run --update-geoip / --collect / --visualize as requested
+    parser = argparse.ArgumentParser(prog="regina")
+    parser.add_argument("--config", "-c",   action="store",         help="path to a config file that specifies all the other parameters", metavar="config-file", required=True)
+    parser.add_argument("--update-geoip",   action="store",         help="path to IP-COUNTRY-REGION-CITY database in csv format", metavar="geoip-csv")
+    parser.add_argument("--visualize",      action="store_true",    help="generate the visualization website")
+    parser.add_argument("--collect",        action="store_true",    help="fill the database from the nginx access log")
+    parser.add_argument("--log-file",       action="store",         help="use alternate logfile than what is set in the config file", metavar="log-file")
+    args = parser.parse_args()
+
+    if not (args.collect or args.visualize or args.update_geoip):
+        parser.error("at least one of --visualize, --collect, or --update-geoip is required.")
+
+    if not isfile(args.config):  # use the module-level isfile import; `path` is only bound inside the `__package__ is None` branch
+        parser.error(f"invalid path to configuration file: '{args.config}'")
+
+    read_settings_file(args.config, settings)
     settings["version"] = version
-    if log_file: settings["access_log"] = log_file
 
+    if args.log_file:
+        settings["access_log"] = args.log_file
 
-    if not settings["server_name"]: missing_arg("server-name")
-    if not settings["access_log"]: missing_arg("log")
-    if not settings["db"]: missing_arg("db")
-    if isinstance(settings["auto_group_filetypes"], str):
-        settings["auto_group_filetypes"] = settings["auto_group_filetypes"].split(",")
-    if isinstance(settings["locs_and_dirs"], str):
-        settings["locs_and_dirs"] = [ loc_and_dir.split(":") for loc_and_dir in settings["locs_and_dirs"].split(",") ]
+    if not settings["server_name"]:
+        error("'server-name' is missing in the configuration file.")
 
-    if not isfile(config_file):
-        error(f"Not a file: '{config_file}'")
+    if not settings["access_log"]:
+        error("'log' is missing in the configuration file.")
 
+    if not settings["db"]:
+        error("'db' is missing in the configuration file.")
 
-    if not isfile(settings["db"]):
-        create_db(settings["db"], settings["filegroups"], settings["locs_and_dirs"], settings["auto_group_filetypes"])
+    db = Database(settings["db"])
+    # if not isfile(settings["db"]):
+    #     create_db(settings["db"], settings["filegroups"], settings["locs_and_dirs"], settings["auto_group_filetypes"])
 
-    if geoip_city_csv:
-        if not isfile(geoip_city_csv):
-            error(f"Not a file: '{geoip_city_csv}'")
-        conn = sql.connect(settings['db'], isolation_level=None)  # required vor vacuum
-        cur = conn.cursor()
-        update_geoip_tables(cur, geoip_city_csv)
+    if args.update_geoip:
+        if not isfile(args.update_geoip):
+            error(f"Not a file: '{args.update_geoip}'")
+        db.update_geoip_tables(args.update_geoip)
         # update visitors
-        for visitor_id in range(sql_tablesize(cur, t_visitor)):
-            update_ip_range_id(cur, visitor_id)
-        cur.close()
-        conn.commit()
-        conn.close()
-    if collect:
+        for (visitor_id) in db(f"SELECT visitor_id FROM visitor"):  # NOTE(review): "(visitor_id)" is not tuple unpacking; if db() yields row tuples this needs "(visitor_id,)" - confirm
+            db.update_ip_range_id(visitor_id)
+    if args.collect:
         pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}' and logfile '{settings['access_log']}'")
         requests = parse_log(settings["access_log"])
-        add_requests_to_db(requests, settings["db"])
-    if visualize_:
+        db.add_requests(requests)
+    if args.visualize:
         pmessage(f"regina version {version} with server-name '{settings['server_name']}', database '{settings['db']}'")
         if not isfile(settings["db"]): error(f"Invalid database path: '{settings['db']}'")
         visualize(settings)
 
 if __name__ == '__main__':
-    main()
+    main2()
diff --git a/regina/sql/create_db.sql b/regina/sql/create_db.sql
index 9fc4821..a712aac 100644
--- a/regina/sql/create_db.sql
+++ b/regina/sql/create_db.sql
@@ -67,5 +67,5 @@ CREATE TABLE IF NOT EXISTS city(
 CREATE TABLE IF NOT EXISTS country(
     country_id  INTEGER PRIMARY KEY,
     name        TEXT UNIQUE,
-    code        TEXT UNIQUE,
+    code        TEXT UNIQUE
 ) STRICT;
diff --git a/regina/test.db b/regina/test.db
new file mode 100644
index 0000000..a8a5796
Binary files /dev/null and b/regina/test.db differ
diff --git a/regina/todo.py b/regina/todo.py
new file mode 100644
index 0000000..1a0768d
--- /dev/null
+++ b/regina/todo.py
@@ -0,0 +1,34 @@
+
+
+def get_files_from_dir_rec(p: str, files: list[str]):
+    """Recursively append p (if it is a file) or every file below p (if it is a directory) to the list files."""
+    pdebug("get_files_from_dir_rec:",p)
+    if path.isfile(p):
+        files.append(p)
+    elif path.isdir(p):
+        for p_ in listdir(p):
+            get_files_from_dir_rec(p + "/" + p_, files)
+
+
+def create_filegroups(cursor: sql.Cursor, filegroup_str: str):
+    """
+    Create or look up every group named in filegroup_str and assign its files to it. TODO: make re-usable (alter groups when config changes)
+    """
+    # filegroup_str: 'name1: file1, file2, file3; name2: file33'
+    groups = filegroup_str.strip(";").split(";")
+    pdebug("create_filegroups:", groups)
+    for group in groups:
+        name, vals = group.split(":")
+        # create/get group
+        if sql_exists(cursor, "", [("groupname", name)]):
+            group_id = sql_select(cursor, "", [("groupname", name)])[0][0]
+        else:
+            group_id = sql_max(cursor, "", "group_id") + 1
+            sql_insert(cursor, "", [(group_id, name)])
+        # pdebug("create_filegroups: group_id", group_id)
+        # create/edit file
+        for filename in vals.split(","):
+            if sql_exists(cursor, "", [("filename", filename)]):  # if exist, update
+                cursor.execute("UPDATE file SET group_id = ? WHERE filename = ?", (group_id, filename))  # bind the real filename (was the literal 'fil') via placeholders
+            else:
+                sql_insert(cursor, "", [[filename, group_id]])
diff --git a/regina/utility/globals.py b/regina/utility/globals.py
index 395ead2..50ce528 100644
--- a/regina/utility/globals.py
+++ b/regina/utility/globals.py
@@ -2,57 +2,9 @@
 
 import os
 
-version = "1.0"
+version = "2.0"
 
-# default settings, these are overwriteable through a config file
-settings = {
-    # GENERAL
-    "server_name": "default_sever",
-    # DATA COLLECTION
-    "access_log": "",
-    "db": "",
-    "locs_and_dirs": [],
-    "auto_group_filetypes": [],
-    "filegroups": "",
-    "request_location_regex_blacklist": "",
-    "request_is_same_on_same_day": True,  # mutiple requests from same visitor to same file at same day are counted as 1
-    "unique_visitor_is_ip_address": False,
-    "get_visitor_location": False,
-    "get_cities_for_countries": [""],  # list if country codes for which the ip address ranges need to be collected at city level, not country level
-    "hash_ip_address": True,
 
-    # VISUALIZATION
-    "get_human_percentage": False,
-    "human_needs_success": True,  # a human must have at least 1 successful request (status < 300)
-    "status_300_is_success": False,  # 300 codes are success
-    "do_geoip_rankings": False,
-    "geoip_only_humans": True,
-    "city_ranking_regex_blacklist": "",
-    "country_ranking_regex_blacklist": "",
-    # "file_ranking_regex_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
-    "file_ranking_regex_whitelist": r".*\.(html)",
-    "file_ranking_ignore_error_files": False,  # skip files that only had unsuccessful requests (status < 300)
-    "referer_ranking_ignore_protocol": True,
-    "referer_ranking_ignore_subdomain": False,
-    "referer_ranking_ignore_location": True,
-    "referer_ranking_ignore_tld": False,
-    "referer_ranking_regex_whitelist": r"^[^\-].*",  # minus means empty
-    "visitor_agent_ranking_regex_whitelist": r"",
-    "file_ranking_plot_max_files": 15,
-    # "plot_figsize": (60, 40),
-    "plot_dpi": 300,
-    "plot_add_count_label": True,
-    "plot_size_broad": (10, 5),
-    "plot_size_narrow": (6.5, 5),
-    "img_dir": "",
-    "img_location": "",
-    "img_filetype": "svg",
-    "template_html": "",
-    "html_out_path": "",
-    "last_x_days": 30,
-    # regina
-    "debug": False
-}
 
 # these oses and browser can be detected:
 # lower element takes precedence
diff --git a/regina/utility/settings_manager.py b/regina/utility/settings_manager.py
index 92c0300..03ac648 100644
--- a/regina/utility/settings_manager.py
+++ b/regina/utility/settings_manager.py
@@ -1,3 +1,298 @@
+from configparser import ConfigParser
+
+"""
+Classes and methods for managing regina configuration
+
+Using CFG_File and CFG_Entry, you set defaults and type restrictions for
+a dictionary like ReginaSettings object and also export the defaults as a .cfg file
+"""
+
+def comment(s):  # prefix s with "# " and continue the "# " prefix after every newline in s
+    return "# " + s.replace("\n", "\n# ").strip("# ")  # strip: drops '#' and ' ' characters at both ends of s before the prefix is added
+
+# for eventual later type checking
+class regexp:
+    """
+    marker type: passed as a CFG_Entry type to say that a value represents a regular expression
+    """
+    pass
+
+class Path:
+    """
+    represents a path: file or directory plus a permission string, rendered as text by __repr__
+    """
+    def __init__(self, permissions="r", is_dir=False):  # permissions: string that may contain "r", "w" and "x"
+        self.is_dir = is_dir
+        self.permissions = permissions
+    def __repr__(self):  # e.g. Path("rw") -> "file (read, write permissions)"
+        if self.is_dir:
+            s = "directory"
+        else:
+            s = "file"
+
+        if self.permissions:
+            s += " ("
+            if "r" in self.permissions: s += "read, "
+            if "w" in self.permissions: s += "write, "
+            if "x" in self.permissions: s += "execute, "
+            s = s[:-2] + " permissions)"  # [:-2] removes the trailing ", "
+        return s
+
+
+class CFG_Entry:
+    """
+    key - value pair in a cfg file
+    extra parameters (description, type, example) are rendered as comments on top of the key - value pair
+    """
+    types = str|Path|None|type[regexp]|type[str]|type[bool]|type[int]
+    def __init__(self, key, dflt=None, typ_: types|list[types]|tuple[types] =str, desc="", exam=""):   # all 4 letters -> nice indent
+        """
+        @param typ_: type for the value:
+            use list of types if multiple types are allowed
+            use tuple of types for tuple of types
+        """
+        self.key = key
+        self.default = dflt
+        self.type_ = typ_
+        self.descripton= desc
+        self.example = exam
+
+    def type_str(self):  # human readable description of self.type_
+        def _type_str(t):
+            if type(t) == str:          return t
+            if t is None:               return "None"
+            if t == str:    return "string"
+            if t == bool:   return "True/False"
+            if t == int:    return "int"
+            if t == float:  return "float"
+            if t == regexp: return "regexp"
+            if type(t) == Path:         return str(t)
+            try:
+                return t.__name__
+            except AttributeError:
+                return str(t)
+
+        s = ""
+        if type(self.type_) == list:  # alternatives: "a, b or c"
+            for i in range(len(self.type_)):
+                s += _type_str(self.type_[i])
+                if i < len(self.type_) - 2: s += ", "
+                elif i == len(self.type_) - 2: s += " or "
+        elif type(self.type_) == tuple:  # tuple of types: "a, b"
+            for i in range(len(self.type_)):
+                s += _type_str(self.type_[i])
+                if i < len(self.type_) - 1: s += ", "
+        else:
+            s = _type_str(self.type_)
+        return s
+
+    def __repr__(self):  # cfg snippet: commented description, type and example, then "key = default"
+        s = ""
+        if self.descripton: s += f"{comment(self.descripton)}\n"
+        if self.type_:      s += f"{comment('type: ' + self.type_str())}\n"
+        # if self.example:    s += f"{comment('eg: ' + self.example)}\n"
+        if self.example:    s += comment(f"{self.key} = {self.example}\n")
+        s += f"{self.key} = "
+        if self.default is not None:    s += f"{self.default}"  # 'is not None': falsy defaults like False or 0 must be exported too
+        s += "\n"
+        return s
+
+
+class CFG_File:
+    """
+    represents a cfg file: a commented header, a list of sections and a commented footer
+    use the __repr__ method to export to a file
+    """
+    def __init__(self, header="", footer=""):
+        self.sections = []  # (name, desc, entries)
+        self.header = header
+        self.footer = footer
+
+    def add_section(self, name:str, entries: list[CFG_Entry|str], desc=""):  # sections are exported in the order they were added
+        self.sections.append((name, desc, entries))
+
+    def __repr__(self):
+        s = comment(self.header) + "\n"
+
+        for name, desc, entries in self.sections:
+            if desc:    s += f"\n{comment(desc)}"
+            s += f"\n[ {name} ]\n"  # NOTE(review): header is written with spaces inside the brackets - confirm the reading side expects that
+            for entry in entries:
+                s += f"{entry}\n"  # entries are CFG_Entry objects or plain (already commented) strings
+        s += comment(self.footer)
+        return s
+
+
+if __name__ == "__main__":  # running this module as a script writes the default configuration to generated-default.cfg
+    cfg = CFG_File(header=r"""
+    ************************************* REGINA CONFIGURATION **************************************
+                          .__
+    _______   ____   ____ |__| ____ _____
+    \_  __ \_/ __ \ / ___\|  |/    \\__  \
+    |  | \/\  ___// /_/  >  |   |  \/ __ \_
+    |__|    \___  >___  /|__|___|  (____  /
+                \/_____/         \/     \/
+    ************************************************************************************************* """.strip(" \n"), footer=r"""
+    *************************************************************************************************
+    https://git.quintern.xyz/MatthiasQuintern/regina
+    *************************************************************************************************
+    """.strip(" \n"))
+    cfg.add_section("regina", desc="Common Settings", entries=[
+        CFG_Entry("server_name",
+                desc="name (not url) of the server or website\nwill be available as variable for the generated html as %server_name",
+                typ_=str,
+                exam="my_website"),
+        CFG_Entry("database",
+                desc="database path",
+                typ_=Path(permissions="rw"),
+                exam="/home/my_user/regina/my_website.db"),
+        CFG_Entry("access_log",
+                desc="path to the nginx access log to parse",
+                typ_=Path(permissions="r"),
+                exam="/var/log/nginx/access.log"),
+        ])
+
+    cfg.add_section("html-generation", desc="The template and generated file do not actually have to be htmls, you can change it to whatever you want", entries=[
+        CFG_Entry("generate_html",
+                typ_=bool,
+                dflt=True),
+        CFG_Entry("template_html",
+                desc="template html input",
+                typ_=Path(permissions="r"),
+                exam="/home/my_visitor/.regina/template.html"),
+        CFG_Entry("html_out_path",
+                desc="output for the generated html",
+                typ_=Path(permissions="w"),
+                exam="/www/analytics/statistics.html"),
+        CFG_Entry("img_out_dir",
+                desc="output directory for the generated plots",
+                typ_=Path(permissions="w", is_dir=True),
+                exam="/www/analytics/images"),
+        CFG_Entry("img_location",
+                desc="nginx location for the generated images (this has to map to img_out_dir)",
+                typ_="eg: images",
+                exam="/images"),
+        ])
+
+
+    cfg.add_section("data-collection", desc="These settings affect the data collection. If changed, they will affect how the database is being filled in the future.", entries=[
+        CFG_Entry("unique_visitor_is_ip_address",
+                dflt=False,
+                desc="whether a unique visitor is only identified by IP address",
+                typ_=bool),
+        CFG_Entry("human_needs_success",
+                dflt=True,
+                desc="whether a visitor needs at least one successful request to be a human",
+                typ_=bool),
+        CFG_Entry("status_300_is_success",
+                dflt=True,
+                desc="whether a request with 30x HTTP status counts as successful request",
+                typ_=bool),
+
+        CFG_Entry("delete_ip_addresses",  # TODO: Implement
+                dflt=True,
+                desc="delete all ip addresses after the collection is done",
+                typ_=bool),
+
+        CFG_Entry("request_location_blacklist",
+                desc="don't collect requests to locations that match this regex",
+                typ_=[regexp, None],
+                exam="/analytics.*"),
+        CFG_Entry("get_visitor_location",
+                dflt=False,
+                desc="whether to get visitor location information",
+                typ_=bool),
+
+        CFG_Entry("do_geoip_rankings",  # TODO: is used?
+                dflt=False,
+                desc="whether to generate country and city rankings using GeoIP (requires GeoIP Database)",
+                typ_=bool),
+        CFG_Entry("get_cities_for_countries",
+                desc="countries for which the GeoIP needs to be resolved at city level",
+                typ_="list of capitalized ISO 3166-1 alpha-2 country codes",
+                exam="AT, BE, BG, HR, CY, CZ, DK, EE, FI, FR, DE, GR, HU, IE, IT, LV, LT, LU, MT, NL, PL, PT, RO, SK, SI, ES, SE"),
+        CFG_Entry("geoip_only_humans", # TODO: is used?
+                dflt=True,
+                desc="whether to use only humans for GeoIP rankings (requires GeoIP Database)",
+                typ_=bool),
+        ])
+
+# cfg.add_section("data-visualization", desc="", entries=[
+
+    cfg.add_section("rankings", desc="", entries=[
+        comment("""
+    Explanation for blacklists and whitelists:
+    If a blacklist is given: values that fully match the blacklist are excluded
+    If a whitelist is given: values that do not fully match the whitelist are excluded
+    Both are optional: you can provide one, none or both
+        """.strip("\n")),
+        CFG_Entry("city_ranking_blacklist",
+                typ_=[regexp, None],
+                exam="City in .*"),
+        CFG_Entry("city_ranking_whitelist",
+                typ_=[regexp, None]),
+        CFG_Entry("country_ranking_blacklist",
+                typ_=[regexp, None]),
+        CFG_Entry("country_ranking_whitelist",
+                typ_=[regexp, None]),
+
+        CFG_Entry("route_ranking_blacklist",
+                typ_=[regexp, None],
+                exam=r".*\.((css)|(txt))"),
+        CFG_Entry("route_ranking_whitelist",
+                typ_=[regexp, None],
+                exam=r".*\.((php)|(html)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))"),
+        CFG_Entry("route_ranking_plot_max_routes",
+                dflt=20,
+                desc="maximum number of entries in route ranking",
+                typ_=int),
+        CFG_Entry("route_ranking_ignore_404",
+                dflt=True,
+                desc="whether to ignore non-existing routes in ranking",
+                typ_=bool),
+        # TODO add groups
+        # Entry("route_groups",
+                # desc="route groups for images",
+                # typ_=[regexp, None],
+                # exam="*.gif, *.jpeg, *.jpg, *.png, *.svg".replace(", ", "\n")),
+
+        CFG_Entry("referer_ranking_blacklist",
+                dflt="-",
+                typ_=[regexp, None],
+                desc="Example: exclude '-' (nginx sets this when there is no referer)"),  # descriptive text, not an example value -> desc
+        CFG_Entry("referer_ranking_whitelist",
+                typ_=[regexp, None]),
+        CFG_Entry("referer_ranking_ignore_protocol",
+                dflt=True,
+                desc="whether to ignore protocol in referer ranking (if True: https://domain.com == http://domain.com -> domain.com)",
+                typ_=bool),
+        CFG_Entry("referer_ranking_ignore_subdomain",
+                dflt=False,
+                desc="whether to ignore subdomains in referer ranking (if True: sub.domain.com == another.sub2.domain.com -> domain.com)",
+                typ_=bool),
+        CFG_Entry("referer_ranking_ignore_route",
+                dflt=True,
+                desc="whether to ignore route in referer ranking (if True: domain.com/route1 == domain.com/route2 -> domain.com)",
+                typ_=bool),
+        ])
+
+    cfg.add_section("plots", desc="", entries=[
+        CFG_Entry("plot_dpi",
+                dflt=300,
+                desc="DPI for plots",
+                typ_=int),
+        CFG_Entry("plot_size_broad",
+                dflt="14, 5",
+                desc="plot size for broad plots: width, height",
+                typ_=(int, int)),
+        CFG_Entry("plot_size_narrow",
+                dflt="7, 5",
+                desc="plot size for narrow plots: width, height",
+                typ_=(int, int)),
+        ])
+
+    with open("generated-default.cfg", "w") as file:
+        file.write(f"{cfg}")
 
 def get_bool(bool_str: str, fallback=False):
     if bool_str in ["true", "True"]: return True
@@ -53,3 +348,72 @@ def read_settings_file(filepath: str, settings:dict, ignore_invalid_lines=True,
                     else: continue
         else:
             settings[vals[0]] = vals[1].strip(" ")
+
+
+class ReginaSettings:  # dict-like settings container: settings[key] reads, settings[key] = value writes with a type check
+    def __init__(self, config_file):
+        parser = ConfigParser()  # NOTE(review): parser and config_file are not used yet
+        # with open(config_file, "r") as file
+        # default settings, these are overwriteable through a config file
+        self._settings = {
+            # GENERAL
+            "server_name": "default_sever",
+            # DATA COLLECTION
+            "access_log": "",
+            "db": "",
+            "locs_and_dirs": [],
+            "auto_group_filetypes": [],
+            "filegroups": "",
+            "request_location_blacklist": "",
+            "request_is_same_on_same_day": True,  # multiple requests from same visitor to same file at same day are counted as 1
+            "unique_visitor_is_ip_address": False,
+            "get_visitor_location": False,
+            "get_cities_for_countries": [""],  # list of country codes for which the ip address ranges need to be collected at city level, not country level
+            "hash_ip_address": True,
+
+            # VISUALIZATION
+            "get_human_percentage": False,
+            "human_needs_success": True,  # a human must have at least 1 successful request (status < 300)
+            "status_300_is_success": False,  # 300 codes are success
+            "do_geoip_rankings": False,
+            "geoip_only_humans": True,
+            "city_ranking_blacklist": "",
+            "country_ranking_blacklist": "",
+            # "file_ranking_whitelist": r".*\.((txt)|(html)|(css)|(php)|(png)|(jpeg)|(jpg)|(svg)|(gif))",
+            "file_ranking_whitelist": r".*\.(html)",
+            "file_ranking_ignore_error_files": False,  # skip files that only had unsuccessful requests (status < 300)
+            "referer_ranking_ignore_protocol": True,
+            "referer_ranking_ignore_subdomain": False,
+            "referer_ranking_ignore_location": True,
+            "referer_ranking_ignore_tld": False,
+            "referer_ranking_whitelist": r"^[^\-].*",  # minus means empty
+            "visitor_agent_ranking_whitelist": r"",
+            "file_ranking_plot_max_files": 15,
+            # "plot_figsize": (60, 40),
+            "plot_dpi": 300,
+            "plot_add_count_label": True,
+            "plot_size_broad": (10, 5),
+            "plot_size_narrow": (6.5, 5),
+            "img_dir": "",
+            "img_location": "",
+            "img_filetype": "svg",
+            "template_html": "",
+            "html_out_path": "",
+            "last_x_days": 30,
+            # regina
+            "debug": False
+        }
+
+
+    def __getitem__(self, key):  # class-level method (was nested inside __init__, which made the object not subscriptable)
+        return self._settings[key]
+
+    def __setitem__(self, key, value):
+        """
+        set key to value.
+        if key already exists, TypeError is raised if value is not of the same type as the current value
+        """
+        if key in self._settings.keys():
+            if type(value) != type(self._settings[key]):
+                raise TypeError(f"ReginaSettings: Trying to set value of '{key}' to '{value}' of type '{type(value)}', but the current type is '{type(self._settings[key])}'.")
+        self._settings[key] = value
diff --git a/regina/utility/utility.py b/regina/utility/utility.py
index 90a4d70..3395837 100644
--- a/regina/utility/utility.py
+++ b/regina/utility/utility.py
@@ -2,6 +2,7 @@
 # print(f"{__file__}: __name__={__name__}, __package__={__package__}, sys.path[0]={path[0]}")
 from sys import exit
 from os import path
+from re import fullmatch
 
 from regina.utility.globals import settings
 
@@ -9,6 +10,29 @@ from regina.utility.globals import settings
 Various utitity
 """
 
+def is_whitelisted(val: str, whitelist: str|list[str]|None):
+    """
+    Check if val is in a regex whitelist
+    whitelist: regexp, list of regexp or None
+    if whitelist is None or empty, always return True; a list requires every regexp to fully match val
+    """
+    if not whitelist: return True
+    if type(whitelist) == str:
+        return fullmatch(whitelist, val) is not None  # plain bool instead of a Match object / None
+    if type(whitelist) == list:
+        for w in whitelist:
+            if not fullmatch(w, val): return False
+    return True
+
+def is_blacklisted(val: str, blacklist: str|list[str]|None):
+    """
+    Check if val fully matches a regex blacklist (blacklist: regexp, list of regexp or None)
+    with a list, val is blacklisted if any regexp matches; if blacklist is None or empty, always return False
+    """
+    patterns = [blacklist] if isinstance(blacklist, str) else (blacklist or [])  # normalize to a list of regexps
+    return any(fullmatch(p, val) is not None for p in patterns)  # matching the blacklist means blacklisted (was inverted)
+
+
 def pdebug(*args, **keys):
     if settings["debug"]: print(*args, **keys)
 
diff --git a/test/test.db b/test/test.db
new file mode 100644
index 0000000..6b46b46
Binary files /dev/null and b/test/test.db differ