From 42e508aa0bcfdb4164524fe229459a4150c327b9 Mon Sep 17 00:00:00 2001
From: "Matthias@Dell"
Date: Thu, 1 Dec 2022 23:37:58 +0100
Subject: [PATCH] fixed request duplicate detection

---
NOTE(review): revised from the original patch. request_exists() called
execute()/fetchall() on the `sql` module instead of the cursor `cur`, and it
reported no duplicates at all when "request_is_same_on_same_day" was False.
Indentation and blank lines inside the hunks were reconstructed, and the blob
hashes on the index lines are still those of the original patch; check both
against the target files before applying.

 regina/db_operation/collect.py | 26 +++++++++++++++++++++++---
 regina/utility/globals.py      | 21 ++++++++++++++++++++-
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/regina/db_operation/collect.py b/regina/db_operation/collect.py
index 7234d7d..2c82fde 100644
--- a/regina/db_operation/collect.py
+++ b/regina/db_operation/collect.py
@@ -138,6 +138,28 @@ def is_user_human(cur: sql.Cursor, user_id: int):
     # user is human
     return True
 
+def request_exists(cur: sql.Cursor, request: Request, user_id: int, group_id: int) -> bool:
+    """
+    Check whether an equivalent request is already stored in the request table.
+
+    Only stored requests from the same user (user_id) to the same file group (group_id) are compared.
+    If settings["request_is_same_on_same_day"] is set, a stored request on the same calendar day
+    counts as a duplicate, otherwise only a stored request with the identical timestamp does.
+    """
+    # get all requests from same user to same location
+    cur.execute(f"SELECT request_id, date FROM {t_request} WHERE user_id = ? AND group_id = ?", (user_id, group_id))
+    date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
+    for request_id, date1 in cur.fetchall():
+        if settings["request_is_same_on_same_day"]:
+            if date0 == dt.fromtimestamp(date1).strftime("%Y-%m-%d"):
+                pdebug(f"request_exists: Request is on same day as request {request_id}")
+                return True
+        elif date1 == request.time_local:
+            # exact duplicate (same user, file group and timestamp), as detected before this patch
+            pdebug(f"request_exists: Request has same timestamp as request {request_id}")
+            return True
+    return False
+
 
 # re_user_agent = r"(?: ?([\w\- ]+)(?:\/([\w.]+))?(?: \(([^()]*)\))?)"
 # 1: platform, 2: version, 3: details
@@ -188,9 +210,7 @@ def add_requests_to_db(requests: list[Request], db_name: str):
         conn.commit()
         group_id: int = get_filegroup(request.request_file, cursor)
         # check if request is unique
-        group_id_name = database_tables[t_filegroup].key.name
-        user_id_name = database_tables[t_user].key.name
-        if sql_exists(cursor, t_request, [(group_id_name, group_id), (user_id_name, user_id), ("date", request.time_local)]):
+        if request_exists(cursor, request, user_id, group_id):
             # pdebug("request exists:", request)
             pass
         else:
diff --git a/regina/utility/globals.py b/regina/utility/globals.py
index 0e4d61a..6e6ecb0 100644
--- a/regina/utility/globals.py
+++ b/regina/utility/globals.py
@@ -13,6 +13,7 @@ settings = {
     "auto_group_filetypes": [],
     "filegroups": "",
     "request_location_regex_blacklist": "",
+    "request_is_same_on_same_day": True,  # multiple requests from same user to same file at same day are counted as 1
     "unique_user_is_ip_address": False,
     "user_get_country": True,
 
@@ -44,4 +45,22 @@ settings = {
 # these oses and browser can be detected:
 # lower element takes precedence
 user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
-user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
+"""
+some browsers have multiple browsers in their user agent:
+    SeaMonkey: Firefox
+    Waterfox: Firefox
+    Chrome: Safari
+    Edge: Chrome, Safari
+    SamsungBrowser: Chrome, Safari
+
+"""
+user_agent_browsers = [
+    # todo YaBrowser/Yowser, OPR, Edg
+    # order does not matter, as long as firefox, chrome, safari come later
+    "DuckDuckGo", "SeaMonkey", "Waterfox", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany",
+    # order does matter
+    # Edg sometimes uses Edg or EdgA (android)
+    "Firefox", "Opera", "Edg", "Chromium", "Chrome", "Safari"
+]
+
+