fixed request duplicate detection

This commit is contained in:
Matthias@Dell 2022-12-01 23:37:58 +01:00
parent 31c45fb202
commit 42e508aa0b
2 changed files with 33 additions and 4 deletions

View File

@ -138,6 +138,18 @@ def is_user_human(cur: sql.Cursor, user_id: int):
# user is human
return True
def request_exists(cur: sql.Cursor, request: Request, user_id: int, group_id: int) -> bool:
    """Return True if an equivalent request is already stored in the request table.

    All stored requests with the given ``user_id`` and ``group_id`` are fetched
    and their ``date`` value is compared against ``request.time_local``:

    - if ``settings["request_is_same_on_same_day"]`` is truthy, a stored request
      matches when it falls on the same calendar day (both values are converted
      with ``dt.fromtimestamp`` and compared as ``YYYY-MM-DD`` strings)
    - otherwise only a stored request with an identical ``date`` value matches

    :param cur: cursor used to run the query
    :param request: the request that is about to be inserted
    :param user_id: id of the user that made the request
    :param group_id: id of the filegroup that was requested
    :returns: True if a matching request exists, False otherwise
    """
    # the query has to run on the cursor that was passed in, not on the sql module
    cur.execute(f"SELECT request_id, date FROM {t_request} WHERE user_id = '{user_id}' AND group_id = '{group_id}'")
    # loop invariants: the setting and the day of the new request
    same_day_is_duplicate = settings["request_is_same_on_same_day"]
    date0 = dt.fromtimestamp(request.time_local).strftime("%Y-%m-%d")
    for request_id, date1 in cur.fetchall():
        if same_day_is_duplicate:
            if dt.fromtimestamp(date1).strftime("%Y-%m-%d") == date0:
                pdebug(f"request_exists: Request is on same day as request {request_id}")
                return True
        elif date1 == request.time_local:
            # without the same-day rule, only an exact timestamp match is a duplicate
            pdebug(f"request_exists: Request has same timestamp as request {request_id}")
            return True
    return False
# re_user_agent = r"(?: ?([\w\- ]+)(?:\/([\w.]+))?(?: \(([^()]*)\))?)"
# 1: platform, 2: version, 3: details
@ -188,9 +200,7 @@ def add_requests_to_db(requests: list[Request], db_name: str):
conn.commit()
group_id: int = get_filegroup(request.request_file, cursor)
# check if request is unique
group_id_name = database_tables[t_filegroup].key.name
user_id_name = database_tables[t_user].key.name
if sql_exists(cursor, t_request, [(group_id_name, group_id), (user_id_name, user_id), ("date", request.time_local)]):
if request_exists(cursor, request, user_id, group_id):
# pdebug("request exists:", request)
pass
else:

View File

@ -13,6 +13,7 @@ settings = {
"auto_group_filetypes": [],
"filegroups": "",
"request_location_regex_blacklist": "",
"request_is_same_on_same_day": True, # mutiple requests from same user to same file at same day are counted as 1
"unique_user_is_ip_address": False,
"user_get_country": True,
@ -44,4 +45,22 @@ settings = {
# These operating systems and browsers can be detected in a user agent string.
# lower element takes precedence
user_agent_operating_systems = ["Windows", "Android", "Linux", "iPhone", "iPad", "Mac", "BSD"]
user_agent_browsers = ["Firefox", "DuckDuckGo", "SeaMonkey", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany", "Chromium", "Chrome", "Safari", "Opera", "Edge"]
"""
some browsers have multiple browsers in their user agent:
SeaMonkey: Firefox
Waterfox: Firefox
Chrome: Safari
Edge: Chrome, Safari
SamsungBrowser: Chrome, Safari
"""
user_agent_browsers = [
# TODO: YaBrowser/Yowser, OPR, Edg
# order does not matter here, as long as Firefox, Chrome and Safari come later
"DuckDuckGo", "SeaMonkey", "Waterfox", "Vivaldi", "Yandex", "Brave", "SamsungBrowser", "Lynx", "Epiphany",
# order does matter from here on
# Edge sometimes reports itself as Edg or EdgA (Android)
"Firefox", "Opera", "Edg", "Chromium", "Chrome", "Safari"
]