Changed database structure

added new tables to reduce the size of the database
This commit is contained in:
matthias@arch 2023-05-15 22:01:30 +02:00
parent b3703ae199
commit 0e0ece77ea
5 changed files with 571 additions and 200 deletions

280
database.svg Normal file
View File

@ -0,0 +1,280 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC '-//W3C//DTD SVG 1.0//EN'
'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd'>
<svg fill-opacity="1" xmlns:xlink="http://www.w3.org/1999/xlink" color-rendering="auto" color-interpolation="auto" text-rendering="auto" stroke="black" stroke-linecap="square" width="910" stroke-miterlimit="10" shape-rendering="auto" stroke-opacity="1" fill="black" stroke-dasharray="none" font-weight="normal" stroke-width="1" viewBox="350 130 910 630" height="630" xmlns="http://www.w3.org/2000/svg" font-family="'Dialog'" font-style="normal" stroke-linejoin="miter" font-size="12px" stroke-dashoffset="0" image-rendering="auto"
><!--Generated by the Batik Graphics2D SVG Generator--><defs id="genericDefs"
/><g
><defs id="defs1"
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath1"
><path d="M0 0 L2147483647 0 L2147483647 2147483647 L0 2147483647 L0 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath2"
><path d="M0 0 L0 100 L190 100 L190 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath3"
><path d="M0 0 L0 100 L180 100 L180 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath4"
><path d="M0 0 L0 130 L200 130 L200 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath5"
><path d="M0 0 L0 110 L190 110 L190 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath6"
><path d="M0 0 L0 130 L190 130 L190 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath7"
><path d="M0 0 L0 100 L200 100 L200 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath8"
><path d="M0 0 L0 170 L190 170 L190 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath9"
><path d="M0 0 L0 180 L200 180 L200 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath10"
><path d="M0 0 L0 50 L110 50 L110 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath11"
><path d="M0 0 L0 120 L40 120 L40 0 Z"
/></clipPath
><clipPath clipPathUnits="userSpaceOnUse" id="clipPath12"
><path d="M0 0 L0 50 L120 50 L120 0 Z"
/></clipPath
></defs
><g fill="rgb(0,255,255)" fill-opacity="0.4902" transform="translate(840,150)" stroke-opacity="0.4902" stroke="rgb(0,255,255)"
><rect x="0.5" width="188.5" height="98.5" y="0.5" clip-path="url(#clipPath2)" stroke="none"
/></g
><g transform="translate(840,150)"
><rect fill="none" x="0.5" width="188.5" height="98.5" y="0.5" clip-path="url(#clipPath2)"
/><text x="65" font-size="14px" y="23.1094" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>referer</text
><path fill="none" d="M1 29.1094 L189 29.1094" clip-path="url(#clipPath2)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>- referer_id: INTEGER</text
><path fill="none" d="M1 66.3281 L189 66.3281" clip-path="url(#clipPath2)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT UNIQUE</text
><path fill="none" d="M1 87.4375 L189 87.4375" clip-path="url(#clipPath2)"
/></g
><g fill="rgb(255,0,255)" fill-opacity="0.4902" transform="translate(610,150)" stroke-opacity="0.4902" stroke="rgb(255,0,255)"
><rect x="0.5" width="188.5" height="98.5" y="0.5" clip-path="url(#clipPath2)" stroke="none"
/></g
><g transform="translate(610,150)"
><rect fill="none" x="0.5" width="188.5" height="98.5" y="0.5" clip-path="url(#clipPath2)"
/><text x="65" font-size="14px" y="23.1094" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>browser</text
><path fill="none" d="M1 29.1094 L189 29.1094" clip-path="url(#clipPath2)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>- browser_id: INTEGER</text
><path fill="none" d="M1 66.3281 L189 66.3281" clip-path="url(#clipPath2)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath2)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT UNIQUE</text
><path fill="none" d="M1 87.4375 L189 87.4375" clip-path="url(#clipPath2)"
/></g
><g fill="rgb(0,255,255)" fill-opacity="0.4902" transform="translate(1050,340)" stroke-opacity="0.4902" stroke="rgb(0,255,255)"
><rect x="0.5" width="178.5" height="98.5" y="0.5" clip-path="url(#clipPath3)" stroke="none"
/></g
><g transform="translate(1050,340)"
><rect fill="none" x="0.5" width="178.5" height="98.5" y="0.5" clip-path="url(#clipPath3)"
/><text x="68" font-size="14px" y="23.1094" clip-path="url(#clipPath3)" font-family="monospace" stroke="none" xml:space="preserve"
>route</text
><path fill="none" d="M1 29.1094 L179 29.1094" clip-path="url(#clipPath3)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath3)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath3)" font-family="monospace" stroke="none" xml:space="preserve"
>- route_id: INTEGER</text
><path fill="none" d="M1 66.3281 L179 66.3281" clip-path="url(#clipPath3)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath3)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT UNIQUE</text
><path fill="none" d="M1 87.4375 L179 87.4375" clip-path="url(#clipPath3)"
/></g
><g fill="rgb(255,165,0)" fill-opacity="0.4902" transform="translate(490,610)" stroke-opacity="0.4902" stroke="rgb(255,165,0)"
><rect x="0.5" width="198.5" height="128.5" y="0.5" clip-path="url(#clipPath4)" stroke="none"
/></g
><g transform="translate(490,610)"
><rect fill="none" x="0.5" width="198.5" height="128.5" y="0.5" clip-path="url(#clipPath4)"
/><text x="66" font-size="14px" y="23.1094" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>ip_range</text
><path fill="none" d="M1 29.1094 L199 29.1094" clip-path="url(#clipPath4)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>- ip_range_id</text
><path fill="none" d="M1 66.3281 L199 66.3281" clip-path="url(#clipPath4)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>- low: INTEGER UNIQUE</text
><text x="5" font-size="14px" y="97.5469" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>- high: INTEGER UNIQUE</text
><text x="5" font-size="14px" y="113.6562" clip-path="url(#clipPath4)" font-family="monospace" stroke="none" xml:space="preserve"
>- city_id: INTEGER</text
></g
><g fill="rgb(255,165,0)" fill-opacity="0.4902" transform="translate(1050,610)" stroke-opacity="0.4902" stroke="rgb(255,165,0)"
><rect x="0.5" width="188.5" height="108.5" y="0.5" clip-path="url(#clipPath5)" stroke="none"
/></g
><g transform="translate(1050,610)"
><rect fill="none" x="0.5" width="188.5" height="108.5" y="0.5" clip-path="url(#clipPath5)"
/><text x="65" font-size="14px" y="23.1094" clip-path="url(#clipPath5)" font-family="monospace" stroke="none" xml:space="preserve"
>country</text
><path fill="none" d="M1 29.1094 L189 29.1094" clip-path="url(#clipPath5)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath5)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath5)" font-family="monospace" stroke="none" xml:space="preserve"
>- country_id: INTEGER</text
><path fill="none" d="M1 66.3281 L189 66.3281" clip-path="url(#clipPath5)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath5)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT UNIQUE</text
><text x="5" font-size="14px" y="97.5469" clip-path="url(#clipPath5)" font-family="monospace" stroke="none" xml:space="preserve"
>- code: TEXT UNIQUE</text
></g
><g fill="rgb(255,165,0)" fill-opacity="0.4902" transform="translate(780,610)" stroke-opacity="0.4902" stroke="rgb(255,165,0)"
><rect x="0.5" width="188.5" height="128.5" y="0.5" clip-path="url(#clipPath6)" stroke="none"
/></g
><g transform="translate(780,610)"
><rect fill="none" x="0.5" width="188.5" height="128.5" y="0.5" clip-path="url(#clipPath6)"
/><text x="78" font-size="14px" y="23.1094" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>city</text
><path fill="none" d="M1 29.1094 L189 29.1094" clip-path="url(#clipPath6)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>- city_id: INTEGER</text
><path fill="none" d="M1 66.3281 L189 66.3281" clip-path="url(#clipPath6)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>- country_id: INTEGER</text
><text x="5" font-size="14px" y="97.5469" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT</text
><text x="5" font-size="14px" y="113.6562" clip-path="url(#clipPath6)" font-family="monospace" stroke="none" xml:space="preserve"
>- region: TEXT</text
></g
><g fill="rgb(255,0,255)" fill-opacity="0.4902" transform="translate(370,150)" stroke-opacity="0.4902" stroke="rgb(255,0,255)"
><rect x="0.5" width="198.5" height="98.5" y="0.5" clip-path="url(#clipPath7)" stroke="none"
/></g
><g transform="translate(370,150)"
><rect fill="none" x="0.5" width="198.5" height="98.5" y="0.5" clip-path="url(#clipPath7)"
/><text x="66" font-size="14px" y="23.1094" clip-path="url(#clipPath7)" font-family="monospace" stroke="none" xml:space="preserve"
>platform</text
><path fill="none" d="M1 29.1094 L199 29.1094" clip-path="url(#clipPath7)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath7)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath7)" font-family="monospace" stroke="none" xml:space="preserve"
>- platform_id: INTEGER</text
><path fill="none" d="M1 66.3281 L199 66.3281" clip-path="url(#clipPath7)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath7)" font-family="monospace" stroke="none" xml:space="preserve"
>- name: TEXT UNIQUE</text
><path fill="none" d="M1 87.4375 L199 87.4375" clip-path="url(#clipPath7)"
/></g
><g fill="rgb(0,255,255)" fill-opacity="0.4902" transform="translate(780,340)" stroke-opacity="0.4902" stroke="rgb(0,255,255)"
><rect x="0.5" width="188.5" height="168.5" y="0.5" clip-path="url(#clipPath8)" stroke="none"
/></g
><g transform="translate(780,340)"
><rect fill="none" x="0.5" width="188.5" height="168.5" y="0.5" clip-path="url(#clipPath8)"
/><text x="65" font-size="14px" y="23.1094" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>request</text
><path fill="none" d="M1 29.1094 L189 29.1094" clip-path="url(#clipPath8)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- request_id: INTEGER</text
><path fill="none" d="M1 66.3281 L189 66.3281" clip-path="url(#clipPath8)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- visitor_id: INTEGER</text
><text x="5" font-size="14px" y="97.5469" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- route_id: INTEGER</text
><text x="5" font-size="14px" y="113.6562" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- referer_id: INTEGER</text
><path fill="none" d="M1 119.6562 L189 119.6562" clip-path="url(#clipPath8)"
/><text x="5" font-size="14px" y="134.7656" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- time: INTEGER</text
><text x="5" font-size="14px" y="150.875" clip-path="url(#clipPath8)" font-family="monospace" stroke="none" xml:space="preserve"
>- status: INTEGER</text
></g
><g fill="rgb(255,0,255)" fill-opacity="0.4902" transform="translate(490,340)" stroke-opacity="0.4902" stroke="rgb(255,0,255)"
><rect x="0.5" width="198.5" height="178.5" y="0.5" clip-path="url(#clipPath9)" stroke="none"
/></g
><g transform="translate(490,340)"
><rect fill="none" x="0.5" width="198.5" height="178.5" y="0.5" clip-path="url(#clipPath9)"
/><text x="70" font-size="14px" y="23.1094" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>visitor</text
><path fill="none" d="M1 29.1094 L199 29.1094" clip-path="url(#clipPath9)"
/><text x="5" font-size="14px" y="44.2188" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>«PK»</text
><text x="5" font-size="14px" y="60.3281" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- visitor_id: INTEGER</text
><path fill="none" d="M1 66.3281 L199 66.3281" clip-path="url(#clipPath9)"
/><text x="5" font-size="14px" y="81.4375" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- ip_address: INTEGER</text
><text x="5" font-size="14px" y="97.5469" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- platform_id: INTEGER</text
><text x="5" font-size="14px" y="113.6562" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- browser_id: INTEGER</text
><text x="5" font-size="14px" y="129.7656" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- is_mobile: INTEGER</text
><text x="5" font-size="14px" y="145.875" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- is_human: INTEGER</text
><text x="5" font-size="14px" y="161.9844" clip-path="url(#clipPath9)" font-family="monospace" stroke="none" xml:space="preserve"
>- ip_range_id: INTEGER</text
></g
><g transform="translate(960,370)"
><path fill="none" d="M10.5 20.5 L90.5 20.5" clip-path="url(#clipPath10)"
/><text x="29" font-size="14px" y="34.1094" clip-path="url(#clipPath10)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
><text x="61.5713" font-size="14px" y="34.1094" clip-path="url(#clipPath10)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
></g
><g transform="translate(870,240)"
><path fill="none" d="M10.5 100.5 L10.5 10.5" clip-path="url(#clipPath11)"
/><text x="14" font-size="14px" y="84" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
><text x="14" font-size="14px" y="39.1094" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
></g
><g transform="translate(540,240)"
><path fill="none" d="M10.5 100.5 L10.5 10.5" clip-path="url(#clipPath11)"
/><text x="14" font-size="14px" y="84" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
><text x="14" font-size="14px" y="39.1094" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
></g
><g transform="translate(580,510)"
><path fill="none" d="M10.5 100.5 L10.5 10.5" clip-path="url(#clipPath11)"
/><text x="14" font-size="14px" y="84" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
><text x="14" font-size="14px" y="39.1094" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
></g
><g transform="translate(680,630)"
><path fill="none" d="M100.5 20.5 L10.5 20.5" clip-path="url(#clipPath12)"
/><text x="71.5713" font-size="14px" y="34.1094" clip-path="url(#clipPath12)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
><text x="29" font-size="14px" y="34.1094" clip-path="url(#clipPath12)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
></g
><g transform="translate(960,630)"
><path fill="none" d="M90.5 20.5 L10.5 20.5" clip-path="url(#clipPath10)"
/><text x="61.5713" font-size="14px" y="34.1094" clip-path="url(#clipPath10)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
><text x="29" font-size="14px" y="34.1094" clip-path="url(#clipPath10)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
></g
><g transform="translate(680,370)"
><path fill="none" d="M10.5 20.5 L100.5 20.5" clip-path="url(#clipPath12)"
/><text x="29" font-size="14px" y="34.1094" clip-path="url(#clipPath12)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
><text x="71.5713" font-size="14px" y="34.1094" clip-path="url(#clipPath12)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
></g
><g transform="translate(620,240)"
><path fill="none" d="M10.5 100.5 L10.5 10.5" clip-path="url(#clipPath11)"
/><text x="14" font-size="14px" y="84" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>n</text
><text x="14" font-size="14px" y="39.1094" clip-path="url(#clipPath11)" font-family="monospace" stroke="none" xml:space="preserve"
>1</text
></g
></g
></svg
>

After

Width:  |  Height:  |  Size: 19 KiB

View File

@ -1,13 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml version="1.0" encoding="UTF-8" standalone="no"?>
<diagram program="umlet" version="15.1"> <diagram program="umlet" version="15.1">
<zoom_level>8</zoom_level> <zoom_level>9</zoom_level>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>96</x> <x>441</x>
<y>248</y> <y>306</y>
<w>160</w> <w>180</w>
<h>144</h> <h>162</h>
</coordinates> </coordinates>
<panel_attributes>visitor <panel_attributes>visitor
-- --
@ -17,19 +17,20 @@
- ip_address: INTEGER - ip_address: INTEGER
- platform_id: INTEGER - platform_id: INTEGER
- browser_id: INTEGER - browser_id: INTEGER
- mobile: INTEGER - is_mobile: INTEGER
- is_human: INTEGER - is_human: INTEGER
- range_id: INTEGER - ip_range_id: INTEGER
style=autoresize</panel_attributes> style=autoresize
bg=MAGENTA</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>216</x> <x>558</x>
<y>168</y> <y>216</y>
<w>32</w> <w>36</w>
<h>96</h> <h>108</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
@ -40,10 +41,10 @@ m2=1
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>352</x> <x>702</x>
<y>240</y> <y>306</y>
<w>152</w> <w>171</w>
<h>136</h> <h>153</h>
</coordinates> </coordinates>
<panel_attributes>request <panel_attributes>request
-- --
@ -56,30 +57,31 @@ m2=1
-- --
- time: INTEGER - time: INTEGER
- status: INTEGER - status: INTEGER
style=autoresize</panel_attributes> style=autoresize
bg=CYAN</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>248</x> <x>612</x>
<y>248</y> <y>333</y>
<w>120</w> <w>108</w>
<h>40</h> <h>45</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
m2=n m2=n
</panel_attributes> </panel_attributes>
<additional_attributes>10.0;20.0;130.0;20.0</additional_attributes> <additional_attributes>10.0;20.0;100.0;20.0</additional_attributes>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>16</x> <x>333</x>
<y>96</y> <y>135</y>
<w>160</w> <w>180</w>
<h>80</h> <h>90</h>
</coordinates> </coordinates>
<panel_attributes>platform <panel_attributes>platform
-- --
@ -88,16 +90,17 @@ m2=n
-- --
- name: TEXT UNIQUE - name: TEXT UNIQUE
-- --
style=autoresize</panel_attributes> style=autoresize
bg=MAGENTA</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>328</x> <x>702</x>
<y>488</y> <y>549</y>
<w>152</w> <w>171</w>
<h>104</h> <h>117</h>
</coordinates> </coordinates>
<panel_attributes>city <panel_attributes>city
-- --
@ -107,95 +110,98 @@ style=autoresize</panel_attributes>
- country_id: INTEGER - country_id: INTEGER
- name: TEXT - name: TEXT
- region: TEXT - region: TEXT
style=autoresize</panel_attributes> style=autoresize
bg=ORANGE</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>536</x> <x>945</x>
<y>488</y> <y>549</y>
<w>152</w> <w>171</w>
<h>88</h> <h>99</h>
</coordinates> </coordinates>
<panel_attributes>country <panel_attributes>country
-- --
&lt;&lt;PK&gt;&gt; &lt;&lt;PK&gt;&gt;
- country_id: INTEGER - country_id: INTEGER
-- --
- name: TEXT - name: TEXT UNIQUE
- code: TEXT - code: TEXT UNIQUE
style=autoresize</panel_attributes> style=autoresize
bg=ORANGE</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>472</x> <x>864</x>
<y>504</y> <y>567</y>
<w>80</w> <w>99</w>
<h>40</h> <h>45</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
m2=n m2=n
</panel_attributes> </panel_attributes>
<additional_attributes>80.0;20.0;10.0;20.0</additional_attributes> <additional_attributes>90.0;20.0;10.0;20.0</additional_attributes>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>264</x> <x>612</x>
<y>504</y> <y>567</y>
<w>80</w> <w>108</w>
<h>40</h> <h>45</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
m2=n m2=n
</panel_attributes> </panel_attributes>
<additional_attributes>80.0;20.0;10.0;20.0</additional_attributes> <additional_attributes>100.0;20.0;10.0;20.0</additional_attributes>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>136</x> <x>441</x>
<y>488</y> <y>549</y>
<w>136</w> <w>180</w>
<h>104</h> <h>117</h>
</coordinates> </coordinates>
<panel_attributes>ip_range <panel_attributes>ip_range
-- --
&lt;&lt;PK&gt;&gt; &lt;&lt;PK&gt;&gt;
- range_id - ip_range_id
-- --
- from: INTEGER - low: INTEGER UNIQUE
- to: INTEGER - high: INTEGER UNIQUE
- city_id: INTEGER - city_id: INTEGER
style=autoresize</panel_attributes> style=autoresize
bg=ORANGE</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>176</x> <x>522</x>
<y>384</y> <y>459</y>
<w>32</w> <w>36</w>
<h>120</h> <h>108</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=1 m1=1
m2=n m2=n
</panel_attributes> </panel_attributes>
<additional_attributes>10.0;130.0;10.0;10.0</additional_attributes> <additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>576</x> <x>945</x>
<y>264</y> <y>306</y>
<w>144</w> <w>162</w>
<h>80</h> <h>90</h>
</coordinates> </coordinates>
<panel_attributes>route <panel_attributes>route
-- --
@ -204,16 +210,17 @@ m2=n
-- --
- name: TEXT UNIQUE - name: TEXT UNIQUE
-- --
style=autoresize</panel_attributes> style=autoresize
bg=CYAN</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>208</x> <x>549</x>
<y>96</y> <y>135</y>
<w>152</w> <w>171</w>
<h>80</h> <h>90</h>
</coordinates> </coordinates>
<panel_attributes>browser <panel_attributes>browser
-- --
@ -222,16 +229,17 @@ style=autoresize</panel_attributes>
-- --
- name: TEXT UNIQUE - name: TEXT UNIQUE
-- --
style=autoresize</panel_attributes> style=autoresize
bg=MAGENTA</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>144</x> <x>486</x>
<y>168</y> <y>216</y>
<w>32</w> <w>36</w>
<h>96</h> <h>108</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
@ -242,10 +250,10 @@ m2=1
<element> <element>
<id>UMLClass</id> <id>UMLClass</id>
<coordinates> <coordinates>
<x>392</x> <x>756</x>
<y>96</y> <y>135</y>
<w>152</w> <w>171</w>
<h>80</h> <h>90</h>
</coordinates> </coordinates>
<panel_attributes>referer <panel_attributes>referer
-- --
@ -254,35 +262,36 @@ m2=1
-- --
- name: TEXT UNIQUE - name: TEXT UNIQUE
-- --
style=autoresize</panel_attributes> style=autoresize
bg=CYAN</panel_attributes>
<additional_attributes/> <additional_attributes/>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>400</x> <x>783</x>
<y>168</y> <y>216</y>
<w>32</w> <w>36</w>
<h>88</h> <h>108</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
m2=1 m2=1
</panel_attributes> </panel_attributes>
<additional_attributes>10.0;90.0;10.0;10.0</additional_attributes> <additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
</element> </element>
<element> <element>
<id>Relation</id> <id>Relation</id>
<coordinates> <coordinates>
<x>496</x> <x>864</x>
<y>288</y> <y>333</y>
<w>96</w> <w>99</w>
<h>40</h> <h>45</h>
</coordinates> </coordinates>
<panel_attributes>lt=- <panel_attributes>lt=-
m1=n m1=n
m2=1 m2=1
</panel_attributes> </panel_attributes>
<additional_attributes>10.0;20.0;100.0;20.0</additional_attributes> <additional_attributes>10.0;20.0;90.0;20.0</additional_attributes>
</element> </element>
</diagram> </diagram>

View File

@ -15,11 +15,11 @@ if __name__ == "__main__": # make relative imports work as described here: http
sys.path.insert(0, path.dirname(path.dirname(filepath))) sys.path.insert(0, path.dirname(path.dirname(filepath)))
# local # local
from regina.utility.sql_util import replace_null, sanitize, sql_select, sql_exists from regina.utility.sql_util import replace_null, sanitize, sql_select, sql_exists, sql_tablesize
from regina.utility.utility import pdebug, get_filepath, warning, pmessage, is_blacklisted, is_whitelisted from regina.utility.utility import pdebug, get_filepath, warning, pmessage, is_blacklisted, is_whitelisted
from regina.utility.globals import settings from regina.utility.globals import settings
from regina.data_collection.request import Request from regina.data_collection.request import Request
from regina.utility.globals import visitor_agent_operating_systems, visitor_agent_browsers, settings from regina.utility.globals import user_agent_platforms, user_agent_browsers, settings
""" """
create reginas database as shown in the uml diagram database.uxf create reginas database as shown in the uml diagram database.uxf
@ -32,14 +32,22 @@ class Database:
# verify that the database is created # verify that the database is created
self.cur.execute("pragma schema_version") self.cur.execute("pragma schema_version")
if self.cur.fetchone()[0] == 0: # not created if self.cur.fetchone()[0] == 0: # not created
pdebug(f"Database.__init__: Creating database at {database_path}") pdebug(f"Database.__init__: Creating new databse at {database_path}", lvl=1)
with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file: with open(pkg_resources.resource_filename("regina", "sql/create_db.sql"), "r") as file:
create_db = file.read() create_db = file.read()
self.cur.executescript(create_db) self.cur.executescript(create_db)
self.conn.commit() self.conn.commit()
else:
pdebug(f"Database.__init__: Opening existing database at {database_path}", lvl=1)
def __del__(self):
self.cur.close()
self.conn.commit()
self.conn.close()
def __call__(self, s): def __call__(self, s):
"""execute a command and return fetchall()""" """execute a command and return fetchall()"""
pdebug(f"Database: execute: \"{s}\"", lvl=4)
self.cur.execute(s) self.cur.execute(s)
return self.cur.fetchall() return self.cur.fetchall()
def execute(self, s): def execute(self, s):
@ -51,133 +59,147 @@ class Database:
# VISITOR # VISITOR
# #
def is_visitor_human(self, visitor_id: int): def is_visitor_human(self, visitor_id: int):
self.execute(f"SELECT is_human FROM visitor WHERE visitor_id = {visitor_id}")
if self.fetchone()[0] == 1:
return True
return False
def update_is_visitor_human(self, visitor_id: int):
""" """
check if they have a known platform AND browser check if they have a known platform AND browser
if settings "human_needs_success": check if at least one request did not result in an error (http status >= 400) if settings "human_needs_success": check if at least one request did not result in an error (http status >= 400)
updates the visitor.is_human column
@returns True if human, else False
""" """
max_success_status = 400 def set_not_human(debug_str=""):
if settings["status_300_is_success"]: max_success_status = 300 pdebug(f"update_is_visitor_human: visitor_id={visitor_id:5} is not human: Failed check: {debug_str}", lvl=3)
self.cur.execute(f"UPDATE visitor SET is_human = 0 WHERE visitor_id = {visitor_id}")
return False
self.cur.execute(f"SELECT browser_id, platform_id FROM visitor WHERE visitor_id = {visitor_id}") self.cur.execute(f"SELECT browser_id, platform_id FROM visitor WHERE visitor_id = {visitor_id}")
browsers_and_platforms = self.cur.fetchall() browser_id, platform_id = self.cur.fetchall()[0]
if len(browsers_and_platforms) != 1: browser = self.get_name("browser", browser_id)
pdebug(f"is_visitor_human: {visitor_id} - could not find visitor or found too many") if not browser in user_agent_browsers:
return False return set_not_human("browser")
browser = self.get_name("browser", browsers_and_platforms[0][0])
if not browser in visitor_agent_browsers: platform = self.get_name("platform", platform_id)
return False if not platform in user_agent_platforms:
platform = self.get_name("platform", browsers_and_platforms[0][1]) return set_not_human("platform")
if not platform in visitor_agent_operating_systems:
return False max_success_status = 300
if settings["human_needs_success"]: if settings["data-collection"]["status_300_is_success"]: max_success_status = 400
if settings["data-collection"]["human_needs_successful_request"]:
# check if at least request was successful (status < 400) # check if at least request was successful (status < 400)
self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM request WHERE visitor_id = {visitor_id} AND status < {max_success_status})") self.cur.execute(f"SELECT EXISTS (SELECT 1 FROM request WHERE visitor_id = {visitor_id} AND status < {max_success_status})")
if self.cur.fetchone()[0] == 1: if self.cur.fetchone()[0] == 0:
# pdebug(f"is_visitor_human: Visitor {visitor_id} is human") return set_not_human("successful request")
pass # if here, is human
else: self.cur.execute(f"UPDATE visitor SET is_human = 1 WHERE visitor_id = {visitor_id}")
# pdebug(f"is_visitor_human: Visitor {visitor_id} only had unsuccessful requests")
return False
return True return True
def get_visitor_id(self, request: Request, insert=True) -> tuple[int | None, bool]:
    """
    get the visitor_id of the visitor that sent `request`:
        if settings unique_visitor_is_ip_address: the visitor is looked up by ip address only
        else: the visitor is looked up by ip_address and, where the ids are known, browser and platform
    @return visitor_id, is_new_visitor
        if visitor exists:              visitor_id, False
        if visitor does not exist:
            if insert:                  visitor_id, True  (the visitor is inserted with is_human = 0)
            else:                       None, False
    """
    ip_address = request.ip_address
    # if insert == True, ids will be int
    browser_id: int | None = self.get_id("browser", request.get_browser(), insert=insert)
    platform_id: int | None = self.get_id("platform", request.get_platform(), insert=insert)

    constraints = [("ip_address", ip_address)]
    if not settings["data-collection"]["unique_visitor_is_ip_address"]:
        if browser_id: constraints.append(("browser_id", browser_id))
        if platform_id: constraints.append(("platform_id", platform_id))

    is_new_visitor = not sql_exists(self.cur, "visitor", constraints)
    if is_new_visitor:
        if not insert:
            return None, False
        is_mobile = int(request.get_mobile())
        ip_range_id = 0
        if settings["data-collection"]["get_visitor_location"]:
            ip_range_id = self.get_ip_range_id(request.ip_address)
        is_human = 0  # update_is_visitor_human cannot be called until visitor is in db
        # the values originate from log files, so they are bound as parameters
        # instead of being pasted into the statement
        self.cur.execute(
            "INSERT INTO visitor (ip_address, ip_range_id, platform_id, browser_id, is_mobile, is_human) VALUES (?, ?, ?, ?, ?, ?);",
            (ip_address, ip_range_id, platform_id, browser_id, is_mobile, is_human),
        )
    # the first column of the matching row is used as the visitor_id
    visitor_id = sql_select(self.cur, "visitor", constraints)[0][0]
    return visitor_id, is_new_visitor
def get_visitor_ids_for_date(self, date:str) -> list[int]:
    """
    return the distinct visitor_ids of all requests that match `date`
    `date` is placed verbatim after WHERE, so it has to be an sql condition on the request table
    """
    rows = self(f"SELECT DISTINCT visitor_id FROM request WHERE {date}")
    return [row[0] for row in rows]
def get_visitor_count(self) -> int:
    """return the size of the visitor table as reported by sql_tablesize"""
    visitor_count = sql_tablesize(self.cur, "visitor")
    return visitor_count
# #
# REQUEST # REQUEST
# #
def get_request_count(self) -> int:
    """return the size of the request table as reported by sql_tablesize"""
    request_count = sql_tablesize(self.cur, "request")
    return request_count
def request_exists(self, request_timestamp: int, visitor_id: int, route_id: int) -> bool:
    """
    Return if a request from same visitor was made to same route within the timespan set by the 'ignore_duplicate_requests_within_x_seconds' option
    The timespan is applied in both directions around request_timestamp (the lower bound is clamped to 0),
    so with a value of 0 only requests with the identical timestamp count as duplicates
    """
    ignore_seconds = settings["data-collection"]["ignore_duplicate_requests_within_x_seconds"]
    time_min = max(0, request_timestamp - ignore_seconds)
    time_max = request_timestamp + ignore_seconds
    requests = self(f"SELECT request_id, time FROM request WHERE visitor_id = '{visitor_id}' AND route_id = '{route_id}' AND time BETWEEN {time_min} AND {time_max}")
    if len(requests) > 0:
        # the option is given in seconds, the message used to claim minutes
        pdebug(f"request_exists: Found {len(requests)} requests within {ignore_seconds} seconds (v_id={visitor_id}, r_id={route_id}, t={request_timestamp})")
        return True
    return False
def add_request(self, request: Request) -> tuple[int | None, bool]:
    """
    Add the request to the database, unless request_exists reports it as duplicate
    The visitor, referer and route are looked up through get_visitor_id / get_id first
    @returns visitor_id, is_new_visitor
        visitor_id is None if the request was not added
    """
    visitor_id, is_new_visitor = self.get_visitor_id(request)
    referer_id = self.get_id("referer", request.referer)
    route_id = self.get_id("route", request.route)
    # check if request is unique
    if self.request_exists(request.time_local, visitor_id, route_id):
        pdebug("add_request: exists:", request, lvl=3)
        return None, is_new_visitor
    pdebug("add_request: added", request, lvl=3)
    # time and status come from the log file, so they are bound as parameters
    self.cur.execute(
        "INSERT INTO request (visitor_id, route_id, referer_id, time, status) VALUES (?, ?, ?, ?, ?)",
        (visitor_id, route_id, referer_id, request.time_local, request.status),
    )
    return visitor_id, is_new_visitor
def add_requests(self, requests: list[Request]) -> tuple[int, int, int]:
    """
    Add a list of requests to the database
    Adds the visitors, if needed
    Requests whose route is blacklisted or not whitelisted are skipped
    @returns added_request_count, visitors_count, new_visitors_count
    """
    added_request_count = 0
    # ids of all visitors that got at least one request added
    visitors: set[int] = set()
    # check the new visitors later
    new_visitors: set[int] = set()
    for request in requests:
        if is_blacklisted(request.route, settings["data-collection"]["request_route_blacklist"]): continue
        if not is_whitelisted(request.route, settings["data-collection"]["request_route_whitelist"]): continue
        visitor_id, is_new_visitor = self.add_request(request)
        if visitor_id:
            added_request_count += 1
            visitors.add(visitor_id)
            if is_new_visitor:
                new_visitors.add(visitor_id)

    # update the is_human column for all new visitors
    # this happens after the loop, when all requests of the new visitors are added
    for visitor_id in new_visitors:
        self.update_is_visitor_human(visitor_id)
    return added_request_count, len(visitors), len(new_visitors)
def get_id(self, table: str, name: str, insert=True) -> int | None: def get_id(self, table: str, name: str, insert=True) -> int | None:
@ -192,7 +214,8 @@ class Database:
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})") if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
name = sanitize(replace_null(name)) name = sanitize(replace_null(name))
# if non existent, add name # if non existent, add name
if not sql_exists(self.cur, table, [("name", name)]): pdebug(f"get_id(table={table},\tname={name}", lvl=4)
if not sql_exists(self.cur, table, [("name", name)], do_sanitize=False): # double sanitizing might lead to problems with quotes
if not insert: return None if not insert: return None
self.cur.execute(f"INSERT INTO {table} (name) VALUES ('{name}')") self.cur.execute(f"INSERT INTO {table} (name) VALUES ('{name}')")
return self(f"SELECT {table}_id FROM {table} WHERE name = '{name}'")[0][0] return self(f"SELECT {table}_id FROM {table} WHERE name = '{name}'")[0][0]
@ -207,8 +230,7 @@ class Database:
if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})") if not table in supported_tables: raise ValueError(f"table '{table}' is not supported ({supported_tables})")
ret = self(f"SELECT name FROM {table} WHERE {table}_id = '{id_}'") ret = self(f"SELECT name FROM {table} WHERE {table}_id = '{id_}'")
if len(ret) == 0: return None if len(ret) == 0: return None
# TODO check if this returns tuple or value return ret[0][0]
return ret[0]
@ -231,7 +253,7 @@ class Database:
""" """
update the ip_range_id column of visitor with visitor_id update the ip_range_id column of visitor with visitor_id
""" """
results = self(f"SELECT ip_address FROM visitor WHERE visitor_id = {visitor_id}") results = self(f"SELECT ip_address FROM visitor WHERE visitor_id = '{visitor_id}'")
if len(results) == 0: # sanity checks if len(results) == 0: # sanity checks
warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}") warning(f"update_ip_range_id: Invalid visitor_id={visitor_id}")
return return
@ -248,7 +270,9 @@ class Database:
get the id of country of name get the id of country of name
if not present, insert and return id if not present, insert and return id
""" """
if not sql_exists(self.cur, "country", [("name", name)]): name = sanitize(name)
code = sanitize(code)
if not sql_exists(self.cur, "country", [("name", name)], do_sanitize=False):
self.cur.execute(f"INSERT INTO country (name, code) VALUES ('{name}', '{code}')") self.cur.execute(f"INSERT INTO country (name, code) VALUES ('{name}', '{code}')")
countries = self(f"SELECT country_id FROM country WHERE name = '{name}'") countries = self(f"SELECT country_id FROM country WHERE name = '{name}'")
if len(countries) > 0: if len(countries) > 0:
@ -260,9 +284,11 @@ class Database:
return country_id_val return country_id_val
def get_city_id(self, name, region, country_id) -> int: def get_city_id(self, name, region, country_id) -> int:
if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)]): name = sanitize(name)
region = sanitize(region)
if not sql_exists(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False):
self.cur.execute(f"INSERT INTO city (name, region, country_id) VALUES ('{name}', '{region}', '{country_id}')") self.cur.execute(f"INSERT INTO city (name, region, country_id) VALUES ('{name}', '{region}', '{country_id}')")
cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)]) cities = sql_select(self.cur, "city", [("name", name), ("region", region), ("country_id", country_id)], do_sanitize=False)
if len(cities) > 0: if len(cities) > 0:
city_id_val = cities[0][0] city_id_val = cities[0][0]
else: else:
@ -283,19 +309,36 @@ class Database:
""" """
# indices for the csv # indices for the csv
FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5 FROM = 0; TO = 1; CODE = 2; COUNTRY = 3; REGION = 4; CITY = 5
# FROM https://stackoverflow.com/questions/845058/how-to-get-line-count-of-a-large-file-cheaply-in-python (Quentin Pradet)
def _count_generator(reader):
b = reader(1024 * 1024)
while b:
yield b
b = reader(1024*1024)
def rawgencount(filename):
with open(filename, "rb") as file:
f_gen = _count_generator(file.raw.read)
return sum( buf.count(b'\n') for buf in f_gen )
pmessage(f"Recreating the GeoIP database from {geoip_city_csv_path}. This might take a long time...")
row_count = rawgencount(geoip_city_csv_path)
pmessage(f"Total rows: {row_count}")
with open(geoip_city_csv_path, 'r') as file: with open(geoip_city_csv_path, 'r') as file:
csv = reader(file, delimiter=',', quotechar='"') csv = reader(file, delimiter=',', quotechar='"')
file.seek(0)
# execute only if file could be opened # execute only if file could be opened
# delete all previous data # delete all previous data
self.cur.execute(f"DELETE FROM ip_range") self.cur.execute(f"DELETE FROM ip_range")
self.cur.execute(f"DELETE FROM city") self.cur.execute(f"DELETE FROM city")
self.cur.execute(f"DELETE FROM country") self.cur.execute(f"DELETE FROM country")
self.conn.commit()
self.cur.execute(f"VACUUM") self.cur.execute(f"VACUUM")
# guarantees that unkown city/country will have id 0 # guarantees that unkown city/country will have id 0
self.cur.execute(f"INSERT INTO country (country_id, name, code) VALUES (0, 'Unknown', 'XX') ") self.cur.execute(f"INSERT INTO country (country_id, name, code) VALUES (0, 'Unknown', 'XX') ")
self.cur.execute(f"INSERT INTO city (city_id, name, region) VALUES (0, 'Unknown', 'Unkown') ") self.cur.execute(f"INSERT INTO city (city_id, name, region) VALUES (0, 'Unknown', 'Unkown') ")
print(f"Recreating the geoip database from {geoip_city_csv_path}. This might take a long time...")
# for combining city ranges into a 'City in <Country>' range # for combining city ranges into a 'City in <Country>' range
# country_id for the range that was last added (for combining multiple csv rows in one ip_range) # country_id for the range that was last added (for combining multiple csv rows in one ip_range)
@ -307,18 +350,22 @@ class Database:
def add_range(low, high, city_name, region, country_id): def add_range(low, high, city_name, region, country_id):
city_id = self.get_city_id(city_name, region, country_id) city_id = self.get_city_id(city_name, region, country_id)
pdebug(f"update_ip_range_id: Adding range for city={city_name}, country_id={country_id}, low={low}, high={high}") pdebug(f"update_ip_range_id: Adding range for city={city_name:20}, country_id={country_id:3}, low={low:16}, high={high:16}", lvl=2)
self.cur.execute(f"INSERT INTO ip_range (low, high, city_id) VALUES ({low}, {high}, {city_id})") self.cur.execute(f"INSERT INTO ip_range (low, high, city_id) VALUES ({low}, {high}, {city_id})")
for row in csv: for i, row in enumerate(csv, 1):
# if i % 100 == 0:
pmessage(f"Updating GeoIP database: {i:7}/{row_count} ({100.0*i/row_count:.2f}%)", end="\r")
# these might contain problematic characters (') # these might contain problematic characters (')
row[CITY] = sanitize(row[CITY]) # row[CITY] = sanitize(row[CITY])
row[COUNTRY] = sanitize(row[COUNTRY]) if row[COUNTRY] == "United Kingdom of Great Britain and Northern Ireland":
row[REGION] = sanitize(row[REGION]) row[COUNTRY] = "United Kingdom"
# row[COUNTRY] = sanitize(row[COUNTRY])
# row[REGION] = sanitize(row[REGION])
# make sure country exists # make sure country exists
country_id = self.get_country_id(row[COUNTRY], row[CODE]) country_id = self.get_country_id(row[COUNTRY], row[CODE])
# only add cities for countries the user is interested in # only add cities for countries the user is interested in
if row[CODE] in settings["get_cities_for_countries"]: if row[CODE] in settings["data-collection"]["get_cities_for_countries"]:
add_range(row[FROM], row[TO], row[CITY], row[REGION], country_id) add_range(row[FROM], row[TO], row[CITY], row[REGION], country_id)
else: else:
# if continuing # if continuing
@ -343,13 +390,13 @@ class Database:
# REQUEST # REQUEST
# #
# TIME/DATE # TIME/DATE
def get_earliest_timestamp(self) -> int:
    """return the earliest request time as unixepoch, or 0 if the query does not yield an int"""
    earliest = self("SELECT MIN(time) FROM request")[0][0]
    return earliest if isinstance(earliest, int) else 0
def get_latest_date(self) -> int: def get_latest_timestamp(self) -> int:
"""return the latest time as unixepoch""" """return the latest time as unixepoch"""
date = self(f"SELECT MAX(time) FROM request")[0][0] date = self(f"SELECT MAX(time) FROM request")[0][0]
if not isinstance(date, int): return 0 if not isinstance(date, int): return 0

View File

@ -1,7 +1,7 @@
-- see database.uxf -- see database.uxf
CREATE TABLE IF NOT EXISTS visitor( CREATE TABLE IF NOT EXISTS visitor(
visitor_id INTEGER PRIMARY KEY, visitor_id INTEGER PRIMARY KEY,
ip_address INTEGER,
ip_range_id INTEGER, ip_range_id INTEGER,
platform_id INTEGER, platform_id INTEGER,
browser_id INTEGER, browser_id INTEGER,
@ -28,12 +28,12 @@ CREATE TABLE IF NOT EXISTS request(
request_id INTEGER PRIMARY KEY, request_id INTEGER PRIMARY KEY,
visitor_id INTEGER, visitor_id INTEGER,
route_id INTEGER, route_id INTEGER,
referer INTEGER, referer_id INTEGER,
time INTEGER, time INTEGER,
status INTEGER, status INTEGER,
FOREIGN KEY(visitor_id) REFERENCES visitor(visitor_id), FOREIGN KEY(visitor_id) REFERENCES visitor(visitor_id),
FOREIGN KEY(route_id) REFERENCES route(route_id), FOREIGN KEY(route_id) REFERENCES route(route_id),
FOREIGN KEY(referer) REFERENCES referer(referer_id) FOREIGN KEY(referer_id) REFERENCES referer(referer_id)
) STRICT; ) STRICT;
CREATE TABLE IF NOT EXISTS referer( CREATE TABLE IF NOT EXISTS referer(
@ -57,7 +57,7 @@ CREATE TABLE IF NOT EXISTS ip_range(
) STRICT; ) STRICT;
CREATE TABLE IF NOT EXISTS city( CREATE TABLE IF NOT EXISTS city(
city INTEGER PRIMARY KEY, city_id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
region TEXT, region TEXT,
country_id INTEGER, country_id INTEGER,

View File

@ -1,6 +1,40 @@
import sqlite3 as sql import sqlite3 as sql
"""Various utilities""" """Various utilities"""
def get_date_constraint(at_date=None, min_date=None, max_date=None):
    """
    get a condition string that sets a condition on the time to a certain date
    the conditions can be a string representing a date or an int/float in unixepoch
        str:        compared against DATE(time, 'unixepoch'), the string is sanitized first
        int/float:  compared against time, floats are truncated to int
    at_date uses '=', min_date '>=' and max_date '<=', all given conditions are joined with AND
    arguments of any other type are skipped with a warning
    @returns the condition string, or 'time > 0' if no condition could be generated
    """
    conditions = []
    for arg_name, value, operator in (("at_date", at_date, "="), ("min_date", min_date, ">="), ("max_date", max_date, "<=")):
        if value is None:
            continue
        if isinstance(value, str):
            conditions.append(f"DATE(time, 'unixepoch') {operator} '{sanitize(value)}'")
        elif isinstance(value, (int, float)):
            # dates in unix time
            conditions.append(f"time {operator} {int(value)}")
        else:
            print(f"WARNING: get_date_constraint: Invalid type of argument {arg_name}: {type(value)}")
    if not conditions:
        print(f"WARNING: get_date_constraint: no date_str generated. Returning 'time > 0'. at_date={at_date}, min_date={min_date}, max_date={max_date}")
        return "time > 0"
    return " AND ".join(conditions)
def replace_null(s): def replace_null(s):
if not s: if not s:
return "None" return "None"
@ -11,10 +45,11 @@ def sanitize(s):
return s.replace("'", r"''").strip(" ") return s.replace("'", r"''").strip(" ")
# .replace('"', r'\"')\ # .replace('"', r'\"')\
def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND") -> str: def sql_get_constaint_str(constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True) -> str:
c_str = "" c_str = ""
for name, val in constraints: for name, val in constraints:
c_str += f"{name} = '{sanitize(val)}' {logic} " if do_sanitize: val = sanitize(val)
c_str += f"{name} = '{val}' {logic} "
return c_str.strip(logic + " ") return c_str.strip(logic + " ")
def sql_get_value_str(values: list[list]) -> str: def sql_get_value_str(values: list[list]) -> str:
@ -25,12 +60,12 @@ def sql_get_value_str(values: list[list]) -> str:
c_str = c_str.strip(", ") + "), " c_str = c_str.strip(", ") + "), "
return c_str.strip(", ") return c_str.strip(", ")
def sql_exists(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True) -> bool:
    """
    return if `table` has at least one row that matches the constraints
    the constraints are turned into the WHERE condition by sql_get_constaint_str
    """
    condition = sql_get_constaint_str(constraints, logic, do_sanitize=do_sanitize)
    cur.execute(f"SELECT EXISTS (SELECT 1 FROM {table} WHERE {condition})")
    row = cur.fetchone()
    return row[0] == 1
def sql_select(cur: sql.Cursor, table: str, constraints: list[tuple[str, str|int]], logic="AND", do_sanitize=True):
    """
    return all rows (with all columns) of `table` that match the constraints
    the constraints are turned into the WHERE condition by sql_get_constaint_str
    """
    condition = sql_get_constaint_str(constraints, logic, do_sanitize=do_sanitize)
    cur.execute(f"SELECT * FROM {table} WHERE {condition}")
    rows = cur.fetchall()
    return rows
def sql_insert(cur: sql.Cursor, table: str, values: list[list]): def sql_insert(cur: sql.Cursor, table: str, values: list[list]):