add sitemap.xml support
This commit is contained in:
parent
bdc3522f7f
commit
3180892cc7
80
Makefile
80
Makefile
@ -17,46 +17,77 @@
|
|||||||
# change these to fit your project
|
# change these to fit your project
|
||||||
#
|
#
|
||||||
|
|
||||||
# root dir for the project, all other paths relative to PROJECT_DIR (except for OUT_DIR and DEP_DIR)
|
# root dir for the project, most other paths are relative to PROJECT_DIR
|
||||||
|
# [absolute or relative to current working directory]
|
||||||
PROJECT_DIR = src
|
PROJECT_DIR = src
|
||||||
|
|
||||||
# path where final website will be in, this one is not relative to PROJECT_DIR
|
# path where final website will be in
|
||||||
|
# [absolute or relative to current working directory]
|
||||||
OUT_DIR = build
|
OUT_DIR = build
|
||||||
|
|
||||||
# SOURCE FILES:
|
# SOURCE FILES:
|
||||||
# all SRC_FLS and all files (recursively) in the SRC_DIRS will be built
|
# all SRC_FLS and all files (recursively) in the SRC_DIRS will be built
|
||||||
# all files in PROJECT_DIR (not recursively) are source files
|
# all files in PROJECT_DIR (not recursively) are source files
|
||||||
|
# [relative to PROJECT_DIR]
|
||||||
SRC_DIRS = de en script
|
SRC_DIRS = de en script
|
||||||
SRC_FLS =
|
SRC_FLS =
|
||||||
|
|
||||||
# CSS FILES:
|
# CSS FILES:
|
||||||
# directories which may contain sass and scss to compile sass to a correspondig css in OUT_DIR/CSS_DIR (also css, it will simply be copied)
|
# directories which may contain sass and scss files; each one is compiled to a corresponding css file in OUT_DIR/CSS_DIR (plain css files are simply copied)
|
||||||
|
# [relative to PROJECT_DIR]
|
||||||
CSS_DIRS = style
|
CSS_DIRS = style
|
||||||
CSS_FILES =
|
CSS_FILES =
|
||||||
|
|
||||||
# RESOURCE FILES:
|
# RESOURCE FILES:
|
||||||
# all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR
|
# all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR
|
||||||
|
# [relative to PROJECT_DIR]
|
||||||
RESOURCE_DIRS = resources
|
RESOURCE_DIRS = resources
|
||||||
RESOURCE_FLS =
|
RESOURCE_FLS =
|
||||||
|
|
||||||
# THUMBNAILS:
|
# THUMBNAILS:
|
||||||
# if set, thumbnails for all resource files will be generated and placed in THUMB_OUT_DIR (relative to OUT_DIR)
|
# if set, thumbnails for all resource files having an extension in THUMB_FOR_TYPES will be generated and placed relative to THUMB_OUT_DIR
|
||||||
|
# [relative to OUT_DIR]
|
||||||
THUMB_OUT_DIR = thumbs
|
THUMB_OUT_DIR = thumbs
|
||||||
|
# build thumbnails for these types: supported: mp3, flac, wav, pdf and all image formats that magick can handle
|
||||||
|
THUMB_FOR_TYPES = png gif jpg jpeg webp pdf mp4 mp3 flac wav
|
||||||
|
# filetype for the thumbnails. (pdfs will always have .jpg)
|
||||||
|
THUMB_TYPE = jpg
|
||||||
|
# size for the thumbnails (not respected by pdf)
|
||||||
|
THUMB_SIZE = 300
|
||||||
|
|
||||||
# MULTI-LANG SOURCE FILES:
|
# MULTI-LANG SOURCE FILES:
|
||||||
# the files in COMMON_DIR will be built for all LANGS:
|
# the files in COMMON_DIR will be built for all LANGS:
|
||||||
|
# for example:
|
||||||
|
# LANGS = de en
|
||||||
|
# PROJECT_DIR/COMMON_DIR/home.html
|
||||||
|
# -> OUT_DIR/de/home.html
|
||||||
|
# -> OUT_DIR/en/home.html
|
||||||
# foreach html-file in COMMON_DIR:
|
# foreach html-file in COMMON_DIR:
|
||||||
# foreach lang in LANGS:
|
# foreach lang in LANGS:
|
||||||
# run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html
|
# run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html
|
||||||
# all non-html files will handled the same way, but without the preprocessor being run on them. They are simply copied
|
# For all .html files, the preprocessor will make the variable `lang` available, for example lang=de
|
||||||
|
# All non-html files will be handled the same way, but without the preprocessor being run on them. They are simply copied.
|
||||||
# leave COMMON_DIR empty to disable multi-lang feature
|
# leave COMMON_DIR empty to disable multi-lang feature
|
||||||
COMMON_DIR =
|
# [relative to PROJECT_DIR]
|
||||||
|
COMMON_DIR = common
|
||||||
LANGS = de en
|
LANGS = de en
|
||||||
|
|
||||||
|
# SITEMAP
|
||||||
|
# sitemap relative to OUT_DIR, leave blank to not generate a sitemap [relative to OUT_DIR]
|
||||||
|
SITEMAP = sitemap.xml
|
||||||
|
# base url of the website, without trailing /
|
||||||
|
WEBSITE_URL = https://quintern.xyz
|
||||||
|
# file required during build process for sitemap generation [absolute or relative to current working directory]
|
||||||
|
SITEMAP_TEMP_FILE = .sitemap.pkl
|
||||||
|
# comment out the next line to keep the file extension on sitemap entries
|
||||||
|
SITEMAP_REMOVE_EXT = 1
|
||||||
|
|
||||||
# PREPROCESSOR
|
# PREPROCESSOR
|
||||||
# path to of the files that should be included
|
# path to the files that should be included
|
||||||
|
# [relative to PROJECT_DIR]
|
||||||
INCLUDE_DIR = include
|
INCLUDE_DIR = include
|
||||||
# additional search paths passed to sass compiler
|
# additional search paths passed to sass compiler
|
||||||
|
# [relative to PROJECT_DIR]
|
||||||
SASS_INCLUDE_DIRS = include/style
|
SASS_INCLUDE_DIRS = include/style
|
||||||
|
|
||||||
|
|
||||||
@ -68,10 +99,10 @@ HTML_PP_CMD = python3 html-preprocessor --exit-on light
|
|||||||
# --source-maps-urls=absolute is appended for generating dependency files
|
# --source-maps-urls=absolute is appended for generating dependency files
|
||||||
SASS_CMD = sass --color
|
SASS_CMD = sass --color
|
||||||
|
|
||||||
|
# [absolute or relative to current working directory]
|
||||||
DEP_DIR = .dependencies
|
DEP_DIR = .dependencies
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# NOT SETTINGS ANYMORE
|
# NOT SETTINGS ANYMORE
|
||||||
# DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
# DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||||
@ -121,10 +152,8 @@ ML_OUT_FLS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_D
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef THUMB_OUT_DIR
|
ifdef THUMB_OUT_DIR
|
||||||
_THUMB_FOR_TYPES = png gif jpg jpeg webp pdf
|
|
||||||
_THUMB_TYPE = jpg
|
|
||||||
# files for which to generate thumbnails
|
# files for which to generate thumbnails
|
||||||
_THUMB_FLS = $(filter $(foreach type, $(_THUMB_FOR_TYPES), %.$(type)), $(_RES_FLS))
|
_THUMB_FLS = $(filter $(foreach type, $(THUMB_FOR_TYPES), %.$(type)), $(_RES_FLS))
|
||||||
THUMB_OUT_FLS = $(addsuffix .jpg, $(basename $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/$(THUMB_OUT_DIR)/%, $(_THUMB_FLS))))
|
THUMB_OUT_FLS = $(addsuffix .jpg, $(basename $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/$(THUMB_OUT_DIR)/%, $(_THUMB_FLS))))
|
||||||
THUMB_OUT_DIRS = $(sort $(dir $(THUMB_OUT_FLS))) # sort for removing duplicates
|
THUMB_OUT_DIRS = $(sort $(dir $(THUMB_OUT_FLS))) # sort for removing duplicates
|
||||||
endif
|
endif
|
||||||
@ -134,6 +163,14 @@ _DEP_DIRS = $(sort $(patsubst $(OUT_DIR)/%, $(DEP_DIR)/%, $(OUT_DIRS) $(ML_OUT
|
|||||||
# needed for reading
|
# needed for reading
|
||||||
_DEP_FLS = $(shell find $(DEP_DIR) -type f -name '*.d' 2>/dev/null)
|
_DEP_FLS = $(shell find $(DEP_DIR) -type f -name '*.d' 2>/dev/null)
|
||||||
|
|
||||||
|
ifdef SITEMAP
|
||||||
|
_SITEMAP = $(addprefix $(OUT_DIR)/, $(SITEMAP))
|
||||||
|
HTML_PP_CMD += --sitemap-temp-file "$(SITEMAP_TEMP_FILE)" --sitemap-base-url $(WEBSITE_URL) --sitemap-webroot-dir "$(OUT_DIR)"
|
||||||
|
endif
|
||||||
|
ifdef SITEMAP_REMOVE_EXT
|
||||||
|
HTML_PP_CMD += --sitemap-remove-ext
|
||||||
|
endif
|
||||||
|
|
||||||
# SASS, add load-paths
|
# SASS, add load-paths
|
||||||
_SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-path=$(includedir)) --source-map-urls=absolute
|
_SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-path=$(includedir)) --source-map-urls=absolute
|
||||||
|
|
||||||
@ -145,6 +182,7 @@ FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
|||||||
FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
||||||
FMT_OUT_THUMB ="\e[1;34mBuilding thumbnail\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
FMT_OUT_THUMB ="\e[1;34mBuilding thumbnail\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
||||||
FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
||||||
|
FMT_OUT_SITEMAP ="\e[1;34mBuilding sitemap\e[0m: \e[1;35m%s\e[0m\n"
|
||||||
|
|
||||||
FMT_OUT_ML_HTML="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
FMT_OUT_ML_HTML="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
||||||
FMT_OUT_ML_OTHER="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
FMT_OUT_ML_OTHER="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
|
||||||
@ -159,7 +197,7 @@ FMT_OUT_ML_OTHER="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m
|
|||||||
include $(_DEP_FLS)
|
include $(_DEP_FLS)
|
||||||
|
|
||||||
all: normal multilang resources thumbnails
|
all: normal multilang resources thumbnails
|
||||||
normal: $(OUT_FLS)
|
normal: $(_SITEMAP) $(OUT_FLS)
|
||||||
multilang: $(ML_OUT_FLS)
|
multilang: $(ML_OUT_FLS)
|
||||||
resources: $(RES_OUT_FLS)
|
resources: $(RES_OUT_FLS)
|
||||||
thumbnails: $(THUMB_OUT_FLS)
|
thumbnails: $(THUMB_OUT_FLS)
|
||||||
@ -217,11 +255,19 @@ $(OUT_DIR)/$(THUMB_OUT_DIR)/%.jpg: | $(THUMB_OUT_DIRS)
|
|||||||
sources=($(_THUMB_FLS)); \
|
sources=($(_THUMB_FLS)); \
|
||||||
source=$$(printf "%s\n" $${sources[@]} | grep "$$target"'\.'); \
|
source=$$(printf "%s\n" $${sources[@]} | grep "$$target"'\.'); \
|
||||||
printf $(FMT_OUT_THUMB) "$$source" "$$fulltarget"; \
|
printf $(FMT_OUT_THUMB) "$$source" "$$fulltarget"; \
|
||||||
if [ "$${source##*.}" = "pdf" ]; then \
|
case "$${source##*.}" in \
|
||||||
pdftoppm -f 1 -singlefile -jpeg -r 50 "$$source" "$${fulltarget%.*}"; \
|
"mp4-") ffmpegthumbnailer -i "$$source" -o "$$fulltarget" -s 300 -q 5;; \
|
||||||
else \
|
"pdf") pdftoppm -f 1 -singlefile -jpeg -r 50 "$$source" "$${fulltarget%.*}";; \
|
||||||
magick "$$source" -thumbnail '100x100>' "$@"; \
|
"mp3"|"flac"|"wav") ffmpeg -hide_banner -i "$$source" "$$fulltarget" -y >/dev/null;; \
|
||||||
fi; \
|
"*") magick "$$source[0]" -thumbnail '$(THUMB_SIZE)x$(THUMB_SIZE)>' "$@";; \
|
||||||
|
esac
|
||||||
|
|
||||||
|
# SITEMAP
|
||||||
|
ifdef _SITEMAP
|
||||||
|
$(_SITEMAP): $(OUT_FLS) $(ML_OUT_FLS) # build sitemap after all other files
|
||||||
|
@printf $(FMT_OUT_SITEMAP) "$@"
|
||||||
|
@$(HTML_PP_CMD) --sitemap-generate "$@"
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -249,11 +295,13 @@ $(OUT_DIR)/%.css: $(PROJECT_DIR)/%.scss | $(OUT_DIRS) $(_DEP_DIRS)
|
|||||||
jq -r '.sources | @sh' $@.map | tr -d \' | sed 's|file://||g' >> "$$depfile"; \
|
jq -r '.sources | @sh' $@.map | tr -d \' | sed 's|file://||g' >> "$$depfile"; \
|
||||||
rm $@.map
|
rm $@.map
|
||||||
|
|
||||||
|
# this rule must be last!
|
||||||
$(OUT_DIR)/%: $(PROJECT_DIR)/% | $(OUT_DIRS) $(RES_OUT_DIRS)
|
$(OUT_DIR)/%: $(PROJECT_DIR)/% | $(OUT_DIRS) $(RES_OUT_DIRS)
|
||||||
@printf $(FMT_OUT_OTHER) "$<" "$@"
|
@printf $(FMT_OUT_OTHER) "$<" "$@"
|
||||||
@cp -r $< $@
|
@cp -r $< $@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# .DEFAULT:
|
# .DEFAULT:
|
||||||
# @echo "MISSING RULE: $@"
|
# @echo "MISSING RULE: $@"
|
||||||
|
|
||||||
@ -264,7 +312,7 @@ stop:
|
|||||||
killall nginx
|
killall nginx
|
||||||
|
|
||||||
clean:
|
# Remove everything the build generated: the built (multi-lang) html files, the
# temporary sitemap data, the sitemap itself and the dependency files.
# The sitemap lives inside OUT_DIR, so the prefixed $(_SITEMAP) has to be removed;
# $(SITEMAP) alone would point to a file in the current working directory.
# $(_SITEMAP) is empty when sitemap generation is disabled.
# The leading '-' keeps make going when some of the files do not exist.
clean:
	-@rm $(OUT_FLS) $(ML_OUT_FLS) $(SITEMAP_TEMP_FILE) $(_SITEMAP) 2>/dev/null
	-@rm -r $(DEP_DIR) 2>/dev/null
|
||||||
|
|
||||||
cleaner:
|
cleaner:
|
||||||
|
62
README.md
62
README.md
@ -174,6 +174,7 @@ An entry is a html heading with a id: `<h1 id=myheading>This heading will be lin
|
|||||||
`<!-- #sidenav sidenav-command arguments -->`
|
`<!-- #sidenav sidenav-command arguments -->`
|
||||||
sidenav-command must be one of the following:
|
sidenav-command must be one of the following:
|
||||||
|
|
||||||
|
|
||||||
#### `include`
|
#### `include`
|
||||||
Include the generated sidenav at this position. This command will always be executed last, after the whole file has been parsed.
|
Include the generated sidenav at this position. This command will always be executed last, after the whole file has been parsed.
|
||||||
|
|
||||||
@ -183,6 +184,7 @@ Ignored
|
|||||||
**Return Value**:
|
**Return Value**:
|
||||||
The generated sidenav
|
The generated sidenav
|
||||||
|
|
||||||
|
|
||||||
#### `section`
|
#### `section`
|
||||||
Group all following entries in named section. `section` may not appear in conditional blocks and multiline comments.
|
Group all following entries in named section. `section` may not appear in conditional blocks and multiline comments.
|
||||||
|
|
||||||
@ -192,6 +194,7 @@ The name of the section
|
|||||||
**Return Value**
|
**Return Value**
|
||||||
Empty string
|
Empty string
|
||||||
|
|
||||||
|
|
||||||
#### `name`
|
#### `name`
|
||||||
Use a custom name instead of the heading itself for the link to the next heading.
|
Use a custom name instead of the heading itself for the link to the next heading.
|
||||||
|
|
||||||
@ -201,6 +204,7 @@ The link-name of the next heading
|
|||||||
**Return Value**:
|
**Return Value**:
|
||||||
Empty string
|
Empty string
|
||||||
|
|
||||||
|
|
||||||
#### `custom`
|
#### `custom`
|
||||||
Include a custom link in the sidenav.
|
Include a custom link in the sidenav.
|
||||||
|
|
||||||
@ -215,6 +219,64 @@ Empty string
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
### sitemap
|
||||||
|
Used for automatically generating a `sitemap.xml` for the website.
|
||||||
|
|
||||||
|
#### `include`
|
||||||
|
Include the current page in the sitemap
|
||||||
|
|
||||||
|
**Synopsis**:
|
||||||
|
`<!-- #sitemap include -->`
|
||||||
|
`<!-- #sitemap include https://use.custom.link/for-this/site -->`
|
||||||
|
|
||||||
|
**Argument**:
|
||||||
|
Optional: Use a different link for this page
|
||||||
|
|
||||||
|
**Return Value**:
|
||||||
|
Empty string
|
||||||
|
|
||||||
|
|
||||||
|
#### `priority`
|
||||||
|
Set the `priority` field
|
||||||
|
|
||||||
|
**Synopsis**:
|
||||||
|
`<!-- #sitemap priority 0.8 -->`
|
||||||
|
|
||||||
|
**Argument**:
|
||||||
|
Float between 0.0 and 1.0
|
||||||
|
|
||||||
|
**Return Value**:
|
||||||
|
Empty string
|
||||||
|
|
||||||
|
|
||||||
|
#### `changefreq`
|
||||||
|
Set the `changefreq` field
|
||||||
|
|
||||||
|
**Synopsis**:
|
||||||
|
`<!-- #sitemap changefreq never -->`
|
||||||
|
|
||||||
|
**Argument**:
|
||||||
|
One of *always, hourly, daily, weekly, monthly, yearly, never*
|
||||||
|
|
||||||
|
**Return Value**:
|
||||||
|
Empty string
|
||||||
|
|
||||||
|
|
||||||
|
#### `lastmod`
|
||||||
|
Set the `lastmod` field
|
||||||
|
|
||||||
|
**Synopsis**:
|
||||||
|
`<!-- #sitemap lastmod 2023-12-29T14:00:05+01:00 -->`
|
||||||
|
|
||||||
|
**Argument**:
|
||||||
|
The last modification date in W3C date format (see https://www.w3.org/TR/NOTE-datetime)
|
||||||
|
|
||||||
|
**Return Value**:
|
||||||
|
Empty string
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
## Pitfalls
|
## Pitfalls
|
||||||
- The `#include` command must not be in the last line of the file
|
- The `#include` command must not be in the last line of the file
|
||||||
- The `#include` command can not be in multi-line comment if the included file also contains comments
|
- The `#include` command can not be in multi-line comment if the included file also contains comments
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
from sys import argv
|
from sys import argv
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
import argparse
|
import argparse
|
||||||
|
import pickle
|
||||||
|
|
||||||
"""
|
"""
|
||||||
TODO:
|
TODO:
|
||||||
@ -27,6 +28,11 @@ sidenav_content_section = "<li class=\"sidenav_section\">#name</li>"
|
|||||||
|
|
||||||
exit_on_include_failure = False
|
exit_on_include_failure = False
|
||||||
|
|
||||||
|
sitemap_begin = """\
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n"""
|
||||||
|
sitemap_end = "</urlset>"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
************************************************************ REGULAR EXPRESSIONS ************************************************************
|
************************************************************ REGULAR EXPRESSIONS ************************************************************
|
||||||
"""
|
"""
|
||||||
@ -47,6 +53,10 @@ re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)"
|
|||||||
""" only in comments """
|
""" only in comments """
|
||||||
re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *"
|
re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *"
|
||||||
|
|
||||||
|
# https://www.w3.org/TR/NOTE-datetime
|
||||||
|
# Pattern for the <lastmod> value, following https://www.w3.org/TR/NOTE-datetime
# Accepted are a complete date (YYYY-MM-DD), optionally followed by
# "Thh:mm:ss" and an optional time zone designator ("Z", "+hh:mm" or "-hh:mm").
# The pattern is meant to be used with re.fullmatch.
re_w3cdate = (
    r"\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])"   # date: year-month-day
    r"(T(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"                 # optional time: hours:minutes:seconds
    r"(Z|[\+\-](?:0\d|1[0-4]):[0-5]\d)?)?"                 # optional time zone designator
)
|
||||||
|
|
||||||
COMMENT_BEGIN = "<!--"
|
COMMENT_BEGIN = "<!--"
|
||||||
COMMENT_END = "-->"
|
COMMENT_END = "-->"
|
||||||
|
|
||||||
@ -67,6 +77,9 @@ error_levels = {
|
|||||||
}
|
}
|
||||||
exit_on_error_level = error_levels["serious"]
|
exit_on_error_level = error_levels["serious"]
|
||||||
|
|
||||||
|
# url that the currently processed file will have
|
||||||
|
current_file_url = ""
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
************************************************************ UTILITY ************************************************************
|
************************************************************ UTILITY ************************************************************
|
||||||
@ -122,6 +135,86 @@ def evaluate_condition(input_string) -> bool:
|
|||||||
error(f"Pythonized condition is invalid: {condition}", level=error_levels["light"])
|
error(f"Pythonized condition is invalid: {condition}", level=error_levels["light"])
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
"""
|
||||||
|
************************************************************ SITEMAP ************************************************************
|
||||||
|
"""
|
||||||
|
class Sitemap:
    """One ``<url>`` entry of the sitemap, plus the registry of all entries.

    An instance stores the location of a page and the optional ``priority``,
    ``changefreq`` and ``lastmod`` fields. The class attribute ``urls`` maps
    the url of a processed file to its entry and is filled through the
    ``#sitemap`` preprocessor command (see ``cmd_sitemap``).

    An entry whose ``url`` is ``None`` belongs to a page that never used
    ``#sitemap include``; such entries are not written to the sitemap.
    """

    # url of the processed file -> entry of that file.
    # Deliberately shared on class level: it is the registry of all entries.
    urls: dict = {}

    # values allowed for the <changefreq> field
    _CHANGEFREQS = ("always", "hourly", "daily", "weekly", "monthly", "yearly", "never")
    # sub-commands understood by cmd_sitemap
    _COMMANDS = ("include", "priority", "changefreq", "lastmod")

    def __init__(self, url=None):
        self.url = url
        self.priority = None
        self.changefreq = None
        self.lastmod = None

    def set_url(self, url):
        """Set the location that is written to the ``<loc>`` field."""
        self.url = url

    def set_priority(self, priority):
        """Set the ``<priority>`` field.

        ``priority`` may be anything ``float()`` accepts and must lie between
        0.0 and 1.0. An invalid value is reported once via ``error`` and is
        not stored, so a previously set valid priority is kept.
        """
        try:
            value = float(priority)
        except (TypeError, ValueError):
            value = None
        # the chained comparison is also False for NaN
        if value is None or not 0.0 <= value <= 1.0:
            error(f"Sitemap: invalid priority: '{priority}'", level=error_levels["serious"])
            # error() may return, depending on the configured exit level
            return
        self.priority = value

    def set_changefreq(self, changefreq):
        """Set the ``<changefreq>`` field; invalid values are reported and not stored."""
        if not (isinstance(changefreq, str) and changefreq in Sitemap._CHANGEFREQS):
            error(f"Sitemap: invalid changefreq: '{changefreq}'", level=error_levels["serious"])
            return
        self.changefreq = changefreq

    def set_lastmod(self, lastmod):
        """Set the ``<lastmod>`` field; the value has to match ``re_w3cdate`` completely."""
        if not (isinstance(lastmod, str) and re.fullmatch(re_w3cdate, lastmod)):
            error(f"Sitemap: invalid lastmod: '{lastmod}'", level=error_levels["serious"])
            return
        self.lastmod = lastmod

    def get_entry(self) -> str:
        """Return the ``<url>`` element of this entry, without outer indentation.

        Fields that are ``None`` are left out. All values are XML-escaped,
        because a raw ``&``, ``<`` or ``>`` (for example in a query string)
        would make the sitemap invalid XML.
        """
        # local import, so that the file-level imports do not have to change
        from xml.sax.saxutils import escape

        lines = ["<url>", f"\t<loc>{escape(str(self.url))}</loc>"]
        for tag in ("priority", "changefreq", "lastmod"):
            value = getattr(self, tag)
            if value is not None:
                lines.append(f"\t<{tag}>{escape(str(value))}</{tag}>")
        lines.append("</url>")
        return "\n".join(lines)

    def __repr__(self) -> str:
        return f"Sitemap(url={self.url}, priority={self.priority}, changefreq={self.changefreq}, lastmod={self.lastmod})"

    @staticmethod
    def gen_sitemap() -> str:
        """Return the complete sitemap document for all registered entries.

        Each entry is indented by one tab inside the ``<urlset>`` element.
        Entries without a url are skipped, they would otherwise end up as
        ``<loc>None</loc>``.
        """
        parts = [sitemap_begin]
        for entry in Sitemap.urls.values():
            if entry.url is None:
                continue
            parts.append("\t" + entry.get_entry().replace("\n", "\n\t") + "\n")
        parts.append(sitemap_end)
        return "".join(parts)

    # original (misspelt) name, kept so that existing callers continue to work
    gen_sidemap = gen_sitemap

    @staticmethod
    def cmd_sitemap(args: str, variables: dict[str, str]) -> str:
        """Handle the ``#sitemap`` preprocessor command for the current file.

        ``args`` has the form ``"<cmd> [argument]"``, where cmd is one of
        ``include``, ``priority``, ``changefreq`` and ``lastmod``. The entry
        of ``current_file_url`` is created on first use. ``variables`` is not
        used, it is only part of the common command signature.

        An unknown command is reported via ``error`` and does not register
        an entry. Always returns an empty string.
        """
        cmd, _, rest = args.partition(" ")
        cmd_args = rest.strip(" ")
        pdebug(f"cmd_sitemap: cmd='{cmd}' cmd_args='{cmd_args}'")

        if cmd not in Sitemap._COMMANDS:
            error(f"cmd_sitemap: Invalid command '{cmd}'", error_levels["serious"])
            return ""

        entry = Sitemap.urls.setdefault(current_file_url, Sitemap())
        if cmd == "include":
            # without an argument the page is listed under its own url
            entry.set_url(cmd_args if cmd_args else current_file_url)
        elif cmd == "priority":
            entry.set_priority(cmd_args)
        elif cmd == "changefreq":
            entry.set_changefreq(cmd_args)
        else:  # lastmod
            entry.set_lastmod(cmd_args)
        ptrace(f"Sitemap[{current_file_url}] is now: {entry}")
        return ""
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -182,9 +275,9 @@ class Sidenav:
|
|||||||
space = len(args)
|
space = len(args)
|
||||||
cmd = args[:space]
|
cmd = args[:space]
|
||||||
cmd_args = ""
|
cmd_args = ""
|
||||||
pdebug(f"cmd_sidenav: cmd='{cmd}' cmd_args='{cmd_args}'")
|
|
||||||
if 0 < space and space < len(args) - 1:
|
if 0 < space and space < len(args) - 1:
|
||||||
cmd_args = args[space+1:].strip(" ")
|
cmd_args = args[space+1:].strip(" ")
|
||||||
|
pdebug(f"cmd_sidenav: cmd='{cmd}' cmd_args='{cmd_args}'")
|
||||||
if cmd == "skip":
|
if cmd == "skip":
|
||||||
Sidenav.skipNext()
|
Sidenav.skipNext()
|
||||||
elif cmd == "section":
|
elif cmd == "section":
|
||||||
@ -354,6 +447,7 @@ command2function:dict[str, Callable[[str, dict[str,str]], str]] = {
|
|||||||
"comment": cmd_comment,
|
"comment": cmd_comment,
|
||||||
"uncomment": cmd_uncomment,
|
"uncomment": cmd_uncomment,
|
||||||
"sidenav": Sidenav.cmd_sidenav,
|
"sidenav": Sidenav.cmd_sidenav,
|
||||||
|
"sitemap": Sitemap.cmd_sitemap,
|
||||||
"warning": cmd_warning,
|
"warning": cmd_warning,
|
||||||
"error": cmd_error,
|
"error": cmd_error,
|
||||||
}
|
}
|
||||||
@ -620,15 +714,20 @@ def substitute_variables(html:str, variables:dict[str, str]):
|
|||||||
"""
|
"""
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(prog="bUwUma html preprocessor")
|
parser = argparse.ArgumentParser(prog="bUwUma html preprocessor")
|
||||||
parser.add_argument("--input", action="store", help="path to the input file", required=True)
|
parser.add_argument("--input", action="store", help="path to the input file", default="")
|
||||||
parser.add_argument("--output", action="store", help="output to this file", default="")
|
parser.add_argument("--output", action="store", help="output to this file", default="")
|
||||||
parser.add_argument("--inplace", action="store_true", help="overwrite input file")
|
parser.add_argument("--inplace", action="store_true", help="overwrite input file")
|
||||||
|
parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)
|
||||||
parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[])
|
parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[])
|
||||||
parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="")
|
parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="")
|
||||||
|
parser.add_argument("--sitemap-generate", action="store", help="generate the sitemap from the sitemap-temp-file", default="")
|
||||||
|
parser.add_argument("--sitemap-temp-file", action="store", help="file for storing sitemap data during build process", default="/tmp/sitemap.pkl")
|
||||||
|
parser.add_argument("--sitemap-webroot-dir", action="store", help="directory of the webroot, without trailing /. This will be removed from the output path for generating the sitemap url entry", default="")
|
||||||
|
parser.add_argument("--sitemap-base-url", action="store", help="base url of the website, without trailing /", default="https://www.example.com")
|
||||||
|
parser.add_argument("--sitemap-remove-ext", action="store_true", help="remove the file extenstion in the sitemap entry")
|
||||||
parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occures", choices=["light", "serious", "critical"], default="serious")
|
parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occures", choices=["light", "serious", "critical"], default="serious")
|
||||||
parser.add_argument("--debug", action="store_true", help="be more verbose", default=False)
|
parser.add_argument("--debug", action="store_true", help="be more verbose", default=False)
|
||||||
parser.add_argument("--trace", action="store_true", help="be extremly verbose", default=False)
|
parser.add_argument("--trace", action="store_true", help="be extremly verbose", default=False)
|
||||||
parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)
|
|
||||||
variables:dict[str, str] = {}
|
variables:dict[str, str] = {}
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -643,47 +742,81 @@ if __name__ == "__main__":
|
|||||||
args.input = args.input.strip(" ")
|
args.input = args.input.strip(" ")
|
||||||
args.output = args.output.strip(" ")
|
args.output = args.output.strip(" ")
|
||||||
args.output_deps = args.output_deps.strip(" ")
|
args.output_deps = args.output_deps.strip(" ")
|
||||||
|
args.sitemap_temp_file = args.sitemap_temp_file.strip(" ")
|
||||||
|
args.sitemap_generate = args.sitemap_generate.strip(" ")
|
||||||
TRACE = args.trace
|
TRACE = args.trace
|
||||||
if args.trace: args.debug = True
|
if args.trace: args.debug = True
|
||||||
DEBUG = args.debug
|
DEBUG = args.debug
|
||||||
|
|
||||||
# sanity checks
|
# either input file or sitemap_generate is required
|
||||||
if not path.isfile(args.input):
|
if not (bool(args.input) ^ bool(args.sitemap_generate)):
|
||||||
parser.error(f"Invalid input file:: {args.input}")
|
parser.error(f"Exactly one if --input or --sitemap-generate must be given")
|
||||||
if args.output:
|
|
||||||
if not path.isdir(path.dirname(args.output)):
|
|
||||||
parser.error(f"Invalid path to output file - directory does not exist: '{path.dirname(args.output)}'")
|
|
||||||
elif args.inplace:
|
|
||||||
args.output = args.input
|
|
||||||
if args.inplace and args.output:
|
|
||||||
parser.error(f"Only one of --output or --inplace mut be given")
|
|
||||||
if args.output_deps:
|
|
||||||
if not path.isdir(path.dirname(args.output_deps)):
|
|
||||||
parser.error(f"Invalid path to dependency file - directory does not exist: '{path.dirname(args.output_deps)}'")
|
|
||||||
if not args.output:
|
|
||||||
parser.error(f"--output-deps requires either --output <file> our --inplace")
|
|
||||||
|
|
||||||
# get html
|
if args.input:
|
||||||
with open(args.input, "r") as file:
|
if args.sitemap_webroot_dir:
|
||||||
target_html = file.read()
|
current_file_url = args.sitemap_base_url + args.output.replace(args.sitemap_webroot_dir, "")
|
||||||
|
else:
|
||||||
|
current_file_url = args.sitemap_base_url + args.output
|
||||||
|
|
||||||
output_html = parse_file(target_html, variables, not args.preserve_comments)
|
if args.sitemap_remove_ext:
|
||||||
# remove empty lines
|
current_file_url = os.path.splitext(current_file_url)[0]
|
||||||
output_html = re.sub(r"[\t\r ]*\n(?:[\t\r ]*\n[\t\r ]*)+", r"\n", output_html)
|
|
||||||
|
|
||||||
# pdebug(f"Output: {output_html}")
|
pdebug(f"current_file={current_file_url}")
|
||||||
|
|
||||||
# save
|
# sanity checks
|
||||||
if args.output:
|
if not path.isfile(args.input):
|
||||||
with open(args.output, "w") as file:
|
parser.error(f"Invalid input file:: {args.input}")
|
||||||
file.write(output_html)
|
if args.output:
|
||||||
else:
|
if not path.isdir(path.dirname(args.output)):
|
||||||
print(output_html)
|
parser.error(f"Invalid path to output file - directory does not exist: '{path.dirname(args.output)}'")
|
||||||
|
elif args.inplace:
|
||||||
|
args.output = args.input
|
||||||
|
if args.inplace and args.output:
|
||||||
|
parser.error(f"Only one of --output or --inplace mut be given")
|
||||||
|
if args.output_deps:
|
||||||
|
if not path.isdir(path.dirname(args.output_deps)):
|
||||||
|
parser.error(f"Invalid path to dependency file - directory does not exist: '{path.dirname(args.output_deps)}'")
|
||||||
|
if not args.output:
|
||||||
|
parser.error(f"--output-deps requires either --output <file> our --inplace")
|
||||||
|
|
||||||
if args.output_deps:
|
if args.sitemap_temp_file:
|
||||||
if args.output != args.input:
|
if path.isfile(args.sitemap_temp_file):
|
||||||
glob_dependcies.append(args.input)
|
with open(args.sitemap_temp_file, "rb") as file:
|
||||||
depfile = generate_dependecy_file(args.output, glob_dependcies)
|
Sitemap.urls = pickle.load(file)
|
||||||
pdebug(f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
|
|
||||||
with open(args.output_deps, "w") as file:
|
# get html
|
||||||
file.write(depfile)
|
with open(args.input, "r") as file:
|
||||||
|
target_html = file.read()
|
||||||
|
|
||||||
|
output_html = parse_file(target_html, variables, not args.preserve_comments)
|
||||||
|
# remove empty lines
|
||||||
|
output_html = re.sub(r"[\t\r ]*\n(?:[\t\r ]*\n[\t\r ]*)+", r"\n", output_html)
|
||||||
|
|
||||||
|
# pdebug(f"Output: {output_html}")
|
||||||
|
|
||||||
|
# save
|
||||||
|
if args.output:
|
||||||
|
with open(args.output, "w") as file:
|
||||||
|
file.write(output_html)
|
||||||
|
else:
|
||||||
|
print(output_html)
|
||||||
|
|
||||||
|
if args.output_deps:
|
||||||
|
if args.output != args.input:
|
||||||
|
glob_dependcies.append(args.input)
|
||||||
|
depfile = generate_dependecy_file(args.output, glob_dependcies)
|
||||||
|
pdebug(f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
|
||||||
|
with open(args.output_deps, "w") as file:
|
||||||
|
file.write(depfile)
|
||||||
|
if args.sitemap_temp_file:
|
||||||
|
with open(args.sitemap_temp_file, "wb") as file:
|
||||||
|
pickle.dump(Sitemap.urls, file)
|
||||||
|
else: # sitemap_generate
|
||||||
|
if not path.isfile(args.sitemap_temp_file):
|
||||||
|
parser.error(f"Invalid sitemap-temp-file: '{args.sitemap_temp_file}'")
|
||||||
|
with open(args.sitemap_temp_file, "rb") as file:
|
||||||
|
Sitemap.urls = pickle.load(file)
|
||||||
|
sitemap = Sitemap.gen_sidemap()
|
||||||
|
pdebug(f"Writing sitemap to {os.path.abspath(args.sitemap_generate)}")
|
||||||
|
with open(args.sitemap_generate, "w") as file:
|
||||||
|
file.write(sitemap)
|
||||||
|
Loading…
Reference in New Issue
Block a user