Compare commits

...

14 Commits

3 changed files with 530 additions and 134 deletions

168
Makefile
View File

@ -17,43 +17,97 @@
# change these to fit your project
#
# root dir for the project, all other paths relative to PROJECT_DIR (except for OUT_DIR and DEP_DIR)
# root dir for the project, most other paths are relative to PROJECT_DIR
# [absolute or relative to current working directory]
PROJECT_DIR = src
# path where final website will be in, this one is not relative to PROJECT_DIR
# path where final website will be in
# [absolute or relative to current working directory]
OUT_DIR = build
# SOURCE FILES:
# all SRC_FLS and all files (recursively) in the SRC_DIRS will be built
# all files in PROJECT_DIR (not recursively) are source files
# [relative to PROJECT_DIR]
SRC_DIRS = de en script
SRC_FLS =
# CSS FILES:
# directories which may contain sass and scss files; each one is compiled to a corresponding css file in OUT_DIR/CSS_DIR (plain css files are simply copied)
# [relative to PROJECT_DIR]
CSS_DIRS = style
CSS_FILES =
# SOURCE FILES:
# RESOURCE FILES:
# all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR
# [relative to PROJECT_DIR]
RESOURCE_DIRS = resources
RESOURCE_FLS =
# MULTI-LANG SOURCE FILES:
# the files in COMMON_DIR will be built for all LANGS:
# for example:
# LANGS = de en
# PROJECT_DIR/COMMON_DIR/home.html
# -> OUT_DIR/de/home.html
# -> OUT_DIR/en/home.html
# foreach html-file in COMMON_DIR:
# foreach lang in LANGS:
# run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html
# all non-html files will handled the same way, but without the preprocessor being run on them. They are simply copied
# leave COMMON_DIR empty to disable multi-lang feature
COMMON_DIR =
# For all .html files, the preprocessor will make the variable `lang` available, for example lang=de
# All non-html files will be handled the same way, but without the preprocessor being run on them. They are simply copied.
# leave COMMON_DIR blank to disable multi-lang feature
# [relative to PROJECT_DIR]
COMMON_DIR = common
LANGS = de en
# FAVICON
# image from which the favicons will be generated
# leave FAVICON_SRC blank to not generate favicons
# [relative to PROJECT_DIR]
FAVICON_SRC = resources/favicon.png
# directory where all generated favicons will be placed
# [relative to OUT_DIR]
FAVICON_DIR = favicon
# in addition to the ones below, a favicon.ico containing the 16x16, 32x32 and 48x48 sizes will be generated
# all apple-touch-icon-XXxXX.png sizes
APPLE_ICON_SIZES = 180x180
# all mstile-XXxXX.png sizes
WINDOWS_ICON_SIZES = 150x150
# all android-chrome-XXxXX.png sizes
ANDROID_ICON_SIZES = 192x192 512x512
# all favicon-XXxXX.png sizes
FAVICON_ICON_SIZES = 16x16 32x32 48x48
# THUMBNAILS:
# thumbnails for all resource files having an extension in THUMB_FOR_TYPES will be generated and placed relative to THUMB_OUT_DIR
# leave THUMB_OUT_DIR blank to not generate thumbnails
# [relative to OUT_DIR]
THUMB_OUT_DIR = thumbs
# build thumbnails for these types: supported: mp3, flac, wav, pdf and all image formats that magick can handle
THUMB_FOR_TYPES = png gif jpg jpeg webp pdf mp4 mp3 flac wav
# filetype for the thumbnails. (pdfs will always have .jpg)
THUMB_TYPE = jpg
# size for the thumbnails (not respected by pdf)
THUMB_SIZE = 300
# SITEMAP
# leave SITEMAP blank to not generate a sitemap
# [relative to OUT_DIR]
SITEMAP = sitemap.xml
# base url of the website, without trailing /
WEBSITE_URL = https://quintern.xyz
# file required during build process for sitemap generation [absolute or relative to current working directory]
SITEMAP_TEMP_FILE = .sitemap.pkl
# comment to keep the file extension on sitemap entries
SITEMAP_REMOVE_EXT = 1
# PREPROCESSOR
# path to the files that should be included
# [relative to PROJECT_DIR]
INCLUDE_DIR = include
# additional search paths passed to sass compiler
# [relative to PROJECT_DIR]
SASS_INCLUDE_DIRS = include/style
@ -65,10 +119,10 @@ HTML_PP_CMD = python3 html-preprocessor --exit-on light
# --source-maps-urls=absolute is appended for generating dependency files
SASS_CMD = sass --color
# [absolute or relative to current working directory]
DEP_DIR = .dependencies
#
# NOT SETTINGS ANYMORE
# DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING!
@ -117,11 +171,42 @@ ML_OUT_DIRS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_D
ML_OUT_FLS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_DIR)/$(lang)/%, $(_ML_SRC_FLS)))
endif
ifdef FAVICON_DIR
FAVICON_OUT_DIR = $(addprefix $(OUT_DIR)/,$(FAVICON_DIR))
else
FAVICON_OUT_DIR = $(OUT_DIR)
endif
ifdef FAVICON_SRC
_FAVICON = $(addprefix $(PROJECT_DIR)/,$(FAVICON_SRC))
FAVICON_ICO = $(FAVICON_OUT_DIR)/favicon.ico
APPLE_ICONS = $(addsuffix .png,$(addprefix apple-touch-icon-,$(APPLE_ICON_SIZES)))
WINDOWS_ICONS = $(addsuffix .png,$(addprefix mstile-,$(WINDOWS_ICON_SIZES)))
ANDROID_ICONS = $(addsuffix .png,$(addprefix android-chrome-,$(ANDROID_ICON_SIZES)))
FAVICON_ICONS = $(addsuffix .png,$(addprefix favicon-,$(FAVICON_ICON_SIZES)))
FAVICONS_PNG = $(addprefix $(FAVICON_OUT_DIR)/,$(APPLE_ICONS) $(WINDOWS_ICONS) $(ANDROID_ICONS) $(FAVICON_ICONS))
FAVICONS = $(FAVICONS_PNG) $(FAVICON_ICO)
endif
ifdef THUMB_OUT_DIR
# files for which to generate thumbnails
_THUMB_FLS = $(filter $(foreach type, $(THUMB_FOR_TYPES), %.$(type)), $(_RES_FLS))
THUMB_OUT_FLS = $(addsuffix .jpg, $(basename $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/$(THUMB_OUT_DIR)/%, $(_THUMB_FLS))))
THUMB_OUT_DIRS = $(sort $(dir $(THUMB_OUT_FLS))) # sort for removing duplicates
endif
# needed for creating them
_DEP_DIRS = $(sort $(patsubst $(OUT_DIR)/%, $(DEP_DIR)/%, $(OUT_DIRS) $(ML_OUT_DIRS)))
# needed for reading
_DEP_FLS = $(shell find $(DEP_DIR) -type f -name '*.d' 2>/dev/null)
ifdef SITEMAP
SITEMAP_OUT = $(addprefix $(OUT_DIR)/, $(SITEMAP))
HTML_PP_CMD += --sitemap-temp-file "$(SITEMAP_TEMP_FILE)" --sitemap-base-url $(WEBSITE_URL) --sitemap-webroot-dir "$(OUT_DIR)"
endif
ifdef SITEMAP_REMOVE_EXT
HTML_PP_CMD += --sitemap-remove-ext
endif
# SASS, add load-paths
_SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-path=$(includedir)) --source-map-urls=absolute
@ -129,26 +214,31 @@ _SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-pa
FMT_VAR_SRC ="Variable '\e[1;34m%s\e[0m': \e[0;33m%s\e[0m\n"
FMT_VAR_OUT ="Variable '\e[1;34m%s\e[0m': \e[0;35m%s\e[0m\n"
FMT_DIR ="\e[1;34mMaking directory\e[0m: \e[0;35m%s\e[0m\n"
FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_THUMB ="\e[1;34mGenerating thumbnail\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_SITEMAP ="\e[1;34mGenerating sitemap\e[0m: \e[1;35m%s\e[0m\n"
FMT_OUT_FAVICON ="\e[1;34mGenerating favicon\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_ML_HTML ="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_ML_OTHER ="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
.SUFFIXES:
.SUFFIXES: .html .md
# .SUFFIXES:
# .SUFFIXES: .html .md
.PHONY: default normal multilang resources print start stop clean cleaner
.PHONY: default normal multilang resources sitemap favicons thumbnails print start stop clean cleaner
.DEFAULT_GOAL = all
# include all the dependency makefiles
include $(_DEP_FLS)
all: normal multilang resources
all: normal multilang resources thumbnails sitemap favicons
normal: $(OUT_FLS)
sitemap: $(SITEMAP_OUT)
favicons: $(FAVICONS) $(FAVICON_ICO)
multilang: $(ML_OUT_FLS)
resources: $(RES_OUT_FLS)
thumbnails: $(THUMB_OUT_FLS)
print:
@printf $(FMT_VAR_SRC) "PROJECT_DIR" "$(PROJECT_DIR)"
@ -164,10 +254,16 @@ ifdef COMMON_DIR
@printf $(FMT_VAR_OUT) "ML_OUT_FLS" "$(ML_OUT_FLS)"
endif
@printf $(FMT_VAR_SRC) "_DEP_FLS" "$(_DEP_FLS)"
ifdef THUMB_OUT_DIR
@printf $(FMT_VAR_SRC) "THUMB_OUT_DIR" "$(THUMB_OUT_DIR)"
@printf $(FMT_VAR_OUT) "_THUMB_FLS" "$(_THUMB_FLS)"
@printf $(FMT_VAR_OUT) "THUMB_OUT_FLS" "$(THUMB_OUT_FLS)"
@printf $(FMT_VAR_OUT) "THUMB_OUT_DIRS" "$(THUMB_OUT_DIRS)"
endif
@# @printf $(FMT_VAR_SRC) "y" "$(y)"
# DIRECTORIES
$(sort $(ML_OUT_DIRS) $(_DEP_DIRS) $(RES_OUT_DIRS) $(OUT_DIRS)):
$(sort $(ML_OUT_DIRS) $(_DEP_DIRS) $(RES_OUT_DIRS) $(OUT_DIRS) $(THUMB_OUT_DIRS) $(FAVICON_OUT_DIR)):
@printf $(FMT_DIR) "$@"
@mkdir -p $@
@ -190,6 +286,42 @@ $(foreach out_dir, $(ML_OUT_LANG_DIRS), $(out_dir)/%): $(_COMMON_DIR)/% | $(ML_O
cp $< $@
endif
ifdef FAVICONS
# must be first
$(FAVICON_ICO): $(_FAVICON) | $(FAVICON_OUT_DIR)
@printf $(FMT_OUT_FAVICON) "$<" "$@"
@convert "$<" -define icon:auto-resize=16,32,48 "$@"
$(FAVICONS_PNG): $(_FAVICON) | $(FAVICON_OUT_DIR)
@printf $(FMT_OUT_FAVICON) "$<" "$@"
@# resize to the size encoded in the target name and pad with transparency in case resize did not produce exactly that size
@size=$$(echo "$@" | grep -o -P '\d{2,4}x\d{2,4}');\
convert "$<" -resize "$${size}" -background none -gravity center -extent "$${size}" "$@"
endif
# THUMBNAILS
# Pattern rule for every .jpg below OUT_DIR/THUMB_OUT_DIR.
# The rule has no normal prerequisite, only the order-only output directories, so make
# builds a thumbnail when it is missing but does not rebuild it when the source changes.
# The recipe finds its source itself: it strips the OUT_DIR/THUMB_OUT_DIR prefix and the .jpg
# suffix from the target and greps _THUMB_FLS for that stem followed by a dot.
# NOTE(review): the stem is used as an unanchored grep pattern, so two sources sharing the
# stem (e.g. a.png and a.pdf) would both match - confirm this cannot happen.
# NOTE(review): `sources=(...)` is shell array syntax, so this presumably relies on SHELL being bash - confirm.
# The extension of the source selects the tool: pdftoppm for pdf, ffmpeg for mp3/flac/wav and
# magick for everything else. The "mp4-use-magick-as-well" label never equals a real extension,
# so mp4 files also go through the magick branch.
$(OUT_DIR)/$(THUMB_OUT_DIR)/%.jpg: | $(THUMB_OUT_DIRS)
@fulltarget="$@"; \
target="$(patsubst $(OUT_DIR)/$(THUMB_OUT_DIR)/%.jpg,%,$@)"; \
sources=($(_THUMB_FLS)); \
source=$$(printf "%s\n" $${sources[@]} | grep "$$target"'\.'); \
printf $(FMT_OUT_THUMB) "$$source" "$$fulltarget"; \
case "$${source##*.}" in \
"mp4-use-magick-as-well") ffmpegthumbnailer -i "$$source" -o "$$fulltarget" -s 300 -q 5;; \
"pdf") pdftoppm -f 1 -singlefile -jpeg -r 50 "$$source" "$${fulltarget%.*}";; \
"mp3"|"flac"|"wav") ffmpeg -hide_banner -i "$$source" "$$fulltarget" -y >/dev/null;; \
*) magick "$${source}[0]" -thumbnail '$(THUMB_SIZE)x$(THUMB_SIZE)>' "$@";; \
esac
# SITEMAP
ifdef SITEMAP_OUT
$(SITEMAP_OUT): $(OUT_FLS) $(ML_OUT_FLS) # build sitemap after all other files
@printf $(FMT_OUT_SITEMAP) "$@"
@$(HTML_PP_CMD) --sitemap-generate "$@"
endif
#
# (NORMAL/RE-)SOURCE RULES
#
@ -215,11 +347,13 @@ $(OUT_DIR)/%.css: $(PROJECT_DIR)/%.scss | $(OUT_DIRS) $(_DEP_DIRS)
jq -r '.sources | @sh' $@.map | tr -d \' | sed 's|file://||g' >> "$$depfile"; \
rm $@.map
# this rule must be last!
$(OUT_DIR)/%: $(PROJECT_DIR)/% | $(OUT_DIRS) $(RES_OUT_DIRS)
@printf $(FMT_OUT_OTHER) "$<" "$@"
@cp -r $< $@
# .DEFAULT:
# @echo "MISSING RULE: $@"
@ -230,7 +364,7 @@ stop:
killall nginx
# Remove the generated pages, the sitemap and its temp file, and the dependency files.
# SITEMAP is relative to OUT_DIR, so the generated file is SITEMAP_OUT; removing $(SITEMAP)
# would target a file in the current working directory instead.
# The leading '-' ignores the error rm reports when a file does not exist (e.g. on a clean tree).
clean:
	-@rm $(OUT_FLS) $(ML_OUT_FLS) $(SITEMAP_TEMP_FILE) $(SITEMAP_OUT) 2>/dev/null
	-@rm -r $(DEP_DIR) 2>/dev/null
cleaner:

View File

@ -20,7 +20,6 @@ refer to the article [on my website](https://quintern.xyz/en/software/buwuma.htm
<!--
#command everything here is an argument
#anothercommand more arguments
While this is a comment right now, it will be UNCOMMENTED in the after the preprocessor finishes!
#comment This will be a single line html comment after the preprocessor finishes.
-->
```
@ -124,8 +123,6 @@ Any string
**Return Value**:
The argument in comment tags
This can be useful in multi-line comments that contain other commands: In that case, the comment tags will be removed and each command replaced with
its return value, so if you want to just have commented text in there you can use `#comment`
### uncomment
Uncomment the comment.
@ -145,7 +142,7 @@ This can be useful when you want to look at the unprocessed html without variabl
### conditionals
To turn entire blocks on or off, `if`, `elif` and `else` can be used.
These commands can not be nested and must not appear in multi-line comments.
These commands can not be nested.
Logical and `&&` and logical or `||` can be used to chain conditions.
If a condition is true, the corresponding block is included while all other blocks are deleted.
@ -174,6 +171,7 @@ An entry is a html heading with a id: `<h1 id=myheading>This heading will be lin
`<!-- #sidenav sidenav-command arguments -->`
sidenav-command must be one of the following:
#### `include`
Include the generated sidenav at this position. This command will always be executed last, after the whole file has been parsed.
@ -183,8 +181,9 @@ Ignored
**Return Value**:
The generated sidenav
#### `section`
Group all following entries in named section. `section` may not appear in conditional blocks and multiline comments.
Group all following entries in named section. `section` may not appear in conditional blocks.
**Argument**:
The name of the section
@ -192,6 +191,7 @@ The name of the section
**Return Value**
Empty string
#### `name`
Use a custom name instead of the heading itself for the link to the next heading.
@ -201,6 +201,7 @@ The link-name of the next heading
**Return Value**:
Empty string
#### `custom`
Include a custom link in the sidenav.
@ -215,10 +216,68 @@ Empty string
---
### sitemap
Used for automatically generating a `sitemap.xml` for the website.
#### `include`
Include the current page in the sitemap
**Synopsis**:
`<!-- #sitemap include -->`
`<!-- #sitemap include https://use.custom.link/for-this/site -->`
**Argument**:
Optional: Use a different link for this page
**Return Value**:
Empty string
#### `priority`
Set the `priority` field
**Synopsis**:
`<!-- #sitemap priority 0.8 -->`
**Argument**:
Float between 0.0 and 1.0
**Return Value**:
Empty string
#### `changefreq`
Set the `changefreq` field
**Synopsis**:
`<!-- #sitemap changefreq never -->`
**Argument**:
One of *always, hourly, daily, weekly, monthly, yearly, never*
**Return Value**:
Empty string
#### `lastmod`
Set the `lastmod` field
**Synopsis**:
`<!-- #sitemap lastmod 2023-12-29T14:00:05+01:00 -->`
**Argument**:
The lastmod date in w3c date format
**Return Value**:
Empty string
---
## Pitfalls
- The `#include` command must not be in the last line of the file
- The `#include` command can not be in multi-line comment if the included file also contains comments
- `#if`, `#elif`, `#else` and `#endif` must not be in multi-line comments
- The `include` command must not be in the last line of the file
- The maps in `set` must have **at least 2** options
- The `section` commands must not be in a conditional block
- The conditionals must not be nested
- If you want to use variables in markdown, you have to escape the `#` with a backslash, so `#$(var)` becomes `\#$(var)`
- You can not use the `return` command from within the arguments of other commands. Commands are executed in order, so `return` will end up as argument of the first command and thus never be executed

View File

@ -5,6 +5,7 @@ import re
from sys import argv
from collections.abc import Callable
import argparse
import pickle
"""
TODO:
@ -27,6 +28,11 @@ sidenav_content_section = "<li class=\"sidenav_section\">#name</li>"
exit_on_include_failure = False
sitemap_begin = """\
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n"""
sitemap_end = "</urlset>"
"""
************************************************************ REGULAR EXPRESSIONS ************************************************************
"""
@ -45,7 +51,11 @@ re_set_map_alt = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^;]* *; *)+[
re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)"
""" only in comments """
re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *"
re_preprocessor_command = r"[\t ]*#([a-zA-Z]+) *(.*)[\t ]*"
# https://www.w3.org/TR/NOTE-datetime
# Matches a complete date (YYYY-MM-DD), optionally followed by a time (Thh:mm:ss) which may
# carry a time zone designator: either "Z" or an offset of the form +hh:mm / -hh:mm.
# Intended to be used with re.fullmatch.
re_w3cdate = r"\d{4}-(?:0[1-9]|1[0-2])-(?:[0-2]\d|3[01])(T(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(Z|[\+\-](?:0\d|1[0-2]):[0-5]\d)?)?"
COMMENT_BEGIN = "<!--"
COMMENT_END = "-->"
@ -67,24 +77,43 @@ error_levels = {
}
exit_on_error_level = error_levels["serious"]
# url that the currently processed file have
current_file_url = ""
"""
************************************************************ UTILITY ************************************************************
"""
RED = '\033[91m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
BLUE = '\033[94m'
MAGENTA = '\033[95m'
CYAN = '\033[96m'
GRAY = '\033[97m'
RESET = '\033[0m'
BOLD = '\033[1m'
WHITE = '\033[37m'
DEBUG = False
def pdebug(*args, **keys):
if DEBUG: print(*args, **keys)
fname, *_args = args
if DEBUG: print(f"{CYAN}{fname}{GRAY}", *_args, RESET, **keys)
TRACE = False
def ptrace(*args, **keys):
if TRACE: print(*args, **keys)
fname, *_args = args
if TRACE: print(f"{BLUE}{fname}{GRAY}", *_args, RESET, **keys)
def error(*args, level:int=exit_on_error_level, exit_code:int=1, **keys):
fname, *_args = args
if level >= exit_on_error_level:
print(f"ERROR:", *args, **keys)
print(f"{RED}ERROR: {fname}{RESET}", *_args, RESET, **keys)
exit(exit_code)
else:
print(f"WARNING:", *args, **keys)
print(f"{YELLOW}WARNING: {fname}{RESET}", *_args, RESET, **keys)
def line_is_link_to_path(line, path):
# check if the line is a link to html thats currently being processed
@ -115,13 +144,93 @@ def evaluate_condition(input_string) -> bool:
words[i] = '"' + words[i].replace('"', r'\"') + '"'
condition = "".join(words).replace("&&", " and ").replace("||", " or ")
ptrace(f"> Evaluating condition {condition}")
ptrace("evaluate_conditon", f"Evaluating condition {condition}")
try:
return eval(condition)
except SyntaxError:
error(f"Pythonized condition is invalid: {condition}", level=error_levels["light"])
error("evaluate_conditon", f"Pythonized condition is invalid: {condition}", level=error_levels["light"])
return False
"""
************************************************************ SITEMAP ************************************************************
"""
class Sitemap:
    """
    One <url> entry of the sitemap, plus the class-level registry of all entries.

    Entries live in `Sitemap.urls`, keyed by the value of `current_file_url` at the time a
    `#sitemap` command was processed. An entry only has a url after `#sitemap include`
    was seen for that file.
    """
    # registry shared by the whole class: current_file_url -> Sitemap
    urls:dict = {}

    def __init__(self, url=None):
        self.url = url
        self.priority = None
        self.changefreq = None
        self.lastmod = None

    def set_url(self, url):
        """Set the link written to <loc>."""
        self.url = url

    def set_priority(self, priority):
        """
        Set the priority from a number or numeric string in [0.0, 1.0].
        Invalid values are reported through error() and are not stored.
        """
        try:
            value = float(priority)
        except (TypeError, ValueError):
            error("Sitemap.set_priority", f"invalid priority: '{priority}'", level=error_levels["serious"])
            return  # error() only exits above the configured level, never keep a bad value
        # the chained comparison is also False for NaN
        if not (0.0 <= value <= 1.0):
            error("Sitemap.set_priority", f"invalid priority: '{value}'", level=error_levels["serious"])
            return
        self.priority = value

    def set_changefreq(self, changefreq):
        """
        Set the changefreq, which must be one of the allowed keywords.
        Invalid values are reported through error() and are not stored.
        """
        allowed = ("always", "hourly", "daily", "weekly", "monthly", "yearly", "never")
        if not (type(changefreq) == str and changefreq in allowed):
            error("Sitemap.set_changefreq", f"invalid changefreq: '{changefreq}'", level=error_levels["serious"])
            return
        self.changefreq = changefreq

    def set_lastmod(self, lastmod):
        """
        Set the lastmod date, which must fully match `re_w3cdate`.
        Invalid values are reported through error() and are not stored.
        """
        if not (type(lastmod) == str and re.fullmatch(re_w3cdate, lastmod)):
            error("Sitemap.set_lastmod", f"invalid lastmod: '{lastmod}'", level=error_levels["serious"])
            return
        self.lastmod = lastmod

    def get_entry(self):
        """Return the <url> element for this entry; fields that were never set are omitted."""
        # local import: only needed here, keeps the block self-contained
        from xml.sax.saxutils import escape
        # urls may contain '&' (query strings), which must be entity-escaped in XML
        s = f"<url>\n\t<loc>{escape(str(self.url))}</loc>"
        if self.priority is not None: s += f"\n\t<priority>{self.priority}</priority>"
        if self.changefreq is not None: s += f"\n\t<changefreq>{self.changefreq}</changefreq>"
        if self.lastmod is not None: s += f"\n\t<lastmod>{self.lastmod}</lastmod>"
        s += "\n</url>"
        return s

    def __repr__(self) -> str:
        return f"Sitemap(url={self.url}, priority={self.priority}, changefreq={self.changefreq}, lastmod={self.lastmod})"

    @staticmethod
    def gen_sidemap():
        """
        Return the complete sitemap document: `sitemap_begin`, one indented <url> element
        per registered entry, then `sitemap_end`.
        Entries without a url (a file used `priority`/`changefreq`/`lastmod` but never
        `include`) are skipped instead of being written as <loc>None</loc>.
        """
        parts = [sitemap_begin]
        for url in Sitemap.urls.values():
            if url.url is None:
                continue
            # indent the whole element by one tab
            parts.append("\t" + url.get_entry().replace("\n", "\n\t").strip("\t") + "\n")
        parts.append(sitemap_end)
        return "".join(parts)

    @staticmethod
    def cmd_sitemap(args:str, variables:dict[str,str]) -> str:
        """
        Handle `#sitemap <cmd> [cmd_args]` for the file identified by `current_file_url`.

        cmd is one of include, priority, changefreq, lastmod; anything else is reported
        through error(). `variables` is unused. Always returns the empty string.
        """
        # split "<cmd> <cmd_args>" at the first space
        space = args.find(" ")
        if space < 0:
            space = len(args)
        cmd = args[:space]
        cmd_args = ""
        if 0 < space and space < len(args) - 1:
            cmd_args = args[space+1:].strip(" ")
        pdebug("cmd_sitemap", f"cmd='{cmd}' cmd_args='{cmd_args}'")
        if not current_file_url in Sitemap.urls:
            Sitemap.urls[current_file_url] = Sitemap()
        entry = Sitemap.urls[current_file_url]
        if cmd == "include":
            # an explicit link overrides the url of the current file
            entry.set_url(cmd_args if cmd_args else current_file_url)
        elif cmd == "priority":
            entry.set_priority(cmd_args)
        elif cmd == "changefreq":
            entry.set_changefreq(cmd_args)
        elif cmd == "lastmod":
            entry.set_lastmod(cmd_args)
        else:
            # level is keyword-only in error(); passed positionally it would just be printed
            error("cmd_sitemap", f"Invalid command '{cmd}'", level=error_levels["serious"])
        ptrace("cmd_sitemap", f"Sitemap[{current_file_url}] is now: {entry}")
        return ""
"""
@ -154,7 +263,7 @@ class Sidenav:
Sidenav.skip_next = True
@staticmethod
def generate() -> str:
pdebug(f"Sidenav.generate(): found the following entries: {Sidenav.entries}")
pdebug("Sidenav.generate", f"found the following entries: {Sidenav.entries}")
sidenav:list[str] = sidenav_format.split('\n')
content_i = -1
for i in range(len(sidenav)): # find in which line the entries need to be placed
@ -162,16 +271,16 @@ class Sidenav:
content_i = i
break
if content_i >= 0:
sidenav.pop(content_i)
indent = sidenav.pop(content_i).replace("#sidenav-content", "")
added_links = []
for i in reversed(range(len(Sidenav.entries))):
entry = Sidenav.entries[i]
if entry[0] == Sidenav.LINK:
if entry[2] in added_links: continue # no duplicates
added_links.append(entry[2])
sidenav.insert(content_i, sidenav_content_link.replace("#name", entry[1]).replace("#link", entry[2]))
sidenav.insert(content_i, indent + sidenav_content_link.replace("#name", entry[1]).replace("#link", entry[2]))
else:
sidenav.insert(content_i, sidenav_content_section.replace("#name", entry[1]))
sidenav.insert(content_i, indent + sidenav_content_section.replace("#name", entry[1]))
sidenav_s = ""
for line in sidenav: sidenav_s += line + "\n" # cant use "".join because of newlines
return sidenav_s
@ -184,6 +293,7 @@ class Sidenav:
cmd_args = ""
if 0 < space and space < len(args) - 1:
cmd_args = args[space+1:].strip(" ")
pdebug("cmd_sidenav", f"cmd='{cmd}' cmd_args='{cmd_args}'")
if cmd == "skip":
Sidenav.skipNext()
elif cmd == "section":
@ -195,11 +305,11 @@ class Sidenav:
if match:
Sidenav.addEntry(match.groups()[1], match.groups()[0])
else:
error(f"cmd_sidenav: Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"])
error("cmd_sidenav", f"Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"])
elif cmd == "include":
return Sidenav.generate()
else:
error(f"cmd_sidenav: Invalid command: '{cmd}'", level=error_levels["light"])
error("cmd_sidenav", f"Invalid command: '{cmd}'", level=error_levels["light"])
return ""
@ -214,7 +324,7 @@ into the source file at the place where the command was.
"""
def cmd_include(args: str, variables:dict[str, str]={}) -> str:
args = args.split(' ')
pdebug(f"cmd_include: args='{args}', variables='{variables}'")
pdebug("cmd_include", f"args='{args}', variables='{variables}'")
filename = args[0]
content = ""
try:
@ -225,16 +335,17 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
p = HTMLParser(content, {})
p.pos["start"] = p.pos["end"] = -1
while p.i < len(p): # at start of new line or end of comment
p.next_line()
ptrace(f"cmd_include: Processing at i={p.i} in line {pos2line(p.file, p.i)}")
p.find_line_end()
ptrace("cmd_include", f"Processing at i={p.i} in line {pos2line(p.file, p.i)}: '{p[p.i:p.pos['line_end']]}'")
if not p.find_comment_begin(): continue
if not p.find_comment_end(): continue
p.replace_multiline_comments()
match = p.find_command()
if match:
command = match.groups()[0]
cmd_args = match.groups()[1].replace('\t', ' ').strip(' ')
pdebug(f"cmd_include Found command '{command}' with args '{cmd_args}'")
pdebug("cmd_include", f"Found command '{command}' with args '{cmd_args}'")
if command == "section":
if cmd_args.startswith(target_section):
p.pos["start"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
@ -242,23 +353,30 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
p.pos["end"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
# p.pos["end"] = p.pos["cmt_beg"]
p.replace_command_with_output("")
p.command_end()
p.command_end() # remove the command (+comment)
if p.pos["start"] >= 0 and p.pos["end"] > 0: break
continue
# section cmd in multiline comment is not supported, so simply jump to end of comment
p.i = p.pos["cmt_end"] + len(COMMENT_END)
p.pos["cmt_beg"] = -1
p.pos["cmd_beg"] = -1
p.pos["cmt_end"] = -1
p.pos["cmd_end"] = -1
if p.pos["start"] >= 0:
if p.pos["end"] < 0:
p.pos["end"] = len(p)
content = p[p.pos["start"]:p.pos["end"]]
else:
error(f"cmd_include: Could not find section {target_section} in file {filename}")
error("cmd_include", f"Could not find section {target_section} in file {filename}")
except FileNotFoundError:
error(f"cmd_include: Could not open file '{filename}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
error("cmd_include", f"Could not open file '{filename}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
content = f"<!-- Could not include '{filename}' -->"
if filename.endswith(".md"):
try:
from markdown import markdown
content = markdown(content, output_format="xhtml")
except:
error(f"cmd_include: Could convert markdown to html for file '{filename}'. Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"])
error("cmd_include", f"Could convert markdown to html for file '{filename}'. Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"])
content = f"<!-- Could not convert to html: '{filename}' -->"
glob_dependcies.append(filename)
return content
@ -270,10 +388,10 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
# re_set_map = r"([a-zA-Z0-9_]+)\?\{(([a-zA-Z0-9_]+:.+,)*([a-zA-Z0-9_]+:.+))\}"
# <!-- #set section=lang?{*:Fallback,de:Abschnitt,en:Section} -->
space = args.find(' ')
# pdebug(f"cmd_set: varname='{args[:space]}, 'arg='{args[space+1:]}', variables='{variables}'")
pdebug("cmd_set", f"varname='{args[:space]}, 'arg='{args[space+1:]}', variables='{variables}'")
if not (space > 0 and space < len(args)-1):
variables[args] = ""
pdebug(f"cmd_set: Setting to empty string: {args}")
pdebug("cmd_set", f"Setting to empty string: {args}")
else:
varname = args[:space]
variables[varname] = ""
@ -284,15 +402,15 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
match = re.fullmatch(re_set_map_alt, args[space+1:].strip(' '))
separator = ';'
if match:
pdebug(f"cmd_set: Map {match.group()}")
pdebug("cmd_set", f"Map {match.group()}")
depends = match.groups()[0]
if not depends in variables:
pdebug(f"cmd_set: Setting from map, but depends='{depends}' is not in variables")
pdebug("cmd_set", f"Setting from map, but depends='{depends}' is not in variables")
return ""
depends_val = variables[depends]
for option in match.groups()[1].split(separator):
option = option.strip(" ")
pdebug(f"cmd_set: Found option {option}")
pdebug("cmd_set", f"Found option {option}")
colon = option.find(':') # we will find one, regex guarantees
if option[:colon].strip(" ") == depends_val or option[:colon].strip(" ") == "*":
variables[varname] = option[colon+1:].strip(" ")
@ -300,7 +418,7 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
else: # simple asignment
value = args[space+1:].strip(" ")
variables[varname] = value
pdebug(f"cmd_set: Assignment {varname} -> {value}")
pdebug("cmd_set", f"Assignment {varname} -> {value}")
return variables[varname]
return ""
@ -311,7 +429,7 @@ def cmd_set(args: str, variables:dict[str, str]={}) -> str:
def cmd_unset(args: str, variables:dict[str, str]={}) -> str:
    """
    Remove a variable from `variables`.

    args: the variable name, surrounding spaces are ignored
    variables: the variable mapping, modified in place
    Returns the empty string. Unsetting a variable that is not set only emits a debug message.
    """
    variable = args.strip(' ')
    if variable in variables:
        variables.pop(variable)
    else:
        # pdebug forwards its keyword arguments to print(), so it must not receive `level`
        pdebug("cmd_unset", f"variable '{variable}' is not set")
    return ""
@ -329,10 +447,10 @@ def cmd_uncomment(args: str, variables:dict[str, str]={}) -> str:
return args
def cmd_error(args: str, variables:dict[str, str]={}) -> str:
error(f"Encounted 'error' command: {args}", level=error_levels["critical"])
error("cmd_error", f"Encounted 'error' command: {args}", level=error_levels["critical"])
return ""
def cmd_warning(args: str, variables:dict[str, str]={}) -> str:
error(f"Encounted 'warning' command: {args}", level=error_levels["light"])
error("cmd_warning", f"Encounted 'warning' command: {args}", level=error_levels["light"])
return ""
@ -346,6 +464,7 @@ command2function:dict[str, Callable[[str, dict[str,str]], str]] = {
"comment": cmd_comment,
"uncomment": cmd_uncomment,
"sidenav": Sidenav.cmd_sidenav,
"sitemap": Sitemap.cmd_sitemap,
"warning": cmd_warning,
"error": cmd_error,
}
@ -355,6 +474,10 @@ command2function:dict[str, Callable[[str, dict[str,str]], str]] = {
"""
class Parser():
"""
General purpose parser class
It has states and positions in a text, which are updated when portions of the text are replaced or removed
"""
def __init__(self, file):
self.file = file
self.pos: dict[str, int] = {}
@ -365,23 +488,23 @@ class Parser():
delete_length = stop - start
nl, esl = "\n", "\\n"
ptrace(f"- Deleting range [{start}, {stop}) of length {delete_length}: '{self.file[start:stop].replace(nl, esl)}'")
ptrace("Parser.remove", f"Deleting range [{start}, {stop}) of length {delete_length}: '{self.file[start:stop].replace(nl, esl)}'")
assert(stop >= start)
assert(stop <= len(self.file))
self.file = self.file[:start] + self.file[stop:]
for k,pos in self.pos.items():
if pos >= stop: self.pos[k] -= delete_length
elif pos > start and not k in ignore_bounds: error(f"Parser.remove: Position {k}={pos} within deleted range [{start},{stop})", level=1)
elif pos > start and not k in ignore_bounds: error("Parser.remove", f"Position {k}={pos} within deleted range [{start},{stop})", level=error_levels["light"])
def replace(self, start, stop, replacement):
def replace(self, start, stop, replacement, ignore_bounds=[]):
assert(stop >= start)
assert(stop <= len(self.file))
ptrace(f"- Replacing range [{start}, {stop}): '{self.file[start:stop]}' with '{replacement}'")
ptrace("Parser.replace", f"Replacing range [{start}, {stop}): '{self.file[start:stop]}' with '{replacement}'")
self.file = self.file[:start] + replacement + self.file[stop:]
length_difference = stop - start - len(replacement)
for k,pos in self.pos.items():
if pos >= stop: self.pos[k] -= length_difference
elif pos > start: error(f"Parser.replace: Position {k}={pos} within replaced range [{start},{stop})", level=1)
elif pos > start and k not in ignore_bounds: error("Parser.replace", f"Position {k}={pos} within replaced range [{start},{stop})", level=error_levels["light"])
def __getitem__(self, key):
return self.file[key]
@ -409,32 +532,50 @@ class HTMLParser(Parser):
self.state["last_condition"] = False # if the last if condition was true
self.remove_comments = remove_comments
def next_line(self):
"""update i and line_end"""
self.pos["line_end"] = self.file.find('\n', self.i+1)
if self.pos["line_end"] < 0: self.pos["line_end"] = len(self)
def use_variables(self):
"""replace variable usages in the current line"""
self.replace(self.i, self.pos["line_end"], substitute_variables(self[self.i:self.pos["line_end"]], self.variables))
ptrace("> Line after variable substitution:", self.file[self.i:self.pos["line_end"]])
ptrace("HTMLParser.use_variables", f"Line after variable substitution:", self.file[self.i:self.pos["line_end"]])
def add_sidenav_headings(self):
"""check if heading for sidenav in line"""
match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
if match:
Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
ptrace("> Found heading with id:", match.groups())
ptrace("HTMLParser.add_sidenav_headings:", f"Found heading with id:", match.groups())
def get_leading_whitespaces(self):
"""returns the whitespaces at the start of the line"""
# find last newline
line_beg = self.file.rfind("\n", 0, self.i)
if line_beg < 0: line_beg = 0
else: line_beg += 1 # start after newline
match = re.match(r"^([ \t]*)", self.file[line_beg:self.pos['line_end']])
if not match: return ""
else: return match.groups()[0]
# Parsing functions
def find_line_end(self):
"""
line_end -> position of next newline char or EOF
"""
self.pos["line_end"] = self.file.find('\n', self.i+1)
if self.pos["line_end"] < 0: self.pos["line_end"] = len(self)
def find_comment_begin(self) -> bool:
"""
find the beginning of a comment in the current line
if comment begin was found, jump into the comment, return True
cmt_beg -> beginning of COMMENT_BEGIN
i -> first character after COMMENT_BEGIN / line_end + 1
"""
# look for comment begin
if self.pos["cmt_beg"] < 0: # if not in comment, find next comment
self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
if self.pos["cmt_beg"] < 0:
self.i = self.pos["line_end"] + 1
return False
@ -442,7 +583,7 @@ class HTMLParser(Parser):
# jump to comment_begin
old_i = self.i
self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
ptrace(f"HTMLParser.find_comment_begin", f"Found comment begin, jumping from pos {old_i} to {self.i}")
return True
return True # still in previous comment
@ -451,27 +592,42 @@ class HTMLParser(Parser):
"""
call after find_comment_begin returns true to update the cmt_end
call continue when returning false
cmt_end -> beginning of COMMENT_END / ---
cmt_beg -> --- / -1 when invalid comment
"""
# in comment, i at the character after COMMENT_BEGIN
self.pos["cmt_end"] = self.file.find(COMMENT_END, self.i) #, self.pos["line_end"])
# sanity checks
if self.pos["cmt_end"] < 0:
error(f"Comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
error("HTMLParser.find_comment_end", f"Comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
return False
else:
tmp_next_begin = self.file.find(COMMENT_BEGIN, self.i)
if 0 < tmp_next_begin and tmp_next_begin < self.pos["cmt_end"]:
error(f"Found next comment begin before the comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{self.file[self.i:self.pos['line_end']]}'", level=error_levels["light"])
error("HTMLParser.find_comment_end", f"Found next comment begin before the comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{self.file[self.i:self.pos['line_end']]}'", level=error_levels["light"])
self.pos["cmt_beg"] = -1
return False
return True
def replace_multiline_comments(self):
    """
    Split a comment that spans several lines into one comment per line.

    Every newline inside file[cmt_beg:cmt_end] becomes "-->", newline, the current
    indentation and "<!--". Does nothing when the comment ends before line_end.
    """
    if self.pos["line_end"] > self.pos["cmt_end"]:
        # comment closes on the current line -> not a multiline comment
        return

    indent = self.get_leading_whitespaces()
    begin, end = self.pos["cmt_beg"], self.pos["cmt_end"]
    line_break = "-->\n" + indent + "<!--"
    per_line_comments = self.file[begin:end].replace("\n", line_break)
    # line_end lies inside the replaced range; ignore_bounds keeps replace() from reporting it
    self.replace(begin, end, per_line_comments, ignore_bounds=["line_end"])

    # the text changed, so the end of the line and of the (now shorter) comment are looked up again
    self.find_line_end()
    self.find_comment_end()
def find_command(self):
# either at newline (if in multiline comment) or at comment end
self.pos["cmd_beg"] = self.i
self.pos["cmd_end"] = min(self.pos["line_end"], self.pos["cmt_end"])
assert self.pos["cmd_end"] >= self.i, f"cmd_end={self.pos['cmd_end']}, i={self.i}, line_end={self.pos['line_end']}, cmt_end={self.pos['cmt_end']}"
ptrace(f"> Possible command end: {self.pos['cmd_end']}, possible command: '{self[self.i:self.pos['cmd_end']]}'")
ptrace("HTMLParser.find_command", f"Possible command end: {self.pos['cmd_end']}, possible command: '{self[self.i:self.pos['cmd_end']]}'")
# find commands
match = re.fullmatch(re_preprocessor_command, self[self.i:self.pos["cmd_end"]].strip(" "))
@ -480,8 +636,10 @@ class HTMLParser(Parser):
return match
def replace_command_with_output(self, command_output):
    """
    Replace the command text file[i:cmd_end] with the output of the command.

    command_output: text produced by the command; every line after the first is
    prefixed with the indentation of the line containing the command.
    """
    # keep indent level
    indent = self.get_leading_whitespaces()
    # insert exactly once, already indented
    self.replace(self.i, self.pos["cmd_end"], command_output.replace("\n", "\n" + indent))
    ptrace("HTMLParser.replace_command_with_output", f"After command, the line is now '{self.file[self.i:self.pos['line_end']]}'")
def command_end(self):
if self.pos["cmd_end"] == self.pos["cmt_end"]: # reached end of comment
@ -490,7 +648,7 @@ class HTMLParser(Parser):
if self[self.pos["cmt_beg"]-1] == '\n' and self[self.pos["cmt_end"]+len(COMMENT_END)] == '\n': # if the comment consumes the whole line, remove the entire line
remove_newline = 1
if self.state["cmd_in_cmt"]: # remove comment tags if a command was found
ptrace(f"Deleting opening comment tags")
ptrace("HTMLParser.command_end", f"Deleting opening comment tags")
self.remove(self.pos["cmt_beg"], self.pos["cmt_beg"] + len(COMMENT_BEGIN))
self.remove(self.pos["cmt_end"], self.pos["cmt_end"] + len(COMMENT_END) + remove_newline, ignore_bounds=["cmt_end", "cmd_end", "line_end"])
# process the line again, because a command might have inserted new comments
@ -507,7 +665,7 @@ class HTMLParser(Parser):
self.pos["cmt_end"] = -1
self.pos["cmd_end"] = -1
self.i = self.pos["line_end"] + 1
ptrace(f"> Multiline comment, jumping to next line.")
ptrace(f"HTMLParser.command_end", f"Multiline comment, jumping to next line.")
# i = possible_command_end commented, because if something containing new commands is inserted we need to parse that as well
@ -516,8 +674,8 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
sidenav_include_pos = -1
while p.i < len(p): # at start of new line or end of comment
p.next_line()
ptrace(f"Processing at i={p.i} in line {pos2line(p.file, p.i)}")
p.find_line_end()
ptrace("parse_file", f"Processing at i={p.i} in line {pos2line(p.file, p.i)}: '{p[p.i:p.pos['line_end']]}'")
p.use_variables()
p.add_sidenav_headings()
@ -525,21 +683,22 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
if not p.find_comment_begin(): continue
if not p.find_comment_end(): continue
p.replace_multiline_comments()
match = p.find_command()
if match:
command = match.groups()[0]
args = match.groups()[1].replace('\t', ' ').strip(' ')
pdebug(f"> Found command '{command}' with args '{args}'")
pdebug("parse_file", f"Found command '{command}' with args '{args}'")
# delete from previous block if
if command in ["elif", "else", "endif"]:
if p.pos["conditional_block_beg"] < 0: error(f"Misplaced '{command}' in line {pos2line(p.file, p.i)}")
if p.pos["conditional_block_beg"] < 0: error("parse_file", f"Misplaced '{command}' in line {pos2line(p.file, p.i)}")
if p.state["last_condition"]:
# delete block from here at next endif
p.state["last_condition"] = False
else:
# delete block from last condition statement
ptrace(f"> Deleting block from last condition")
ptrace("parse_file", f"> Deleting block from last condition")
p.remove(p.pos["conditional_block_beg"], p.pos["cmt_beg"])
p.i = p.pos["cmd_beg"]
p.pos["conditional_block_beg"] = p.i
@ -552,14 +711,14 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
p.pos["conditional_block_beg"] = p.i
p.state["last_condition"] = evaluate_condition(args)
p.state["any_condition"] = p.state["last_condition"]
pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}")
pdebug("parse_file", f"Command {command} condition evaluated to {p.state['last_condition']}")
cmd_output = ""
elif command =="elif":
p.pos["conditional_block_beg"] = p.i
p.state["last_condition"] = evaluate_condition(args) if not p.state["any_condition"] else False
if p.state["last_condition"]:
p.state["any_condition"] = True
pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}")
pdebug("parse_file", f"Command {command} condition evaluated to {p.state['last_condition']}")
cmd_output = ""
elif command == "else":
p.pos["conditional_block_beg"] = p.i
@ -572,18 +731,23 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
elif command == "endif":
cmd_output = ""
elif command not in command2function:
error(f"Invalid command in line {pos2line(p.file, p.i)}: {command}", level=error_levels["light"])
error("parse_file", f"Invalid command in line {pos2line(p.file, p.i)}: {command}", level=error_levels["light"])
cmd_output = ""
else:
cmd_output = command2function[command](args, variables)
else:
cmd_output = ""
p.replace_command_with_output(cmd_output)
else:
pdebug("parse_file", f"Did not find command in comment {p.file[p.pos['cmt_beg']:p.pos['cmt_end']+len(COMMENT_END)]}")
p.command_end()
if sidenav_include_pos >= 0:
return p.file[:sidenav_include_pos] + Sidenav.generate() + p.file[sidenav_include_pos:]
p.i = sidenav_include_pos # required before get_leading_whitespaces
p.find_line_end() # required before get_leading_whitespaces
indent = p.get_leading_whitespaces()
return p.file[:sidenav_include_pos] + Sidenav.generate().replace("\n", "\n" + indent) + p.file[sidenav_include_pos:]
else:
return p.file
@ -597,11 +761,11 @@ def substitute_variables(html:str, variables:dict[str, str]):
matches.append(match)
html_list = list(html)
for match in reversed(matches):
pdebug(f"> Found variable usage {match.groups()[0]}, match from {match.start()} to {match.end()}")
pdebug("substitute_variables", f"Found variable usage {match.groups()[0]}, match from {match.start()} to {match.end()}")
value = ""
if match.groups()[0] in variables: value = variables[match.groups()[0]]
else:
pdebug(f"Variable {match.groups()[0]} is used but not defined")
pdebug("substitute_variables", f"Variable {match.groups()[0]} is used but not defined")
for _ in range(match.start(), match.end()):
html_list.pop(match.start())
html_list.insert(match.start(), value.strip(" "))
@ -612,15 +776,20 @@ def substitute_variables(html:str, variables:dict[str, str]):
"""
if __name__ == "__main__":
# Command line interface.
# --input is optional at the argparse level (default ""): after parsing, exactly one of
# --input / --sitemap-generate has to be given. Every option is registered only once,
# argparse raises an error for conflicting (duplicate) option strings.
parser = argparse.ArgumentParser(prog="bUwUma html preprocessor")
# input / output
parser.add_argument("--input", action="store", help="path to the input file", default="")
parser.add_argument("--output", action="store", help="output to this file", default="")
parser.add_argument("--inplace", action="store_true", help="overwrite input file")
parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[])
parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="")
# sitemap
parser.add_argument("--sitemap-generate", action="store", help="generate the sitemap from the sitemap-temp-file", default="")
parser.add_argument("--sitemap-temp-file", action="store", help="file for storing sitemap data during build process", default="/tmp/sitemap.pkl")
parser.add_argument("--sitemap-webroot-dir", action="store", help="directory of the webroot, without trailing /. This will be removed from the output path for generating the sitemap url entry", default="")
parser.add_argument("--sitemap-base-url", action="store", help="base url of the website, without trailing /", default="https://www.example.com")
parser.add_argument("--sitemap-remove-ext", action="store_true", help="remove the file extension in the sitemap entry")
# diagnostics and behaviour flags
parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occurs", choices=["light", "serious", "critical"], default="serious")
parser.add_argument("--debug", action="store_true", help="be more verbose", default=False)
parser.add_argument("--trace", action="store_true", help="be extremely verbose", default=False)
parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)
variables:dict[str, str] = {}
args = parser.parse_args()
@ -635,10 +804,27 @@ if __name__ == "__main__":
args.input = args.input.strip(" ")
args.output = args.output.strip(" ")
args.output_deps = args.output_deps.strip(" ")
args.sitemap_temp_file = args.sitemap_temp_file.strip(" ")
args.sitemap_generate = args.sitemap_generate.strip(" ")
TRACE = args.trace
if args.trace: args.debug = True
DEBUG = args.debug
# either input file or sitemap_generate is required, but never both:
# abort if both are given or both are missing
if bool(args.input) == bool(args.sitemap_generate):
    parser.error("Exactly one of --input or --sitemap-generate must be given")
if args.input:
if args.sitemap_webroot_dir:
current_file_url = args.sitemap_base_url + args.output.replace(args.sitemap_webroot_dir, "")
else:
current_file_url = args.sitemap_base_url + args.output
if args.sitemap_remove_ext:
current_file_url = os.path.splitext(current_file_url)[0]
pdebug("main", f"current_file={current_file_url}")
# sanity checks
if not path.isfile(args.input):
parser.error(f"Invalid input file:: {args.input}")
@ -655,13 +841,18 @@ if __name__ == "__main__":
if not args.output:
parser.error(f"--output-deps requires either --output <file> our --inplace")
if args.sitemap_temp_file:
if path.isfile(args.sitemap_temp_file):
with open(args.sitemap_temp_file, "rb") as file:
Sitemap.urls = pickle.load(file)
# get html
with open(args.input, "r") as file:
target_html = file.read()
output_html = parse_file(target_html, variables, not args.preserve_comments)
# remove empty lines: a line break followed by one or more whitespace-only lines collapses
# into a single "\n". The pattern ends on a newline, so the indentation of the following
# non-empty line is left untouched.
output_html = re.sub(r"[\t\r ]*\n(?:[\t\r ]*\n)+", r"\n", output_html)
# pdebug(f"Output: {output_html}")
@ -676,6 +867,18 @@ if __name__ == "__main__":
if args.output != args.input:
glob_dependcies.append(args.input)
depfile = generate_dependecy_file(args.output, glob_dependcies)
pdebug(f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
pdebug("main", f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
with open(args.output_deps, "w") as file:
file.write(depfile)
if args.sitemap_temp_file:
with open(args.sitemap_temp_file, "wb") as file:
pickle.dump(Sitemap.urls, file)
else: # sitemap_generate
if not path.isfile(args.sitemap_temp_file):
parser.error(f"Invalid sitemap-temp-file: '{args.sitemap_temp_file}'")
with open(args.sitemap_temp_file, "rb") as file:
Sitemap.urls = pickle.load(file)
sitemap = Sitemap.gen_sidemap()
pdebug("main", f"Writing sitemap to {os.path.abspath(args.sitemap_generate)}")
with open(args.sitemap_generate, "w") as file:
file.write(sitemap)