Compare commits

...

14 Commits

3 changed files with 530 additions and 134 deletions

168
Makefile
View File

@ -17,43 +17,97 @@
# change these to fir your project # change these to fir your project
# #
# root dir for the project, all other paths relative to PROJECT_DIR (except for OUT_DIR and DEP_DIR) # root dir for the project, most other paths are relative to PROJECT_DIR
# [absolute or relative to current working directory]
PROJECT_DIR = src PROJECT_DIR = src
# path where final website will be in, this one is not relative to PROJECT_DIR # path where final website will be in
# [absolute or relative to current working directory]
OUT_DIR = build OUT_DIR = build
# SOURCE FILES: # SOURCE FILES:
# all SRC_FLS and all files (recursively) in the SRC_DIRS will be built # all SRC_FLS and all files (recursively) in the SRC_DIRS will be built
# all files in PROJECT_DIR (not recursively) are source files # all files in PROJECT_DIR (not recursively) are source files
# [relative to PROJECT_DIR]
SRC_DIRS = de en script SRC_DIRS = de en script
SRC_FLS = SRC_FLS =
# CSS FILES: # CSS FILES:
# directories which may contain sass and scss to compile sass to a correspondig css in OUT_DIR/CSS_DIR (also css, it will simply be copied) # directories which may contain sass and scss to compile sass to a correspondig css in OUT_DIR/CSS_DIR (also css, it will simply be copied)
# [relative to PROJECT_DIR]
CSS_DIRS = style CSS_DIRS = style
CSS_FILES = CSS_FILES =
# SOURCE FILES: # RESOURCE FILES:
# all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR # all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR
# [relative to PROJECT_DIR]
RESOURCE_DIRS = resources RESOURCE_DIRS = resources
RESOURCE_FLS = RESOURCE_FLS =
# MULTI-LANG SOURCE FILES: # MULTI-LANG SOURCE FILES:
# the files in COMMON_DIR will be built for all LANGS: # the files in COMMON_DIR will be built for all LANGS:
# for example:
# LANGS = de en
# PROJECT_DIR/COMMON_DIR/home.html
# -> OUT_DIR/de/home.html
# -> OUT_DIR/en/home.html
# foreach html-file in COMMON_DIR: # foreach html-file in COMMON_DIR:
# foreach lang in LANGS: # foreach lang in LANGS:
# run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html # run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html
# all non-html files will be handled the same way, but without the preprocessor being run on them. They are simply copied # For all .html files, the preprocessor will make the variable `lang` available, for example lang=de
# leave COMMON_DIR empty to disable multi-lang feature # All non-html files will be handled the same way, but without the preprocessor being run on them. They are simply copied.
COMMON_DIR = # leave COMMON_DIR blank to disable multi-lang feature
# [relative to PROJECT_DIR]
COMMON_DIR = common
LANGS = de en LANGS = de en
# FAVICON
# image from which the favicons will be generated
# leave FAVICON_SRC blank to not generate favicons
# [relative to PROJECT_DIR]
FAVICON_SRC = resources/favicon.png
# directory where all generated favicons will be placed
# [relative to OUT_DIR]
FAVICON_DIR = favicon
# in addition to the ones below, a favicon.ico containing the 16x16, 32x32 and 48x48 sizes will be generated
# all apple-touch-icon-XXxXX.png sizes
APPLE_ICON_SIZES = 180x180
# all mstile-XXxXX.png sizes
WINDOWS_ICON_SIZES = 150x150
# all android-chrome-XXxXX.png sizes
ANDROID_ICON_SIZES = 192x192 512x512
# all favicon-XXxXX.png sizes
FAVICON_ICON_SIZES = 16x16 32x32 48x48
# THUMBNAILS:
# thumbnails for all resource files having an extension in THUMB_FOR_TYPES will be generated and placed relative to THUMB_OUT_DIR
# leave THUMB_OUT_DIR blank to not generate thumbnails
# [relative to OUT_DIR]
THUMB_OUT_DIR = thumbs
# build thumbnails for these types: supported: mp3, flac, wav, pdf and all image formats that magick can handle
THUMB_FOR_TYPES = png gif jpg jpeg webp pdf mp4 mp3 flac wav
# filetype for the thumbnails. (pdfs will always have .jpg)
THUMB_TYPE = jpg
# size for the thumbnails (not respected by pdf)
THUMB_SIZE = 300
# SITEMAP
# leave SITEMAP blank to not generate a sitemap
# [relative to OUT_DIR]
SITEMAP = sitemap.xml
# base url of the website, without trailing /
WEBSITE_URL = https://quintern.xyz
# file required during build process for sitemap generation [absolute or relative to current working directory]
SITEMAP_TEMP_FILE = .sitemap.pkl
# comment to keep the file extension on sitemap entries
SITEMAP_REMOVE_EXT = 1
# PREPROCESSOR # PREPROCESSOR
# path to of the files that should be included # path to of the files that should be included
# [relative to PROJECT_DIR]
INCLUDE_DIR = include INCLUDE_DIR = include
# additional search paths passed to sass compiler # additional search paths passed to sass compiler
# [relative to PROJECT_DIR]
SASS_INCLUDE_DIRS = include/style SASS_INCLUDE_DIRS = include/style
@ -65,10 +119,10 @@ HTML_PP_CMD = python3 html-preprocessor --exit-on light
# --source-maps-urls=absolute is appended for generating dependency files # --source-maps-urls=absolute is appended for generating dependency files
SASS_CMD = sass --color SASS_CMD = sass --color
# [absolute or relative to current working directory]
DEP_DIR = .dependencies DEP_DIR = .dependencies
# #
# NOT SETTINGS ANYMORE # NOT SETTINGS ANYMORE
# DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING!
@ -117,11 +171,42 @@ ML_OUT_DIRS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_D
ML_OUT_FLS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_DIR)/$(lang)/%, $(_ML_SRC_FLS))) ML_OUT_FLS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_DIR)/$(lang)/%, $(_ML_SRC_FLS)))
endif endif
# directory that receives the generated favicons:
# OUT_DIR/FAVICON_DIR, or OUT_DIR itself when FAVICON_DIR is blank
ifdef FAVICON_DIR
FAVICON_OUT_DIR = $(addprefix $(OUT_DIR)/,$(FAVICON_DIR))
else
FAVICON_OUT_DIR = $(OUT_DIR)
endif

# the favicon targets only exist when a source image is configured
ifdef FAVICON_SRC
# source image, relative to PROJECT_DIR
_FAVICON      = $(addprefix $(PROJECT_DIR)/,$(FAVICON_SRC))
FAVICON_ICO   = $(FAVICON_OUT_DIR)/favicon.ico
# file names <prefix>-<size>.png, one per configured size
APPLE_ICONS   = $(patsubst %,apple-touch-icon-%.png,$(APPLE_ICON_SIZES))
WINDOWS_ICONS = $(patsubst %,mstile-%.png,$(WINDOWS_ICON_SIZES))
ANDROID_ICONS = $(patsubst %,android-chrome-%.png,$(ANDROID_ICON_SIZES))
FAVICON_ICONS = $(patsubst %,favicon-%.png,$(FAVICON_ICON_SIZES))
# all png favicons with their output directory prepended
FAVICONS_PNG  = $(addprefix $(FAVICON_OUT_DIR)/,$(APPLE_ICONS) $(WINDOWS_ICONS) $(ANDROID_ICONS) $(FAVICON_ICONS))
# everything the `favicons` target has to build
FAVICONS      = $(FAVICONS_PNG) $(FAVICON_ICO)
endif
# thumbnail variables are only defined when THUMB_OUT_DIR is non-blank
ifdef THUMB_OUT_DIR
# resource files whose extension is listed in THUMB_FOR_TYPES
_THUMB_FLS = $(filter $(addprefix %.,$(THUMB_FOR_TYPES)),$(_RES_FLS))
# one thumbnail per source file: PROJECT_DIR/x.ext -> OUT_DIR/THUMB_OUT_DIR/x.jpg
THUMB_OUT_FLS = $(addsuffix .jpg, $(basename $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/$(THUMB_OUT_DIR)/%, $(_THUMB_FLS))))
# directories that must exist before the thumbnails can be written
THUMB_OUT_DIRS = $(sort $(dir $(THUMB_OUT_FLS))) # sort for removing duplicates
endif
# needed for creating them # needed for creating them
_DEP_DIRS = $(sort $(patsubst $(OUT_DIR)/%, $(DEP_DIR)/%, $(OUT_DIRS) $(ML_OUT_DIRS))) _DEP_DIRS = $(sort $(patsubst $(OUT_DIR)/%, $(DEP_DIR)/%, $(OUT_DIRS) $(ML_OUT_DIRS)))
# needed for reading # needed for reading
_DEP_FLS = $(shell find $(DEP_DIR) -type f -name '*.d' 2>/dev/null) _DEP_FLS = $(shell find $(DEP_DIR) -type f -name '*.d' 2>/dev/null)
# sitemap generation is enabled when SITEMAP is non-blank:
# SITEMAP_OUT is the sitemap path inside OUT_DIR, and the preprocessor command gets the
# temp file, the base url and the webroot directory passed as options
ifdef SITEMAP
SITEMAP_OUT = $(addprefix $(OUT_DIR)/, $(SITEMAP))
HTML_PP_CMD += --sitemap-temp-file "$(SITEMAP_TEMP_FILE)" --sitemap-base-url $(WEBSITE_URL) --sitemap-webroot-dir "$(OUT_DIR)"
endif
# NOTE(review): this check is independent of SITEMAP, so the flag is also appended when
# sitemap generation is disabled - confirm that the preprocessor accepts it on its own
ifdef SITEMAP_REMOVE_EXT
HTML_PP_CMD += --sitemap-remove-ext
endif
# SASS, add load-paths # SASS, add load-paths
_SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-path=$(includedir)) --source-map-urls=absolute _SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-path=$(includedir)) --source-map-urls=absolute
@ -129,26 +214,31 @@ _SASS_CMD = $(SASS_CMD) $(foreach includedir, $(_SASS_INCLUDE_DIRS), --load-pa
FMT_VAR_SRC ="Variable '\e[1;34m%s\e[0m': \e[0;33m%s\e[0m\n" FMT_VAR_SRC ="Variable '\e[1;34m%s\e[0m': \e[0;33m%s\e[0m\n"
FMT_VAR_OUT ="Variable '\e[1;34m%s\e[0m': \e[0;35m%s\e[0m\n" FMT_VAR_OUT ="Variable '\e[1;34m%s\e[0m': \e[0;35m%s\e[0m\n"
FMT_DIR ="\e[1;34mMaking directory\e[0m: \e[0;35m%s\e[0m\n" FMT_DIR ="\e[1;34mMaking directory\e[0m: \e[0;35m%s\e[0m\n"
FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" FMT_OUT_CSS ="\e[1;34mBuilding css\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_THUMB ="\e[1;34mGenerating thumbnail\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_SITEMAP ="\e[1;34mGenerating sitemap\e[0m: \e[1;35m%s\e[0m\n"
FMT_OUT_FAVICON ="\e[1;34mGenerating favicon\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_ML_HTML ="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" FMT_OUT_ML_HTML ="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
FMT_OUT_ML_OTHER ="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" FMT_OUT_ML_OTHER ="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n"
.SUFFIXES: # .SUFFIXES:
.SUFFIXES: .html .md # .SUFFIXES: .html .md
.PHONY: default normal multilang resources print start stop clean cleaner .PHONY: default normal multilang resources sitemap favicons thumbnails print start stop clean cleaner
.DEFAULT_GOAL = all .DEFAULT_GOAL = all
# include all the dependency makefiles # include all the dependency makefiles
include $(_DEP_FLS) include $(_DEP_FLS)
all: normal multilang resources all: normal multilang resources thumbnails sitemap favicons
normal: $(OUT_FLS) normal: $(OUT_FLS)
sitemap: $(SITEMAP_OUT)
favicons: $(FAVICONS) $(FAVICON_ICO)
multilang: $(ML_OUT_FLS) multilang: $(ML_OUT_FLS)
resources: $(RES_OUT_FLS) resources: $(RES_OUT_FLS)
thumbnails: $(THUMB_OUT_FLS)
print: print:
@printf $(FMT_VAR_SRC) "PROJECT_DIR" "$(PROJECT_DIR)" @printf $(FMT_VAR_SRC) "PROJECT_DIR" "$(PROJECT_DIR)"
@ -164,10 +254,16 @@ ifdef COMMON_DIR
@printf $(FMT_VAR_OUT) "ML_OUT_FLS" "$(ML_OUT_FLS)" @printf $(FMT_VAR_OUT) "ML_OUT_FLS" "$(ML_OUT_FLS)"
endif endif
@printf $(FMT_VAR_SRC) "_DEP_FLS" "$(_DEP_FLS)" @printf $(FMT_VAR_SRC) "_DEP_FLS" "$(_DEP_FLS)"
ifdef THUMB_OUT_DIR
@printf $(FMT_VAR_SRC) "THUMB_OUT_DIR" "$(THUMB_OUT_DIR)"
@printf $(FMT_VAR_OUT) "_THUMB_FLS" "$(_THUMB_FLS)"
@printf $(FMT_VAR_OUT) "THUMB_OUT_FLS" "$(THUMB_OUT_FLS)"
@printf $(FMT_VAR_OUT) "THUMB_OUT_DIRS" "$(THUMB_OUT_DIRS)"
endif
@# @printf $(FMT_VAR_SRC) "y" "$(y)" @# @printf $(FMT_VAR_SRC) "y" "$(y)"
# DIRECTORIES # DIRECTORIES
$(sort $(ML_OUT_DIRS) $(_DEP_DIRS) $(RES_OUT_DIRS) $(OUT_DIRS)): $(sort $(ML_OUT_DIRS) $(_DEP_DIRS) $(RES_OUT_DIRS) $(OUT_DIRS) $(THUMB_OUT_DIRS) $(FAVICON_OUT_DIR)):
@printf $(FMT_DIR) "$@" @printf $(FMT_DIR) "$@"
@mkdir -p $@ @mkdir -p $@
@ -190,6 +286,42 @@ $(foreach out_dir, $(ML_OUT_LANG_DIRS), $(out_dir)/%): $(_COMMON_DIR)/% | $(ML_O
cp $< $@ cp $< $@
endif endif
# FAVICONS
# only defined when FAVICON_SRC is set (FAVICONS is empty otherwise)
ifdef FAVICONS
# favicon.ico: a single .ico file holding the 16x16, 32x32 and 48x48 renditions
# must be first
$(FAVICON_ICO): $(_FAVICON) | $(FAVICON_OUT_DIR)
	@printf $(FMT_OUT_FAVICON) "$<" "$@"
	@convert "$<" -define icon:auto-resize=16,32,48 "$@"

# png favicons: the wanted size is encoded in the file name (for example mstile-150x150.png)
$(FAVICONS_PNG): $(_FAVICON) | $(FAVICON_OUT_DIR)
	@printf $(FMT_OUT_FAVICON) "$<" "$@"
	@# read the size from the file name only ($(@F)), so that a WxH in a directory name is never picked up
	@# resize to that size and pad with transparency in case the resize did not produce the exact size
	@size=$$(printf '%s\n' "$(@F)" | grep -o -E '[0-9]{2,4}x[0-9]{2,4}');\
	convert "$<" -resize "$${size}" -background none -gravity center -extent "$${size}" "$@"
endif
# THUMBNAILS
# Pattern rule building one thumbnail OUT_DIR/THUMB_OUT_DIR/<stem>.jpg.
# The rule has no normal prerequisite, only the order-only output directories,
# so an existing thumbnail is not rebuilt when its source file changes.
# The source is looked up at run time: the stem of the target is searched (as a grep pattern,
# followed by a literal dot) in the list _THUMB_FLS.
# NOTE(review): the lookup can return more than one file when one stem is part of another path - confirm
# NOTE(review): `sources=(...)` is shell array syntax, so the recipe needs a shell that supports arrays;
#               the SHELL setting is not visible here - confirm
# The tool is chosen by the extension of the source file:
#   "mp4-use-magick-as-well": this label is not a real extension, so the ffmpegthumbnailer branch never
#                             runs for .mp4 files, they fall through to the magick branch
#   pdf:                      first page only, rendered by pdftoppm with -r 50, THUMB_SIZE is not used
#   mp3, flac, wav:           ffmpeg writes the target from the audio file (presumably the embedded cover art - confirm)
#   everything else:          magick takes the first frame/page ([0]) and shrinks it to fit THUMB_SIZE x THUMB_SIZE
#                             (the `>` only shrinks images that are larger)
$(OUT_DIR)/$(THUMB_OUT_DIR)/%.jpg: | $(THUMB_OUT_DIRS)
@fulltarget="$@"; \
target="$(patsubst $(OUT_DIR)/$(THUMB_OUT_DIR)/%.jpg,%,$@)"; \
sources=($(_THUMB_FLS)); \
source=$$(printf "%s\n" $${sources[@]} | grep "$$target"'\.'); \
printf $(FMT_OUT_THUMB) "$$source" "$$fulltarget"; \
case "$${source##*.}" in \
"mp4-use-magick-as-well") ffmpegthumbnailer -i "$$source" -o "$$fulltarget" -s 300 -q 5;; \
"pdf") pdftoppm -f 1 -singlefile -jpeg -r 50 "$$source" "$${fulltarget%.*}";; \
"mp3"|"flac"|"wav") ffmpeg -hide_banner -i "$$source" "$$fulltarget" -y >/dev/null;; \
*) magick "$${source}[0]" -thumbnail '$(THUMB_SIZE)x$(THUMB_SIZE)>' "$@";; \
esac
# SITEMAP
# only defined when SITEMAP is set (SITEMAP_OUT is undefined otherwise)
# The sitemap lists the normal and the multi-lang output files as prerequisites,
# so it is regenerated whenever one of them has been rebuilt.
# NOTE(review): presumably the preprocessor collects the entries in SITEMAP_TEMP_FILE while it
#               builds the html files and --sitemap-generate writes them out - confirm
ifdef SITEMAP_OUT
$(SITEMAP_OUT): $(OUT_FLS) $(ML_OUT_FLS) # build sitemap after all other files
@printf $(FMT_OUT_SITEMAP) "$@"
@$(HTML_PP_CMD) --sitemap-generate "$@"
endif
# #
# (NORMAL/RE-)SOURCE RULES # (NORMAL/RE-)SOURCE RULES
# #
@ -215,11 +347,13 @@ $(OUT_DIR)/%.css: $(PROJECT_DIR)/%.scss | $(OUT_DIRS) $(_DEP_DIRS)
jq -r '.sources | @sh' $@.map | tr -d \' | sed 's|file://||g' >> "$$depfile"; \ jq -r '.sources | @sh' $@.map | tr -d \' | sed 's|file://||g' >> "$$depfile"; \
rm $@.map rm $@.map
# this rule must be last!
$(OUT_DIR)/%: $(PROJECT_DIR)/% | $(OUT_DIRS) $(RES_OUT_DIRS) $(OUT_DIR)/%: $(PROJECT_DIR)/% | $(OUT_DIRS) $(RES_OUT_DIRS)
@printf $(FMT_OUT_OTHER) "$<" "$@" @printf $(FMT_OUT_OTHER) "$<" "$@"
@cp -r $< $@ @cp -r $< $@
# .DEFAULT: # .DEFAULT:
# @echo "MISSING RULE: $@" # @echo "MISSING RULE: $@"
@ -230,7 +364,7 @@ stop:
killall nginx killall nginx
clean: clean:
-@rm $(OUT_FLS) $(ML_OUT_FLS) 2>/dev/null -@rm $(OUT_FLS) $(ML_OUT_FLS) $(SITEMAP_TEMP_FILE) $(SITEMAP) 2>/dev/null
-@rm -r $(DEP_DIR) 2>/dev/null -@rm -r $(DEP_DIR) 2>/dev/null
cleaner: cleaner:

View File

@ -20,7 +20,6 @@ refer to the article [on my website](https://quintern.xyz/en/software/buwuma.htm
<!-- <!--
#command everything here is an argument #command everything here is an argument
#anothercommand more arguments #anothercommand more arguments
While this is a comment right now, it will be UNCOMMENTED in the after the preprocessor finishes!
#comment This will be a single line html comment after the preprocessor finishes. #comment This will be a single line html comment after the preprocessor finishes.
--> -->
``` ```
@ -124,8 +123,6 @@ Any string
**Return Value**: **Return Value**:
The argument in comment tags The argument in comment tags
This can be useful in multi-line comments that contain other commands: In that case, the comment tags will be removed and each command replaced with
its return value, so if you want to just have commented text in there you can use `#comment`
### uncomment ### uncomment
Uncomment the comment. Uncomment the comment.
@ -145,7 +142,7 @@ This can be useful when you want to look at the unprocessed html without variabl
### conditionals ### conditionals
To turn on or off entire blocks, `if`, `elif` can `else` be used. To turn on or off entire blocks, `if`, `elif` can `else` be used.
These commands can not be nested and must not appear in multi-line comments. These commands can not be nested.
Logical and `&&` and logical or `||` can be used to chain conditions. Logical and `&&` and logical or `||` can be used to chain conditions.
If a condition is true, the corresponding block is included while all other blocks are deleted. If a condition is true, the corresponding block is included while all other blocks are deleted.
@ -174,6 +171,7 @@ An entry is a html heading with a id: `<h1 id=myheading>This heading will be lin
`<!-- #sidenav sidenav-command arguments -->` `<!-- #sidenav sidenav-command arguments -->`
sidenav-command must be one of the following: sidenav-command must be one of the following:
#### `include` #### `include`
Include the generated sidenav at this position. This command will always be executed last, after the whole file has been parsed. Include the generated sidenav at this position. This command will always be executed last, after the whole file has been parsed.
@ -183,8 +181,9 @@ Ignored
**Return Value**: **Return Value**:
The generated sidenav The generated sidenav
#### `section` #### `section`
Group all following entries in named section. `section` may not appear in conditional blocks and multiline comments. Group all following entries in named section. `section` may not appear in conditional blocks.
**Argument**: **Argument**:
The name of the section The name of the section
@ -192,6 +191,7 @@ The name of the section
**Return Value** **Return Value**
Empty string Empty string
#### `name` #### `name`
Use a custom name instead of the heading itself for the link to the next heading. Use a custom name instead of the heading itself for the link to the next heading.
@ -201,6 +201,7 @@ The link-name of the next heading
**Return Value**: **Return Value**:
Empty string Empty string
#### `custom` #### `custom`
Include a custom link in the sidenav. Include a custom link in the sidenav.
@ -215,10 +216,68 @@ Empty string
--- ---
### sitemap
Used for automatically generating a `sitemap.xml` for the website.
#### `include`
Include the current page in the sitemap
**Synopsis**:
`<!-- #sitemap include -->`
`<!-- #sitemap include https://use.custom.link/for-this/site -->`
**Argument**:
Optional: Use a different link for this page
**Return Value**:
Empty string
#### `priority`
Set the `priority` field
**Synopsis**:
`<!-- #sitemap priority 0.8 -->`
**Argument**:
Float between 0.0 and 1.0
**Return Value**:
Empty string
#### `changefreq`
Set the `changefreq` field
**Synopsis**:
`<!-- #sitemap changefreq never -->`
**Argument**:
One of *always, hourly, daily, weekly, monthly, yearly, never*
**Return Value**:
Empty string
#### `lastmod`
Set the `lastmod` field
**Synopsis**:
`<!-- #sitemap lastmod 2023-12-29T14:00:05+01:00 -->`
**Argument**:
The lastmod date in w3c date format
**Return Value**:
Empty string
---
## Pitfalls ## Pitfalls
- The `#include` command must not be in the last line of the file - The `include` command must not be in the last line of the file
- The `#include` command can not be in multi-line comment if the included file also contains comments
- `#if`, `#elif`, `#else` and `#endif` must not be in multi-line comments
- The maps in `set` must have **at least 2** options - The maps in `set` must have **at least 2** options
- The `section` commands must not be in a conditional block
- The conditionals must not be nested
- If you want to use variables in markdown, you have to escape the `#` with a backslash, so `#$(var)` becomes `\#$(var)` - If you want to use variables in markdown, you have to escape the `#` with a backslash, so `#$(var)` becomes `\#$(var)`
- You can not use the `return` command from within the arguments of other commands. Commands are executed in order, so `return` will end up as argument of the first command and thus never be executed - You can not use the `return` command from within the arguments of other commands. Commands are executed in order, so `return` will end up as argument of the first command and thus never be executed

View File

@ -5,6 +5,7 @@ import re
from sys import argv from sys import argv
from collections.abc import Callable from collections.abc import Callable
import argparse import argparse
import pickle
""" """
TODO: TODO:
@ -27,6 +28,11 @@ sidenav_content_section = "<li class=\"sidenav_section\">#name</li>"
exit_on_include_failure = False exit_on_include_failure = False
# text written before the first <url> entry of the sitemap:
# the xml declaration and the opening <urlset> tag, ending with a newline
sitemap_begin = """\
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n"""
# text written after the last <url> entry of the sitemap
sitemap_end = "</urlset>"
""" """
************************************************************ REGULAR EXPRESSIONS ************************************************************ ************************************************************ REGULAR EXPRESSIONS ************************************************************
""" """
@ -45,7 +51,11 @@ re_set_map_alt = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^;]* *; *)+[
re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)" re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)"
""" only in comments """ """ only in comments """
re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *" re_preprocessor_command = r"[\t ]*#([a-zA-Z]+) *(.*)[\t ]*"
# https://www.w3.org/TR/NOTE-datetime
# Accepts a complete date `YYYY-MM-DD`, optionally followed by a time `Thh:mm:ss`,
# which in turn may be followed by a timezone offset `+hh:mm` or `-hh:mm`.
# Meant to be used with re.fullmatch.
# The pattern has to be the value of the assignment itself: a string literal on a line
# of its own is a separate expression statement and does not become part of the variable.
re_w3cdate = r"\d{4}-(?:0[1-9]|1[0-2])-(?:[0-2]\d|3[01])(T(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d([\+\-](?:0\d|1[0-2]):[0-5]\d)?)?"
COMMENT_BEGIN = "<!--" COMMENT_BEGIN = "<!--"
COMMENT_END = "-->" COMMENT_END = "-->"
@ -67,24 +77,43 @@ error_levels = {
} }
exit_on_error_level = error_levels["serious"] exit_on_error_level = error_levels["serious"]
# url of the file that is currently being processed
current_file_url = ""
""" """
************************************************************ UTILITY ************************************************************ ************************************************************ UTILITY ************************************************************
""" """
# ANSI escape sequences for coloured terminal output, used by pdebug, ptrace and error
# 9x codes select the bright variant of a colour, 3x codes the normal one
RED = '\033[91m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
BLUE = '\033[94m'
MAGENTA = '\033[95m'
CYAN = '\033[96m'
# NOTE(review): code 97 is "bright white" in the ANSI colour table, while WHITE below uses 37 - confirm the naming is intended
GRAY = '\033[97m'
# reset all attributes
RESET = '\033[0m'
BOLD = '\033[1m'
WHITE = '\033[37m'
DEBUG = False DEBUG = False
def pdebug(*args, **keys): def pdebug(*args, **keys):
if DEBUG: print(*args, **keys) fname, *_args = args
if DEBUG: print(f"{CYAN}{fname}{GRAY}", *_args, RESET, **keys)
TRACE = False TRACE = False
def ptrace(*args, **keys): def ptrace(*args, **keys):
if TRACE: print(*args, **keys) fname, *_args = args
if TRACE: print(f"{BLUE}{fname}{GRAY}", *_args, RESET, **keys)
def error(*args, level:int=exit_on_error_level, exit_code:int=1, **keys): def error(*args, level:int=exit_on_error_level, exit_code:int=1, **keys):
fname, *_args = args
if level >= exit_on_error_level: if level >= exit_on_error_level:
print(f"ERROR:", *args, **keys) print(f"{RED}ERROR: {fname}{RESET}", *_args, RESET, **keys)
exit(exit_code) exit(exit_code)
else: else:
print(f"WARNING:", *args, **keys) print(f"{YELLOW}WARNING: {fname}{RESET}", *_args, RESET, **keys)
def line_is_link_to_path(line, path): def line_is_link_to_path(line, path):
# check if the line is a link to html thats currently being processed # check if the line is a link to html thats currently being processed
@ -115,13 +144,93 @@ def evaluate_condition(input_string) -> bool:
words[i] = '"' + words[i].replace('"', r'\"') + '"' words[i] = '"' + words[i].replace('"', r'\"') + '"'
condition = "".join(words).replace("&&", " and ").replace("||", " or ") condition = "".join(words).replace("&&", " and ").replace("||", " or ")
ptrace(f"> Evaluating condition {condition}") ptrace("evaluate_conditon", f"Evaluating condition {condition}")
try: try:
return eval(condition) return eval(condition)
except SyntaxError: except SyntaxError:
error(f"Pythonized condition is invalid: {condition}", level=error_levels["light"]) error("evaluate_conditon", f"Pythonized condition is invalid: {condition}", level=error_levels["light"])
return False return False
"""
************************************************************ SITEMAP ************************************************************
"""
class Sitemap:
    """
    One `<url>` entry of the sitemap, plus the registry of all entries.

    Entries are created and modified through the `#sitemap` preprocessor command
    (see `cmd_sitemap`) and are keyed by the url of the file being processed.
    An entry only appears in the generated sitemap after `#sitemap include` has
    given it a url.
    """
    # registry shared by all instances: url of the processed file -> Sitemap entry
    urls:dict = {}
    # values allowed for the <changefreq> field
    CHANGEFREQS = ("always", "hourly", "daily", "weekly", "monthly", "yearly", "never")

    def __init__(self, url=None):
        # url is None until the page has been included, the other fields are optional
        self.url = url
        self.priority = None
        self.changefreq = None
        self.lastmod = None

    def set_url(self, url):
        """Set the url written to the `<loc>` field."""
        self.url = url

    def set_priority(self, priority):
        """
        Set the `<priority>` field.
        @param priority: anything `float()` accepts, must be within 0.0 and 1.0
        An invalid value is reported through `error` and is not stored.
        """
        try:
            value = float(priority)
        except (TypeError, ValueError):
            error("Sitemap.set_priority", f"invalid priority: '{priority}'", level=error_levels["serious"])
            return  # only reached when error() did not exit
        if not (0.0 <= value <= 1.0):
            error("Sitemap.set_priority", f"invalid priority: '{priority}'", level=error_levels["serious"])
            return
        self.priority = value

    def set_changefreq(self, changefreq):
        """
        Set the `<changefreq>` field.
        @param changefreq: one of `Sitemap.CHANGEFREQS`
        An invalid value is reported through `error` and is not stored.
        """
        if not (isinstance(changefreq, str) and changefreq in Sitemap.CHANGEFREQS):
            error("Sitemap.set_changefreq", f"invalid changefreq: '{changefreq}'", level=error_levels["serious"])
            return
        self.changefreq = changefreq

    def set_lastmod(self, lastmod):
        """
        Set the `<lastmod>` field.
        @param lastmod: date string that has to match `re_w3cdate` completely
        An invalid value is reported through `error` and is not stored.
        """
        if not (isinstance(lastmod, str) and re.fullmatch(re_w3cdate, lastmod)):
            error("Sitemap.set_lastmod", f"invalid lastmod: '{lastmod}'", level=error_levels["serious"])
            return
        self.lastmod = lastmod

    def get_entry(self):
        """
        @returns the `<url>` element for this entry, fields that are not set are left out
        """
        lines = ["<url>", f"\t<loc>{self.url}</loc>"]
        if self.priority is not None:
            lines.append(f"\t<priority>{self.priority}</priority>")
        if self.changefreq is not None:
            lines.append(f"\t<changefreq>{self.changefreq}</changefreq>")
        if self.lastmod is not None:
            lines.append(f"\t<lastmod>{self.lastmod}</lastmod>")
        lines.append("</url>")
        return "\n".join(lines)

    def __repr__(self) -> str:
        return f"Sitemap(url={self.url}, priority={self.priority}, changefreq={self.changefreq}, lastmod={self.lastmod})"

    @staticmethod
    def gen_sidemap():
        """
        @returns the whole sitemap as xml string: `sitemap_begin`, one indented `<url>`
                 element per included page and `sitemap_end`
        """
        s = sitemap_begin
        for url in Sitemap.urls.values():
            # an entry without url belongs to a page that set fields but never used
            # `#sitemap include`, it must not end up as <loc>None</loc>
            if url.url is None:
                continue
            s += "\t" + url.get_entry().replace("\n", "\n\t").strip("\t") + "\n"
        s += sitemap_end
        return s

    @staticmethod
    def cmd_sitemap(args:str, variables:dict[str,str]) -> str:
        """
        Handler of the `#sitemap` preprocessor command.
        @param args: `<sub-command> [argument]`, the sub-command is one of
                     include, priority, changefreq, lastmod
        @param variables: not used by this command
        @returns always the empty string
        The entry that is modified is the one of `current_file_url`.
        """
        # first word is the sub-command, the rest is its argument
        cmd, _, rest = args.partition(" ")
        cmd_args = rest.strip(" ")
        pdebug("cmd_sitemap", f"cmd='{cmd}' cmd_args='{cmd_args}'")
        if current_file_url not in Sitemap.urls:
            Sitemap.urls[current_file_url] = Sitemap()
        entry = Sitemap.urls[current_file_url]
        if cmd == "include":
            # a custom link replaces the url of the file
            entry.set_url(cmd_args if cmd_args else current_file_url)
        elif cmd == "priority":
            entry.set_priority(cmd_args)
        elif cmd == "changefreq":
            entry.set_changefreq(cmd_args)
        elif cmd == "lastmod":
            entry.set_lastmod(cmd_args)
        else:
            # level has to be passed by keyword, positional arguments are printed as message
            error("cmd_sitemap", f"Invalid command '{cmd}'", level=error_levels["serious"])
        ptrace("cmd_sitemap", f"Sitemap[{current_file_url}] is now: {entry}")
        return ""
""" """
@ -154,7 +263,7 @@ class Sidenav:
Sidenav.skip_next = True Sidenav.skip_next = True
@staticmethod @staticmethod
def generate() -> str: def generate() -> str:
pdebug(f"Sidenav.generate(): found the following entries: {Sidenav.entries}") pdebug("Sidenav.generate", f"found the following entries: {Sidenav.entries}")
sidenav:list[str] = sidenav_format.split('\n') sidenav:list[str] = sidenav_format.split('\n')
content_i = -1 content_i = -1
for i in range(len(sidenav)): # find in which line the entries need to be placed for i in range(len(sidenav)): # find in which line the entries need to be placed
@ -162,16 +271,16 @@ class Sidenav:
content_i = i content_i = i
break break
if content_i >= 0: if content_i >= 0:
sidenav.pop(content_i) indent = sidenav.pop(content_i).replace("#sidenav-content", "")
added_links = [] added_links = []
for i in reversed(range(len(Sidenav.entries))): for i in reversed(range(len(Sidenav.entries))):
entry = Sidenav.entries[i] entry = Sidenav.entries[i]
if entry[0] == Sidenav.LINK: if entry[0] == Sidenav.LINK:
if entry[2] in added_links: continue # no duplicates if entry[2] in added_links: continue # no duplicates
added_links.append(entry[2]) added_links.append(entry[2])
sidenav.insert(content_i, sidenav_content_link.replace("#name", entry[1]).replace("#link", entry[2])) sidenav.insert(content_i, indent + sidenav_content_link.replace("#name", entry[1]).replace("#link", entry[2]))
else: else:
sidenav.insert(content_i, sidenav_content_section.replace("#name", entry[1])) sidenav.insert(content_i, indent + sidenav_content_section.replace("#name", entry[1]))
sidenav_s = "" sidenav_s = ""
for line in sidenav: sidenav_s += line + "\n" # cant use "".join because of newlines for line in sidenav: sidenav_s += line + "\n" # cant use "".join because of newlines
return sidenav_s return sidenav_s
@ -184,6 +293,7 @@ class Sidenav:
cmd_args = "" cmd_args = ""
if 0 < space and space < len(args) - 1: if 0 < space and space < len(args) - 1:
cmd_args = args[space+1:].strip(" ") cmd_args = args[space+1:].strip(" ")
pdebug("cmd_sidenav", f"cmd='{cmd}' cmd_args='{cmd_args}'")
if cmd == "skip": if cmd == "skip":
Sidenav.skipNext() Sidenav.skipNext()
elif cmd == "section": elif cmd == "section":
@ -195,11 +305,11 @@ class Sidenav:
if match: if match:
Sidenav.addEntry(match.groups()[1], match.groups()[0]) Sidenav.addEntry(match.groups()[1], match.groups()[0])
else: else:
error(f"cmd_sidenav: Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"]) error("cmd_sidenav", f"Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"])
elif cmd == "include": elif cmd == "include":
return Sidenav.generate() return Sidenav.generate()
else: else:
error(f"cmd_sidenav: Invalid command: '{cmd}'", level=error_levels["light"]) error("cmd_sidenav", f"Invalid command: '{cmd}'", level=error_levels["light"])
return "" return ""
@ -214,7 +324,7 @@ into the source file at the place where the command was.
""" """
def cmd_include(args: str, variables:dict[str, str]={}) -> str: def cmd_include(args: str, variables:dict[str, str]={}) -> str:
args = args.split(' ') args = args.split(' ')
pdebug(f"cmd_include: args='{args}', variables='{variables}'") pdebug("cmd_include", f"args='{args}', variables='{variables}'")
filename = args[0] filename = args[0]
content = "" content = ""
try: try:
@ -225,16 +335,17 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
p = HTMLParser(content, {}) p = HTMLParser(content, {})
p.pos["start"] = p.pos["end"] = -1 p.pos["start"] = p.pos["end"] = -1
while p.i < len(p): # at start of new line or end of comment while p.i < len(p): # at start of new line or end of comment
p.next_line() p.find_line_end()
ptrace(f"cmd_include: Processing at i={p.i} in line {pos2line(p.file, p.i)}") ptrace("cmd_include", f"Processing at i={p.i} in line {pos2line(p.file, p.i)}: '{p[p.i:p.pos['line_end']]}'")
if not p.find_comment_begin(): continue if not p.find_comment_begin(): continue
if not p.find_comment_end(): continue if not p.find_comment_end(): continue
p.replace_multiline_comments()
match = p.find_command() match = p.find_command()
if match: if match:
command = match.groups()[0] command = match.groups()[0]
cmd_args = match.groups()[1].replace('\t', ' ').strip(' ') cmd_args = match.groups()[1].replace('\t', ' ').strip(' ')
pdebug(f"cmd_include Found command '{command}' with args '{cmd_args}'") pdebug("cmd_include", f"Found command '{command}' with args '{cmd_args}'")
if command == "section": if command == "section":
if cmd_args.startswith(target_section): if cmd_args.startswith(target_section):
p.pos["start"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1) p.pos["start"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
@ -242,23 +353,30 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
p.pos["end"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1) p.pos["end"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
# p.pos["end"] = p.pos["cmt_beg"] # p.pos["end"] = p.pos["cmt_beg"]
p.replace_command_with_output("") p.replace_command_with_output("")
p.command_end() p.command_end() # remove the command (+comment)
if p.pos["start"] >= 0 and p.pos["end"] > 0: break if p.pos["start"] >= 0 and p.pos["end"] > 0: break
continue
# section cmd in multiline comment is not supported, so simply jump to end of comment
p.i = p.pos["cmt_end"] + len(COMMENT_END)
p.pos["cmt_beg"] = -1
p.pos["cmd_beg"] = -1
p.pos["cmt_end"] = -1
p.pos["cmd_end"] = -1
if p.pos["start"] >= 0: if p.pos["start"] >= 0:
if p.pos["end"] < 0: if p.pos["end"] < 0:
p.pos["end"] = len(p) p.pos["end"] = len(p)
content = p[p.pos["start"]:p.pos["end"]] content = p[p.pos["start"]:p.pos["end"]]
else: else:
error(f"cmd_include: Could not find section {target_section} in file {filename}") error("cmd_include", f"Could not find section {target_section} in file {filename}")
except FileNotFoundError: except FileNotFoundError:
error(f"cmd_include: Could not open file '{filename}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"]) error("cmd_include", f"Could not open file '{filename}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
content = f"<!-- Could not include '{filename}' -->" content = f"<!-- Could not include '{filename}' -->"
if filename.endswith(".md"): if filename.endswith(".md"):
try: try:
from markdown import markdown from markdown import markdown
content = markdown(content, output_format="xhtml") content = markdown(content, output_format="xhtml")
except: except:
error(f"cmd_include: Could convert markdown to html for file '{filename}'. Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"]) error("cmd_include", f"Could convert markdown to html for file '{filename}'. Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"])
content = f"<!-- Could not convert to html: '{filename}' -->" content = f"<!-- Could not convert to html: '{filename}' -->"
glob_dependcies.append(filename) glob_dependcies.append(filename)
return content return content
@ -270,10 +388,10 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
# re_set_map = r"([a-zA-Z0-9_]+)\?\{(([a-zA-Z0-9_]+:.+,)*([a-zA-Z0-9_]+:.+))\}" # re_set_map = r"([a-zA-Z0-9_]+)\?\{(([a-zA-Z0-9_]+:.+,)*([a-zA-Z0-9_]+:.+))\}"
# <!-- #set section=lang?{*:Fallback,de:Abschnitt,en:Section} --> # <!-- #set section=lang?{*:Fallback,de:Abschnitt,en:Section} -->
space = args.find(' ') space = args.find(' ')
# pdebug(f"cmd_set: varname='{args[:space]}, 'arg='{args[space+1:]}', variables='{variables}'") pdebug("cmd_set", f"varname='{args[:space]}, 'arg='{args[space+1:]}', variables='{variables}'")
if not (space > 0 and space < len(args)-1): if not (space > 0 and space < len(args)-1):
variables[args] = "" variables[args] = ""
pdebug(f"cmd_set: Setting to empty string: {args}") pdebug("cmd_set", f"Setting to empty string: {args}")
else: else:
varname = args[:space] varname = args[:space]
variables[varname] = "" variables[varname] = ""
@ -284,15 +402,15 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
match = re.fullmatch(re_set_map_alt, args[space+1:].strip(' ')) match = re.fullmatch(re_set_map_alt, args[space+1:].strip(' '))
separator = ';' separator = ';'
if match: if match:
pdebug(f"cmd_set: Map {match.group()}") pdebug("cmd_set", f"Map {match.group()}")
depends = match.groups()[0] depends = match.groups()[0]
if not depends in variables: if not depends in variables:
pdebug(f"cmd_set: Setting from map, but depends='{depends}' is not in variables") pdebug("cmd_set", f"Setting from map, but depends='{depends}' is not in variables")
return "" return ""
depends_val = variables[depends] depends_val = variables[depends]
for option in match.groups()[1].split(separator): for option in match.groups()[1].split(separator):
option = option.strip(" ") option = option.strip(" ")
pdebug(f"cmd_set: Found option {option}") pdebug("cmd_set", f"Found option {option}")
colon = option.find(':') # we will find one, regex guarantees colon = option.find(':') # we will find one, regex guarantees
if option[:colon].strip(" ") == depends_val or option[:colon].strip(" ") == "*": if option[:colon].strip(" ") == depends_val or option[:colon].strip(" ") == "*":
variables[varname] = option[colon+1:].strip(" ") variables[varname] = option[colon+1:].strip(" ")
@ -300,7 +418,7 @@ def cmd_return(args: str, variables:dict[str, str]={}) -> str:
else: # simple asignment else: # simple asignment
value = args[space+1:].strip(" ") value = args[space+1:].strip(" ")
variables[varname] = value variables[varname] = value
pdebug(f"cmd_set: Assignment {varname} -> {value}") pdebug("cmd_set", f"Assignment {varname} -> {value}")
return variables[varname] return variables[varname]
return "" return ""
@ -311,7 +429,7 @@ def cmd_set(args: str, variables:dict[str, str]={}) -> str:
def cmd_unset(args: str, variables:dict[str, str]={}) -> str:
    """
    Remove a variable from `variables`.

    args:      name of the variable, surrounding spaces are ignored
    variables: mapping that is modified in place

    Unsetting a name that is not set only produces a debug message.
    Always returns an empty string.
    """
    variable = args.strip(' ')
    if variable in variables:
        variables.pop(variable)
    else:
        pdebug("cmd_unset", f"variable '{variable}' is not set", level=error_levels["light"])
    return ""
@ -329,10 +447,10 @@ def cmd_uncomment(args: str, variables:dict[str, str]={}) -> str:
return args return args
def cmd_error(args: str, variables:dict[str, str]={}) -> str:
    """
    Handler for the 'error' command: report `args` through error() at the "critical" level.
    `variables` is not used. Always returns an empty string.
    """
    error("cmd_error", f"Encounted 'error' command: {args}", level=error_levels["critical"])
    return ""
def cmd_warning(args: str, variables:dict[str, str]={}) -> str:
    """
    Handler for the 'warning' command: report `args` through error() at the "light" level.
    `variables` is not used. Always returns an empty string.
    """
    error("cmd_warning", f"Encounted 'warning' command: {args}", level=error_levels["light"])
    return ""
@ -346,6 +464,7 @@ command2function:dict[str, Callable[[str, dict[str,str]], str]] = {
"comment": cmd_comment, "comment": cmd_comment,
"uncomment": cmd_uncomment, "uncomment": cmd_uncomment,
"sidenav": Sidenav.cmd_sidenav, "sidenav": Sidenav.cmd_sidenav,
"sitemap": Sitemap.cmd_sitemap,
"warning": cmd_warning, "warning": cmd_warning,
"error": cmd_error, "error": cmd_error,
} }
@ -355,6 +474,10 @@ command2function:dict[str, Callable[[str, dict[str,str]], str]] = {
""" """
class Parser(): class Parser():
"""
General purpose parser class
It has states and positions in a text, which are updated when portions of the text are replaced or removed
"""
def __init__(self, file): def __init__(self, file):
self.file = file self.file = file
self.pos: dict[str, int] = {} self.pos: dict[str, int] = {}
@ -365,23 +488,23 @@ class Parser():
delete_length = stop - start delete_length = stop - start
nl, esl = "\n", "\\n" nl, esl = "\n", "\\n"
ptrace(f"- Deleting range [{start}, {stop}) of length {delete_length}: '{self.file[start:stop].replace(nl, esl)}'") ptrace("Parser.remove", f"Deleting range [{start}, {stop}) of length {delete_length}: '{self.file[start:stop].replace(nl, esl)}'")
assert(stop >= start) assert(stop >= start)
assert(stop <= len(self.file)) assert(stop <= len(self.file))
self.file = self.file[:start] + self.file[stop:] self.file = self.file[:start] + self.file[stop:]
for k,pos in self.pos.items(): for k,pos in self.pos.items():
if pos >= stop: self.pos[k] -= delete_length if pos >= stop: self.pos[k] -= delete_length
elif pos > start and not k in ignore_bounds: error(f"Parser.remove: Position {k}={pos} within deleted range [{start},{stop})", level=1) elif pos > start and not k in ignore_bounds: error("Parser.remove", f"Position {k}={pos} within deleted range [{start},{stop})", level=error_levels["light"])
def replace(self, start, stop, replacement, ignore_bounds=()):
    """
    Replace the range [start, stop) of the text with `replacement`.

    Every stored position at or behind `stop` is shifted by the change in length.
    A position strictly between `start` and `stop` is reported as a light error,
    unless its key is listed in `ignore_bounds`.
    """
    # the default for ignore_bounds is an immutable tuple, it is only used for membership tests
    assert stop >= start
    assert stop <= len(self.file)
    ptrace("Parser.replace", f"Replacing range [{start}, {stop}): '{self.file[start:stop]}' with '{replacement}'")
    self.file = self.file[:start] + replacement + self.file[stop:]
    length_difference = stop - start - len(replacement)
    for k, pos in self.pos.items():
        if pos >= stop:
            self.pos[k] -= length_difference
        elif pos > start and k not in ignore_bounds:
            error("Parser.replace", f"Position {k}={pos} within replaced range [{start},{stop})", level=error_levels["light"])

def __getitem__(self, key):
    """Index or slice the underlying text."""
    return self.file[key]
@ -409,32 +532,50 @@ class HTMLParser(Parser):
self.state["last_condition"] = False # if the last if condition was true self.state["last_condition"] = False # if the last if condition was true
self.remove_comments = remove_comments self.remove_comments = remove_comments
def next_line(self):
"""update i and line_end"""
self.pos["line_end"] = self.file.find('\n', self.i+1)
if self.pos["line_end"] < 0: self.pos["line_end"] = len(self)
def use_variables(self):
    """replace variable usages in the current line"""
    line_end = self.pos["line_end"]
    substituted = substitute_variables(self[self.i:line_end], self.variables)
    self.replace(self.i, line_end, substituted)
    # replace() shifts pos["line_end"] when the length of the line changed, so read it again
    ptrace("HTMLParser.use_variables", "Line after variable substitution:", self.file[self.i:self.pos["line_end"]])
def add_sidenav_headings(self):
    """
    check if heading for sidenav in line
    if re_sidenav_heading matches, the second group is added as sidenav entry
    and the first group, prefixed with '#', is used as its link
    """
    match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
    if match:
        groups = match.groups()
        Sidenav.addEntry(groups[1], f"#{groups[0]}")
        ptrace("HTMLParser.add_sidenav_headings", "Found heading with id:", groups)
def get_leading_whitespaces(self):
    """
    returns the whitespaces (spaces and tabs) at the start of the line that contains position i
    requires i and line_end to be up to date
    """
    # rfind gives -1 when there is no previous newline, so +1 yields 0 (start of text),
    # otherwise the first character after the newline
    line_beg = self.file.rfind("\n", 0, self.i) + 1
    line = self.file[line_beg:self.pos["line_end"]]
    return line[:len(line) - len(line.lstrip(" \t"))]
# Parsing functions
def find_line_end(self):
    """
    line_end -> position of next newline char or EOF
    the search starts at i+1, so a newline at position i itself is not considered
    """
    newline = self.file.find('\n', self.i + 1)
    self.pos["line_end"] = newline if newline >= 0 else len(self)
def find_comment_begin(self) -> bool: def find_comment_begin(self) -> bool:
""" """
find the beginning of a comment in the current line find the beginning of a comment in the current line
if comment begin was found, jump into the comment, return True if comment begin was found, jump into the comment, return True
cmt_beg -> beginning of COMMENT_BEGIN
i -> first character after COMMENT_BEGIN / line_end + 1
""" """
# look for comment begin # look for comment begin
if self.pos["cmt_beg"] < 0: # if not in comment, find next comment if self.pos["cmt_beg"] < 0: # if not in comment, find next comment
self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"]) self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
if self.pos["cmt_beg"] < 0: if self.pos["cmt_beg"] < 0:
self.i = self.pos["line_end"] + 1 self.i = self.pos["line_end"] + 1
return False return False
@ -442,7 +583,7 @@ class HTMLParser(Parser):
# jump to comment_begin # jump to comment_begin
old_i = self.i old_i = self.i
self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}") ptrace(f"HTMLParser.find_comment_begin", f"Found comment begin, jumping from pos {old_i} to {self.i}")
return True return True
return True # still in previous comment return True # still in previous comment
@ -451,27 +592,42 @@ class HTMLParser(Parser):
""" """
call after find_comment_begin returns true to update the cmt_end call after find_comment_begin returns true to update the cmt_end
call continue when returning false call continue when returning false
cmt_end -> beginning of COMMENT_END / ---
cmt_beg -> --- / -1 when invalid comment
""" """
# in comment, i at the character after COMMENT_BEGIN # in comment, i at the character after COMMENT_BEGIN
self.pos["cmt_end"] = self.file.find(COMMENT_END, self.i) #, self.pos["line_end"]) self.pos["cmt_end"] = self.file.find(COMMENT_END, self.i) #, self.pos["line_end"])
# sanity checks # sanity checks
if self.pos["cmt_end"] < 0: if self.pos["cmt_end"] < 0:
error(f"Comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is never ended.", level=error_levels["serious"]) error("HTMLParser.find_comment_end", f"Comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
return False return False
else: else:
tmp_next_begin = self.file.find(COMMENT_BEGIN, self.i) tmp_next_begin = self.file.find(COMMENT_BEGIN, self.i)
if 0 < tmp_next_begin and tmp_next_begin < self.pos["cmt_end"]: if 0 < tmp_next_begin and tmp_next_begin < self.pos["cmt_end"]:
error(f"Found next comment begin before the comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{self.file[self.i:self.pos['line_end']]}'", level=error_levels["light"]) error("HTMLParser.find_comment_end", f"Found next comment begin before the comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{self.file[self.i:self.pos['line_end']]}'", level=error_levels["light"])
self.pos["cmt_beg"] = -1 self.pos["cmt_beg"] = -1
return False return False
return True return True
def replace_multiline_comments(self):
    """
    if in a multiline comment, turn every line into a separate comment
    every newline between cmt_beg and cmt_end is replaced by a comment end, the newline,
    the indent of the current line and a new comment begin
    afterwards line_end and cmt_end are searched again
    """
    # comment ends before the end of the line -> not a multiline comment
    if self.pos["line_end"] > self.pos["cmt_end"]:
        return
    indent = self.get_leading_whitespaces()
    comment = self.file[self.pos["cmt_beg"]:self.pos["cmt_end"]]
    # line_end lies inside the replaced range, it is recomputed below
    self.replace(self.pos["cmt_beg"], self.pos["cmt_end"], comment.replace("\n", "-->\n" + indent + "<!--"), ignore_bounds=["line_end"])
    self.find_line_end()
    self.find_comment_end()
def find_command(self): def find_command(self):
# either at newline (if in multiline comment) or at comment end # either at newline (if in multiline comment) or at comment end
self.pos["cmd_beg"] = self.i self.pos["cmd_beg"] = self.i
self.pos["cmd_end"] = min(self.pos["line_end"], self.pos["cmt_end"]) self.pos["cmd_end"] = min(self.pos["line_end"], self.pos["cmt_end"])
assert self.pos["cmd_end"] >= self.i, f"cmd_end={self.pos['cmd_end']}, i={self.i}, line_end={self.pos['line_end']}, cmt_end={self.pos['cmt_end']}" assert self.pos["cmd_end"] >= self.i, f"cmd_end={self.pos['cmd_end']}, i={self.i}, line_end={self.pos['line_end']}, cmt_end={self.pos['cmt_end']}"
ptrace(f"> Possible command end: {self.pos['cmd_end']}, possible command: '{self[self.i:self.pos['cmd_end']]}'") ptrace("HTMLParser.find_command", f"Possible command end: {self.pos['cmd_end']}, possible command: '{self[self.i:self.pos['cmd_end']]}'")
# find commands # find commands
match = re.fullmatch(re_preprocessor_command, self[self.i:self.pos["cmd_end"]].strip(" ")) match = re.fullmatch(re_preprocessor_command, self[self.i:self.pos["cmd_end"]].strip(" "))
@ -480,8 +636,10 @@ class HTMLParser(Parser):
return match return match
def replace_command_with_output(self, command_output):
    """
    replace the text from i to cmd_end with the output of the command
    every newline in the output is followed by the leading whitespaces of the current line,
    so multiline output keeps the indent level
    """
    indent = self.get_leading_whitespaces()
    self.replace(self.i, self.pos["cmd_end"], command_output.replace("\n", "\n" + indent))
    ptrace("HTMLParser.replace_command_with_output", f"After command, the line is now '{self.file[self.i:self.pos['line_end']]}'")
def command_end(self): def command_end(self):
if self.pos["cmd_end"] == self.pos["cmt_end"]: # reached end of comment if self.pos["cmd_end"] == self.pos["cmt_end"]: # reached end of comment
@ -490,7 +648,7 @@ class HTMLParser(Parser):
if self[self.pos["cmt_beg"]-1] == '\n' and self[self.pos["cmt_end"]+len(COMMENT_END)] == '\n': # if the comment consumes the whole line, remove the entire line if self[self.pos["cmt_beg"]-1] == '\n' and self[self.pos["cmt_end"]+len(COMMENT_END)] == '\n': # if the comment consumes the whole line, remove the entire line
remove_newline = 1 remove_newline = 1
if self.state["cmd_in_cmt"]: # remove comment tags if a command was found if self.state["cmd_in_cmt"]: # remove comment tags if a command was found
ptrace(f"Deleting opening comment tags") ptrace("HTMLParser.command_end", f"Deleting opening comment tags")
self.remove(self.pos["cmt_beg"], self.pos["cmt_beg"] + len(COMMENT_BEGIN)) self.remove(self.pos["cmt_beg"], self.pos["cmt_beg"] + len(COMMENT_BEGIN))
self.remove(self.pos["cmt_end"], self.pos["cmt_end"] + len(COMMENT_END) + remove_newline, ignore_bounds=["cmt_end", "cmd_end", "line_end"]) self.remove(self.pos["cmt_end"], self.pos["cmt_end"] + len(COMMENT_END) + remove_newline, ignore_bounds=["cmt_end", "cmd_end", "line_end"])
# process the line again, because a command might have inserted new comments # process the line again, because a command might have inserted new comments
@ -507,7 +665,7 @@ class HTMLParser(Parser):
self.pos["cmt_end"] = -1 self.pos["cmt_end"] = -1
self.pos["cmd_end"] = -1 self.pos["cmd_end"] = -1
self.i = self.pos["line_end"] + 1 self.i = self.pos["line_end"] + 1
ptrace(f"> Multiline comment, jumping to next line.") ptrace(f"HTMLParser.command_end", f"Multiline comment, jumping to next line.")
# i = possible_command_end commented, because if something containing new commands is inserted we need to parse that as well # i = possible_command_end commented, because if something containing new commands is inserted we need to parse that as well
@ -516,8 +674,8 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
sidenav_include_pos = -1 sidenav_include_pos = -1
while p.i < len(p): # at start of new line or end of comment while p.i < len(p): # at start of new line or end of comment
p.next_line() p.find_line_end()
ptrace(f"Processing at i={p.i} in line {pos2line(p.file, p.i)}") ptrace("parse_file", f"Processing at i={p.i} in line {pos2line(p.file, p.i)}: '{p[p.i:p.pos['line_end']]}'")
p.use_variables() p.use_variables()
p.add_sidenav_headings() p.add_sidenav_headings()
@ -525,21 +683,22 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
if not p.find_comment_begin(): continue if not p.find_comment_begin(): continue
if not p.find_comment_end(): continue if not p.find_comment_end(): continue
p.replace_multiline_comments()
match = p.find_command() match = p.find_command()
if match: if match:
command = match.groups()[0] command = match.groups()[0]
args = match.groups()[1].replace('\t', ' ').strip(' ') args = match.groups()[1].replace('\t', ' ').strip(' ')
pdebug(f"> Found command '{command}' with args '{args}'") pdebug("parse_file", f"Found command '{command}' with args '{args}'")
# delete from previous block if # delete from previous block if
if command in ["elif", "else", "endif"]: if command in ["elif", "else", "endif"]:
if p.pos["conditional_block_beg"] < 0: error(f"Misplaced '{command}' in line {pos2line(p.file, p.i)}") if p.pos["conditional_block_beg"] < 0: error("parse_file", f"Misplaced '{command}' in line {pos2line(p.file, p.i)}")
if p.state["last_condition"]: if p.state["last_condition"]:
# delete block from here at next endif # delete block from here at next endif
p.state["last_condition"] = False p.state["last_condition"] = False
else: else:
# delete block from last condition statement # delete block from last condition statement
ptrace(f"> Deleting block from last condition") ptrace("parse_file", f"> Deleting block from last condition")
p.remove(p.pos["conditional_block_beg"], p.pos["cmt_beg"]) p.remove(p.pos["conditional_block_beg"], p.pos["cmt_beg"])
p.i = p.pos["cmd_beg"] p.i = p.pos["cmd_beg"]
p.pos["conditional_block_beg"] = p.i p.pos["conditional_block_beg"] = p.i
@ -552,14 +711,14 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
p.pos["conditional_block_beg"] = p.i p.pos["conditional_block_beg"] = p.i
p.state["last_condition"] = evaluate_condition(args) p.state["last_condition"] = evaluate_condition(args)
p.state["any_condition"] = p.state["last_condition"] p.state["any_condition"] = p.state["last_condition"]
pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}") pdebug("parse_file", f"Command {command} condition evaluated to {p.state['last_condition']}")
cmd_output = "" cmd_output = ""
elif command =="elif": elif command =="elif":
p.pos["conditional_block_beg"] = p.i p.pos["conditional_block_beg"] = p.i
p.state["last_condition"] = evaluate_condition(args) if not p.state["any_condition"] else False p.state["last_condition"] = evaluate_condition(args) if not p.state["any_condition"] else False
if p.state["last_condition"]: if p.state["last_condition"]:
p.state["any_condition"] = True p.state["any_condition"] = True
pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}") pdebug("parse_file", f"Command {command} condition evaluated to {p.state['last_condition']}")
cmd_output = "" cmd_output = ""
elif command == "else": elif command == "else":
p.pos["conditional_block_beg"] = p.i p.pos["conditional_block_beg"] = p.i
@ -572,18 +731,23 @@ def parse_file(_file:str, variables:dict[str,str], remove_comments):
elif command == "endif": elif command == "endif":
cmd_output = "" cmd_output = ""
elif command not in command2function: elif command not in command2function:
error(f"Invalid command in line {pos2line(p.file, p.i)}: {command}", level=error_levels["light"]) error("parse_file", f"Invalid command in line {pos2line(p.file, p.i)}: {command}", level=error_levels["light"])
cmd_output = "" cmd_output = ""
else: else:
cmd_output = command2function[command](args, variables) cmd_output = command2function[command](args, variables)
else: else:
cmd_output = "" cmd_output = ""
p.replace_command_with_output(cmd_output) p.replace_command_with_output(cmd_output)
else:
pdebug("parse_file", f"Did not find command in comment {p.file[p.pos['cmt_beg']:p.pos['cmt_end']+len(COMMENT_END)]}")
p.command_end() p.command_end()
if sidenav_include_pos >= 0: if sidenav_include_pos >= 0:
return p.file[:sidenav_include_pos] + Sidenav.generate() + p.file[sidenav_include_pos:] p.i = sidenav_include_pos # required before get_leading_whitespaces
p.find_line_end() # required before get_leading_whitespaces
indent = p.get_leading_whitespaces()
return p.file[:sidenav_include_pos] + Sidenav.generate().replace("\n", "\n" + indent) + p.file[sidenav_include_pos:]
else: else:
return p.file return p.file
@ -597,11 +761,11 @@ def substitute_variables(html:str, variables:dict[str, str]):
matches.append(match) matches.append(match)
html_list = list(html) html_list = list(html)
for match in reversed(matches): for match in reversed(matches):
pdebug(f"> Found variable usage {match.groups()[0]}, match from {match.start()} to {match.end()}") pdebug("substitute_variables", f"Found variable usage {match.groups()[0]}, match from {match.start()} to {match.end()}")
value = "" value = ""
if match.groups()[0] in variables: value = variables[match.groups()[0]] if match.groups()[0] in variables: value = variables[match.groups()[0]]
else: else:
pdebug(f"Variable {match.groups()[0]} is used but not defined") pdebug("substitute_variables", f"Variable {match.groups()[0]} is used but not defined")
for _ in range(match.start(), match.end()): for _ in range(match.start(), match.end()):
html_list.pop(match.start()) html_list.pop(match.start())
html_list.insert(match.start(), value.strip(" ")) html_list.insert(match.start(), value.strip(" "))
@ -612,15 +776,20 @@ def substitute_variables(html:str, variables:dict[str, str]):
""" """
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="bUwUma html preprocessor") parser = argparse.ArgumentParser(prog="bUwUma html preprocessor")
parser.add_argument("--input", action="store", help="path to the input file", required=True) parser.add_argument("--input", action="store", help="path to the input file", default="")
parser.add_argument("--output", action="store", help="output to this file", default="") parser.add_argument("--output", action="store", help="output to this file", default="")
parser.add_argument("--inplace", action="store_true", help="overwrite input file") parser.add_argument("--inplace", action="store_true", help="overwrite input file")
parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)
parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[]) parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[])
parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="") parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="")
parser.add_argument("--sitemap-generate", action="store", help="generate the sitemap from the sitemap-temp-file", default="")
parser.add_argument("--sitemap-temp-file", action="store", help="file for storing sitemap data during build process", default="/tmp/sitemap.pkl")
parser.add_argument("--sitemap-webroot-dir", action="store", help="directory of the webroot, without trailing /. This will be removed from the output path for generating the sitemap url entry", default="")
parser.add_argument("--sitemap-base-url", action="store", help="base url of the website, without trailing /", default="https://www.example.com")
parser.add_argument("--sitemap-remove-ext", action="store_true", help="remove the file extenstion in the sitemap entry")
parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occures", choices=["light", "serious", "critical"], default="serious") parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occures", choices=["light", "serious", "critical"], default="serious")
parser.add_argument("--debug", action="store_true", help="be more verbose", default=False) parser.add_argument("--debug", action="store_true", help="be more verbose", default=False)
parser.add_argument("--trace", action="store_true", help="be extremly verbose", default=False) parser.add_argument("--trace", action="store_true", help="be extremly verbose", default=False)
parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)
variables:dict[str, str] = {} variables:dict[str, str] = {}
args = parser.parse_args() args = parser.parse_args()
@ -635,10 +804,27 @@ if __name__ == "__main__":
args.input = args.input.strip(" ") args.input = args.input.strip(" ")
args.output = args.output.strip(" ") args.output = args.output.strip(" ")
args.output_deps = args.output_deps.strip(" ") args.output_deps = args.output_deps.strip(" ")
args.sitemap_temp_file = args.sitemap_temp_file.strip(" ")
args.sitemap_generate = args.sitemap_generate.strip(" ")
TRACE = args.trace TRACE = args.trace
if args.trace: args.debug = True if args.trace: args.debug = True
DEBUG = args.debug DEBUG = args.debug
# either input file or sitemap_generate is required
if not (bool(args.input) ^ bool(args.sitemap_generate)):
parser.error(f"Exactly one if --input or --sitemap-generate must be given")
if args.input:
if args.sitemap_webroot_dir:
current_file_url = args.sitemap_base_url + args.output.replace(args.sitemap_webroot_dir, "")
else:
current_file_url = args.sitemap_base_url + args.output
if args.sitemap_remove_ext:
current_file_url = os.path.splitext(current_file_url)[0]
pdebug("main", f"current_file={current_file_url}")
# sanity checks # sanity checks
if not path.isfile(args.input): if not path.isfile(args.input):
parser.error(f"Invalid input file:: {args.input}") parser.error(f"Invalid input file:: {args.input}")
@ -655,13 +841,18 @@ if __name__ == "__main__":
if not args.output: if not args.output:
parser.error(f"--output-deps requires either --output <file> our --inplace") parser.error(f"--output-deps requires either --output <file> our --inplace")
if args.sitemap_temp_file:
if path.isfile(args.sitemap_temp_file):
with open(args.sitemap_temp_file, "rb") as file:
Sitemap.urls = pickle.load(file)
# get html # get html
with open(args.input, "r") as file: with open(args.input, "r") as file:
target_html = file.read() target_html = file.read()
output_html = parse_file(target_html, variables, not args.preserve_comments) output_html = parse_file(target_html, variables, not args.preserve_comments)
# remove empty lines # remove empty lines
output_html = re.sub(r"[\t\r ]*\n(?:[\t\r ]*\n[\t\r ]*)+", r"\n", output_html) output_html = re.sub(r"[\t\r ]*\n(?:[\t\r ]*\n)+", r"\n", output_html)
# pdebug(f"Output: {output_html}") # pdebug(f"Output: {output_html}")
@ -676,6 +867,18 @@ if __name__ == "__main__":
if args.output != args.input: if args.output != args.input:
glob_dependcies.append(args.input) glob_dependcies.append(args.input)
depfile = generate_dependecy_file(args.output, glob_dependcies) depfile = generate_dependecy_file(args.output, glob_dependcies)
pdebug(f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}") pdebug("main", f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
with open(args.output_deps, "w") as file: with open(args.output_deps, "w") as file:
file.write(depfile) file.write(depfile)
if args.sitemap_temp_file:
with open(args.sitemap_temp_file, "wb") as file:
pickle.dump(Sitemap.urls, file)
else: # sitemap_generate
if not path.isfile(args.sitemap_temp_file):
parser.error(f"Invalid sitemap-temp-file: '{args.sitemap_temp_file}'")
with open(args.sitemap_temp_file, "rb") as file:
Sitemap.urls = pickle.load(file)
sitemap = Sitemap.gen_sidemap()
pdebug("main", f"Writing sitemap to {os.path.abspath(args.sitemap_generate)}")
with open(args.sitemap_generate, "w") as file:
file.write(sitemap)