From 9e79eb76d60f2bb54e77de45f53ba77903c24ab4 Mon Sep 17 00:00:00 2001 From: "matthias@arch" Date: Fri, 16 Dec 2022 05:11:16 +0100 Subject: [PATCH] initial commit --- Makefile | 192 +++++++++++++++++ README.md | 115 ++++++++++ html_preprocessor.py | 494 +++++++++++++++++++++++++++++++++++++++++++ nginx.conf | 38 ++++ 4 files changed, 839 insertions(+) create mode 100644 Makefile create mode 100644 README.md create mode 100755 html_preprocessor.py create mode 100644 nginx.conf diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f79072d --- /dev/null +++ b/Makefile @@ -0,0 +1,192 @@ +# ABOUT +# - In this Makefile, 'building a file' means: +# - if the file has a '.html' extension: run the html preprocessor on the file and place the output in the output directory +# - else: copy the file to the output directory +# - Folder structure from source directories will be preserved in the output directory +# - Abbreviations: +# - FLS: files +# - DIR: directory +# - SRC: source +# - LANG: language +# - PP: preprocessor +# - DEP: dependency + +# TODO: fix that you have invoke make twice to build both languages + +# +# NORMAL SETTINGS +# change these to fir your project +# + +# root dir for the project, all other paths relative to PROJECT_DIR (except for OUT_DIR) +PROJECT_DIR = . 
+ +# path where final website will be in, this one is not relative to PROJECT_DIR +OUT_DIR = ../quintern-test + +# SOURCE FILES: +# all SRC_FLS and all files in the SRC_DIRS will be built +SRC_DIRS = de en script style +SRC_FLS = rss.xml + +# SOURCE FILES: +# all RESOURCE_FLS and all files in the RESOURCE_DIRS will be copied to OUT_DIR +RESOURCE_DIRS = resources +RESOURCE_FLS = + + +# MULTI-LANG SOURCE FILES: +# the files in COMMON_DIR will be built for all LANGS: +# foreach html-file in COMMON_DIR: +# foreach lang in LANGS: +# run HTML_PP_CMD with --var lang=lang on file and output to OUT_DIR without the COMMON_DIR prefix, so COMMON_DIR/subdir/file.html -> OUT_DIR/lang/subdir/file.html +# all non-html files will handled the same way, but without the preprocessor being run on them. They are simply copied +COMMON_DIR = common +LANGS = de en + +# PREPROCESSOR +# path to of the files that should be included +INCLUDE_DIR = include + + +# ADVANCED +# the command to run the html preprocessor +HTML_PP_CMD = python3 html_preprocessor.py --exit-on light + +DEP_DIR = .dependencies + + + +# +# NOT SETTINGS ANYMORE +# DO NOT CHANGE ANYTHING HERE UNLESS YOU KNOW WHAT YOU ARE DOING! 
+# +# all variables starting with _ are relative to PROJECT_DIR + +# make everything relative to PROJECT_DIR +_SRC_DIRS = $(addprefix $(PROJECT_DIR)/, $(SRC_DIRS)) +_SRC_FLS = $(addprefix $(PROJECT_DIR)/, $(SRC_FLS)) +_RES_DIRS = $(addprefix $(PROJECT_DIR)/, $(RESOURCE_DIRS)) +_RES_FLS = $(addprefix $(PROJECT_DIR)/, $(RESOURCE_FLS)) +_COMMON_DIR = $(addprefix $(PROJECT_DIR)/, $(COMMON_DIR)) +_INCLUDE_DIR = $(addprefix $(PROJECT_DIR)/, $(INCLUDE_DIR)) +_DEP_DIR = $(addprefix $(PROJECT_DIR)/, $(DEP_DIR)) + +# NORMAL SRC +# all SRC_DIRS + all subdirs of each srcdir +_SRC_SUB_DIRS = $(foreach srcdir, $(_SRC_DIRS), $(shell find $(srcdir)/ -type d)) +_SRC_FLS += $(foreach srcdir, $(_SRC_DIRS), $(shell find $(srcdir)/ -type f)) +# OUT_DIRS = $(OUT_DIR) $(addprefix $(OUT_DIR)/, $(_SRC_SUB_DIRS)) +OUT_DIRS = $(OUT_DIR)/ $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/%, $(_SRC_SUB_DIRS)) +# path of the source files after being processed +# OUT_FLS = $($(notdir _SRC_FLS):%=$(OUT_DIR)/%) +OUT_FLS = $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/%, $(_SRC_FLS)) + +# RESOURCES +_RES_SUB_DIRS = $(foreach srcdir, $(_RES_DIRS), $(shell find $(srcdir)/ -type d)) +_RES_FLS += $(foreach srcdir, $(_RES_DIRS), $(shell find $(srcdir)/ -type f)) +RES_OUT_DIRS = $(OUT_DIR)/ $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/%, $(_RES_SUB_DIRS)) +RES_OUT_FLS = $(patsubst $(PROJECT_DIR)/%, $(OUT_DIR)/%, $(_RES_FLS)) + +# MULTILANG +_ML_SRC_FLS = $(shell find $(_COMMON_DIR)/ -type f) +_ML_SRC_SUB_DIRS= $(shell find $(_COMMON_DIR)/ -type d) +# will contain one subdir for each lang, each of which contains every file from ML_SRC_FLS +ML_OUT_DIR = $(OUT_DIR) +ML_OUT_LANG_DIRS= $(foreach lang, $(LANGS), $(addprefix $(ML_OUT_DIR)/, $(lang))) +ML_OUT_DIRS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_DIR)/$(lang)/%, $(_ML_SRC_SUB_DIRS))) +ML_OUT_FLS = $(foreach lang, $(LANGS), $(patsubst $(_COMMON_DIR)/%, $(ML_OUT_DIR)/$(lang)/%, $(_ML_SRC_FLS))) + +# needed for creating them +_DEP_DIRS = $(sort 
$(patsubst $(OUT_DIR)/%, $(_DEP_DIR)/%, $(OUT_DIRS) $(ML_OUT_DIRS))) +# needed for reading +_DEP_FLS = $(shell find $(_DEP_DIR) -type f -name '*.d') + +# PRINTING +FMT_VAR_SRC ="Variable '\e[1;34m%s\e[0m': \e[0;33m%s\e[0m\n" +FMT_VAR_OUT ="Variable '\e[1;34m%s\e[0m': \e[0;35m%s\e[0m\n" +FMT_DIR ="\e[1;34mMaking directory\e[0m: \e[0;35m%s\e[0m\n" +FMT_OUT_HTML ="\e[1;34mBuilding html\e[0m \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" +FMT_OUT_OTHER ="\e[1;34mBuilding\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" + +FMT_OUT_ML_HTML="\e[1;34mBuilding html\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" +FMT_OUT_ML_OTHER="\e[1;34mBuilding\e[0m in lang \e[1;34m%s\e[0m: \e[1;33m%s\e[0m at \e[1;35m%s\e[0m\n" +.SUFFIXES: +.SUFFIXES: .html .md + +.PHONY: default normal multilang resources print start stop clean cleaner + +.DEFAULT_GOAL = all + +# include all the dependency makefiles +include $(_DEP_FLS) + +all: normal multilang resources +normal: $(OUT_FLS) +multilang: $(ML_OUT_FLS) +resources: $(RES_OUT_FLS) + +print: + @printf $(FMT_VAR_SRC) "PROJECT_DIR" "$(PROJECT_DIR)" + @printf $(FMT_VAR_OUT) "OUT_DIRS" "$(OUT_DIRS)" + @printf $(FMT_VAR_SRC) "_INCLUDE_DIR" "$(_INCLUDE_DIR)" + @printf $(FMT_VAR_SRC) "_SRC_FLS" "$(_SRC_FLS)" + @printf $(FMT_VAR_OUT) "OUT_FLS" "$(OUT_FLS)" + @printf $(FMT_VAR_SRC) "_RES_FLS" "$(_RES_FLS)" + @printf $(FMT_VAR_OUT) "RES_OUT_FLS" "$(RES_OUT_FLS)" + @printf $(FMT_VAR_SRC) "_ML_SRC_FLS" "$(_ML_SRC_FLS)" + @printf $(FMT_VAR_OUT) "ML_OUT_FLS" "$(ML_OUT_FLS)" + @printf $(FMT_VAR_SRC) "_DEP_FLS" "$(_DEP_FLS)" + @# @printf $(FMT_VAR_SRC) "y" "$(y)" + +# MULTILANG RULES +$(sort $(ML_OUT_DIRS) $(_DEP_DIRS) $(RES_OUT_DIRS) $(OUT_DIRS)): + @printf $(FMT_DIR) "$@" + @mkdir -p $@ + +# build/ml_tmp/lang/subdir/xyz.html +$(foreach out_dir, $(ML_OUT_LANG_DIRS), $(out_dir)/%.html): $(_COMMON_DIR)/%.html | $(ML_OUT_DIRS) $(_DEP_DIRS) + @#echo "$$@=$@, $$<=$< $$^=$^" + @# \$@=build/ml_tmp/lang/subdir/xyz.html, \$<=common/subdir/xyz.html + 
@lang=`echo $(patsubst $(ML_OUT_DIR)/%, %, $@) | awk -F "/" '{print $$1}'`; \ + printf $(FMT_OUT_ML_HTML) "$$lang" "$<" "$@"; \ + $(HTML_PP_CMD) --target "$<" --output "$@" --var include_dir=$(_INCLUDE_DIR) --var lang=$$lang --output-deps "$(patsubst $(OUT_DIR)/%, $(_DEP_DIR)/%.d, $@)"; + +# rule for all not html files +$(foreach out_dir, $(ML_OUT_LANG_DIRS), $(out_dir)/%): $(_COMMON_DIR)/% | $(ML_OUT_DIRS) + @lang=`echo $(patsubst $(ML_OUT_DIR)/%, %, $@) | awk -F "/" '{print $$1}'`; \ + printf $(FMT_OUT_ML_OTHER) "$$lang" "$<" "$@" ; \ + cp $< $@ + +# +# (NORMAL/RE-)SOURCE RULES +# + + +$(OUT_DIR)/%.html: %.html | $(OUT_DIRS) $(_DEP_DIRS) + @printf $(FMT_OUT_HTML) "$<" "$@"; + $(HTML_PP_CMD) --target "$<" --output "$@" --var include_dir=$(_INCLUDE_DIR) --output-deps "$(_DEP_DIR)/$<.d"; + @# remove comments and empty lines. two separate lines bc the substitution might create new empty lines + @#awk -i inplace '{FS="" sub(//,"")}1' $@ + @#awk -i inplace '{if (NF != 0) print}' $@ + +$(OUT_DIR)/%: % | $(OUT_DIRS) $(RES_OUT_DIRS) + @printf $(FMT_OUT_OTHER) "$<" "$@" + @cp -r $< $@ + + +# .DEFAULT: +# @echo "MISSING RULE: $@" + +start: + /usr/sbin/nginx -c nginx.conf -p $(shell pwd)& + firefox http://localhost:8080/ +stop: + killall nginx + +clean: + -rm $(OUT_FLS) $(ML_OUT_FLS) + -rm -r $(_DEP_DIR) + +cleaner: + -rm -r $(OUT_DIR) diff --git a/README.md b/README.md new file mode 100644 index 0000000..5c59d67 --- /dev/null +++ b/README.md @@ -0,0 +1,115 @@ +# bUwUma +`bUwUma` is a build system that uses **GNU make** and a **preprocessor** written in python to build **static**, **multilingual** websites. + +This readme only documents the preprocessor. +For more information and a quickstart guide on how to use `bUwUma`, please +refer to the article [on my website](https://quintern.xyz/en/software/buwuma.html). + +# HTML Preprocessor Documentation +## Syntax +### Commands + - All commands must be located within a html comment what starts with ``. 
+ - Commands start with a `#` character, the command must follow the `#` immediately. + - Everything after the command until the end of the comment or a newline character is considered the argument of the command. + ```html + + + ``` + - All commands return a string, which can be empty. + - If a comment contains a command, the entire comment will be replaced with the return value of the command. + - If there are multiple commands in a comment, it will be replaced by all the return values added together. +### Variables + - Variable names must only consist of these characters: `a-zA-Z0-9_` + - A variable with name `varname` can be used like this: `#$(varname)` + - A variable usage will be replaced by the value of the variable + - Any variable that is not defined has the empty string as its value +### General + - Whitespaces around a token are ignored, so `` is the same as `` + - If a command-comment takes up a whole line, the whole line including the newline character is replaced. + +## Commands +### include + Include the content of a file at the position of the command. + **Synopsis**: + `` + **Argument**: + An absolute or relative path to a text file + **Return Value**: + The content of the file or `` empty string if the file cannot be opened. +### set + Set the value of a variable + **Synopsis**: + Set the value of `varname` to `this is the value`: + `` + Set the value of `varname` depending on the value of `othervar`: + `` + **Argument**: + Any string + **Return Value**: + The argument in comment tags + This can be useful in multiline comments that contain other commands: In that case, the comment tags will be removed and each command replaced with + its return value, so if you want to just have commented text in there you can use `#comment` +### uncomment + Uncomment the comment. 
+ **Synopsis**: + `` + **Argument**: + Any string + **Return Value**: + The argument + This can be useful when you want to look at the unprocessed html without variables or when your syntax highlighting gets confused by a variable. +### sidenav + Manage the generation of a content menu which contains links to all headings in your html that have an id. The menu is called sidenav here. + An entry is an html heading with an id: `

This heading will be linked in the sidenav

` + **Synopsis**: + `` + sidenav-command must be one of the following: + #### `include` + Include the generated sidenav at this position. + **Argument**: + Ignored + **Return Value**: + The generated sidenav + #### `section` + Group all following entries in named section. + **Argument**: + The name of the section + **Return Value** + Empty string + #### `name` + Use a custom name instead of the heading itself for the link to the next heading. + **Argument**: + The link-name of the next heading + **Return Value**: + Empty string + #### `custom` + Include a custom link in the sidenav. + **Synopsis**: + `` + **Argument**: + Must be `href="..." name="..."`. Either single `'` or double `"` quotes are required. + **Return Value**: + Empty string +## Pitfalls + - The `#include` command must not be in the last line of the file + - The maps in `set` have **at least 2** options + - If you want to use variables in markdown, you have to escape the `#` with a backslash, so `#$(var)` becomes `\#$(var)` diff --git a/html_preprocessor.py b/html_preprocessor.py new file mode 100755 index 0000000..f66cf83 --- /dev/null +++ b/html_preprocessor.py @@ -0,0 +1,494 @@ +#!/bin/python3 +import os +import re +from sys import argv +from collections.abc import Callable + +""" +TODO: +- testing +- generate sidenav during parse_file for increased speed and to allow sidenav commands in multiline comments +- reintroduce the nav_selected class on nav feature +""" +""" +************************************************************ SETTINGS ************************************************************ +""" +sidenav_format = """\ +
+ +
+ """ +sidenav_content_link = "
  • #name
  • " +sidenav_content_section = "
  • #name
  • " + +exit_on_include_failure = False + +""" +************************************************************ REGULAR EXPRESSIONS ************************************************************ +""" +# SIDENAV +# heading with id +re_sidenav_heading = r"(.+)" +# custom entry +re_sidenav_custom = r"href=(?:\"|\')([^\"\' ]+)(?:\"|\') +name=(?:\"|\')(.+)(?:\"|\')" + +# commas +re_set_map = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^,]*, *)+[a-zA-Z0-9_*]+ *: *[^,]*) *,? *\}" +# semicolons +re_set_map_alt = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^;]* *; *)+[a-zA-Z0-9_*]+ *: *[^;]*) *;? *\}" + +""" #$(myvar) """ +re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)" + +""" only in comments """ +re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *" + +COMMENT_BEGIN = "" + + +""" +************************************************************ GLOBALS ************************************************************ +""" +glob_dependcies: list[str] = [] + +exit_codes = { + "FileNotFound": 2, + "MarkdownConversionError": 3, +} +error_levels = { + "light": 0, + "serious": 1, + "critical": 2, +} +exit_on_error_level = error_levels["serious"] + + +""" +************************************************************ UTILITY ************************************************************ +""" +DEBUG = False +def pdebug(*args, **keys): + if DEBUG: print(*args, **keys) + +TRACE = False +def ptrace(*args, **keys): + if TRACE: print(*args, **keys) + +def error(*args, level:int=exit_on_error_level, exit_code:int=1, **keys): + if level >= exit_on_error_level: + print(f"ERROR:", *args, **keys) + exit(exit_code) + else: + print(f"WARNING:", *args, **keys) + +def line_is_link_to_path(line, path): + # check if the line is a link to html thats currently being processed + match = re.search(r"(.+)", line) + if match: + # get filename + match = re.match(r"[a-zA-Z0-9_\-]+\.html", match.groups()[1]) + if match and match.group() in path: + return True + return False + +def pos2line(s: str, pos:int): + 
return s[:pos].count('\n') + 1 + + +def generate_dependecy_file(filename:str, deps:list[str]): + line1 = f"{filename}:" + s = "" + for dep in deps: + line1 += f" {dep}" + s += f"{dep}:\n" + return line1 #+ "\n" + s + + + +""" +************************************************************ SIDENAV ************************************************************ +""" +class Sidenav: + LINK = 0 + SECTION = 1 + # 0: link, 1: section + entries: list[tuple[int, str, str]] = [] + skip_next = False + custom_name = None + @staticmethod + def addEntry(name: str, link: str): + if Sidenav.skip_next: + Sidenav.skip_next = None + return + if Sidenav.custom_name: + name = Sidenav.custom_name + Sidenav.custom_name = None + Sidenav.entries.append((Sidenav.LINK, name, link)) + @staticmethod + def addSection(name): + Sidenav.entries.append((Sidenav.SECTION, name, "")) + @staticmethod + def setCustomName(name: str): + Sidenav.custom_name = name + @staticmethod + def skipNext(): + Sidenav.skip_next = True + @staticmethod + def generate() -> str: + pdebug(f"Sidenav.generate(): found the following entries: {Sidenav.entries}") + sidenav:list[str] = sidenav_format.split('\n') + content_i = -1 + for i in range(len(sidenav)): # find in which line the entries need to be placed + if "#sidenav-content" in sidenav[i]: + content_i = i + break + if content_i >= 0: + sidenav.pop(content_i) + added_links = [] + for i in reversed(range(len(Sidenav.entries))): + entry = Sidenav.entries[i] + if entry[0] == Sidenav.LINK: + if entry[2] in added_links: continue # no duplicates + added_links.append(entry[2]) + sidenav.insert(content_i, sidenav_content_link.replace("#name", entry[1]).replace("#link", entry[2])) + else: + sidenav.insert(content_i, sidenav_content_section.replace("#name", entry[1])) + sidenav_s = "" + for line in sidenav: sidenav_s += line + "\n" # cant use "".join because of newlines + return sidenav_s + @staticmethod + def cmd_sidenav(args:str, variables:dict[str,str]) -> str: + space = 
args.find(" ") + if space < 0: + space = len(args) + cmd = args[:space] + cmd_args = "" + if 0 < space and space < len(args) - 1: + cmd_args = args[space+1:].strip(" ") + if cmd == "skip": + Sidenav.skipNext() + elif cmd == "section": + Sidenav.addSection(cmd_args) + elif cmd == "name": + Sidenav.setCustomName(cmd_args) + elif cmd == "custom": + match = re.fullmatch(re_sidenav_custom, cmd_args) + if match: + Sidenav.addEntry(match.groups()[1], match.groups()[0]) + else: + error(f"cmd_sidenav: Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"]) + elif cmd == "include": + return Sidenav.generate() + else: + error(f"cmd_sidenav: Invalid command: '{cmd}'", level=error_levels["light"]) + + return "" + + +""" +************************************************************ COMMANDS ************************************************************ +All these commands take one arg with trimmed whitespaces. +The arg may be anything + +They all need to return a string, which will be placed +into the source file at the place where the command was. +""" +def cmd_include(args: str, variables:dict[str, str]={}) -> str: + pdebug(f"cmd_include: args='{args}', variables='{variables}'") + content = "" + try: + with open(args) as file: + content = file.read() + except: + error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"]) + content = f"" + if args.endswith(".md"): + try: + from markdown import markdown + content = markdown(content, output_format="xhtml") + except: + error(f"cmd_include: Could convert markdown to html for file '{args}'. 
Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"]) + content = f"" + glob_dependcies.append(args) + return content + +def cmd_set(args: str, variables:dict[str, str]={}) -> str: + # re_set_map = r"([a-zA-Z0-9_]+)\?\{(([a-zA-Z0-9_]+:.+,)*([a-zA-Z0-9_]+:.+))\}" + # + space = args.find(' ') + # pdebug(f"cmd_set: varname='{args[:space]}, 'arg='{args[space+1:]}', variables='{variables}'") + if not (space > 0 and space < len(args)-1): + variables[args] = "" + pdebug(f"cmd_set: Setting to emptry string: {args}") + else: + varname = args[:space] + variables[varname] = "" + # check if map assignment with either , or ; + separator = ',' + match = re.fullmatch(re_set_map, args[space+1:].strip(' ')) + if not match: + match = re.fullmatch(re_set_map_alt, args[space+1:].strip(' ')) + separator = ';' + if match: + pdebug(f"cmd_set: Map {match.group()}") + depends = match.groups()[0] + if not depends in variables: + pdebug(f"cmd_set: Setting from map, but depends='{depends}' is not in variables") + return "" + depends_val = variables[depends] + for option in match.groups()[1].split(separator): + option = option.strip(" ") + pdebug(f"cmd_set: Found option {option}") + colon = option.find(':') # we will find one, regex guarantees + if option[:colon].strip(" ") == depends_val or option[:colon].strip(" ") == "*": + variables[varname] = option[colon+1:].strip(" ") + + else: # simple asignment + value = args[space+1:] + variables[varname] = value.strip(" ") + pdebug(f"cmd_set: Assignment {varname} -> {value.strip(' ')}") + return "" + +def cmd_default(args: str, variables:dict[str, str]={}) -> str: + separator = args.find(' ') + if args[:separator] not in variables: + return cmd_set(args, variables) + return "" + + +def cmd_comment(args: str, variables:dict[str, str]={}) -> str: + return f"" +def cmd_uncomment(args: str, variables:dict[str, str]={}) -> str: + return args + + +command2function:dict[str, Callable[[str, 
dict[str,str]], str]] = { + "include": cmd_include, + "set": cmd_set, + "default": cmd_default, + "comment": cmd_comment, + "uncomment": cmd_uncomment, + "sidenav": Sidenav.cmd_sidenav +} + +""" +************************************************************ PARSING ************************************************************ +""" +def parse_file(file:str, variables:dict[str,str]): + sidenav_include_pos = -1 + comment_begin = -1 + remove_comment = False + i = 0 + # if file.count(COMMENT_BEGIN) != file.count(COMMENT_END): + + while i < len(file): # at start of new line or end of comment + # replace variable usages in the current line + line_end = file.find('\n', i) + if line_end < 0: line_end = len(file) + file = file[:i] + replace_variables(file[i:line_end], variables) + file[line_end:] + line_end = file.find('\n', i) + if line_end < 0: line_end = len(file) + ptrace("Line after replacing variables:", file[i:line_end]) + + # check if heading for sidenav in line + match = re.search(re_sidenav_heading, file[i:line_end]) + if match: + Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}") + ptrace("> Found heading with id:", match.groups()) + + if comment_begin < 0: # if not in comment, find next comment + comment_begin = file.find(COMMENT_BEGIN, i, line_end) + # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}") + if comment_begin < 0: + i = line_end + 1 + continue + else: + # jump to comment_begin + old_i = i + i = comment_begin + len(COMMENT_BEGIN) # after comment begin + ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}") + + # if here, i at the character after COMMENT_BEGIN + # sanity check + tmp_next_begin = file.find(COMMENT_BEGIN, i) + if 0 < tmp_next_begin and tmp_next_begin < file.find(COMMENT_END, i): + error(f"Found next comment begin before the comment starting in line {pos2line(file, comment_begin)} is ended! Skipping comment. 
Comment without proper closing tags: '{file[i:line_end]}'", level=error_levels["light"]) + comment_begin = -1 + continue + # either at newline (if in multiline comment) or at comment end + possible_command_end = line_end + comment_end = file.find(COMMENT_END, i, line_end) + # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}, comment_end={comment_end}, line={file[i:line_end]}") + if comment_end > 0: possible_command_end = comment_end + assert(possible_command_end >= i) + + ptrace(f"> Possible command end: {possible_command_end}, possible command: {file[i:possible_command_end]}") + # find commands + # pdebug(">>> Line ", file[i:possible_command_end]) + match = re.fullmatch(re_preprocessor_command, file[i:possible_command_end].strip(" ")) + if match: # command comment + remove_comment = True + command = match.groups()[0] + args = match.groups()[1].replace('\t', ' ').strip(' ') + ptrace(f"> Found command '{command}' with args '{args}'") + if command == "sidenav" and args == "include": # if args contains anything else this wont work + sidenav_include_pos = comment_begin # remove the comment + insert_str = "" + elif command not in command2function: + error(f"Invalid command in line {pos2line(file, i)}: {command}", level=error_levels["light"]) + insert_str = "" + else: + insert_str = command2function[command](args, variables) + file = file[:i] + insert_str + file[possible_command_end:] + # replaced string of length possible_command_end - i with one of length insert_str + index_correction = -(possible_command_end - i) + len(insert_str) + possible_command_end += index_correction + line_end += index_correction + comment_end += index_correction + ptrace(f"> After command, the line is now '{file[i:possible_command_end]}'") + # i += len(insert_str) + + # remove comment if done + if possible_command_end == comment_end: + remove_newline = 0 + if file[comment_begin-1] == '\n' and file[comment_end+len(COMMENT_END)] == '\n': # if the comment consumes the whole 
file, remove the entire line + remove_newline = 1 + + if remove_comment: + # remove the comment tags, basically uncomment the comment + # pdebug(f"Removing comment tags from pos {comment_begin} to {comment_end}") + file = file[:comment_begin] + file[comment_begin+len(COMMENT_BEGIN):comment_end] + file[comment_end+len(COMMENT_END)+remove_newline:] + possible_command_end -= len(COMMENT_BEGIN) + i -= len(COMMENT_BEGIN) + remove_comment = False + comment_begin = -1 + else: # multiline comment + i = line_end + 1 + ptrace(f"Multiline comment, jumping to next line. char[i]='{file[i]}'") + # i = possible_command_end commented, because if something containing new commands is inserted we need to parse that as well + if sidenav_include_pos >= 0: + file = file[:sidenav_include_pos] + Sidenav.generate() + file[sidenav_include_pos:] + return file + + +def replace_variables(html:str, variables:dict[str, str]): + """ + find usage of variables and replace them with their value + """ + matches = [] + for match in re.finditer(re_variable_use, html): + matches.append(match) + html_list = list(html) + for match in reversed(matches): + pdebug(f"Found variable usage {match.groups()[0]}, match from {match.start()} to {match.end()}") + value = "" + if match.groups()[0] in variables: value = variables[match.groups()[0]] + for _ in range(match.start(), match.end()): + html_list.pop(match.start()) + html_list.insert(match.start(), value.strip(" ")) + return ''.join(html_list) + +""" +************************************************************ COMMAND LINE ************************************************************ +""" +def missing_arg_val(arg): + print("Missing argument for", arg) + exit(1) + +def missing_arg(arg): + print("Missing ", arg) + exit(1) + +def help(): + helpstring = """Synopsis: + Inject into : + python3 html-inect.py --target --output [OPTIONS] + \nCommand line options: + --target path to the target file + --output output to this file instead of overwriting target + --inplace 
edit target file in place + --var = set the value of a variable. Can be used multiple times + --output-deps output a Makefile listing all dependencies + --help show this + --exit-on where errorlevel is 'light', 'serious' or 'critical' + """ + print(helpstring) + +if __name__ == "__main__": + variables:dict[str, str] = {} + # parse args + target_path = "" + output_path = "" + dep_output_path = "" + gen_sidenav = False + inplace = False + i = 1 + while i in range(1, len(argv)): + if argv[i] == "--target": + if len(argv) > i + 1: target_path = argv[i+1].strip(" ") + else: missing_arg_val(argv[i]) + i += 1 + elif argv[i] == "--output": + if len(argv) > i + 1: output_path = argv[i+1].strip(" ") + else: missing_arg_val(argv[i]) + i += 1 + elif argv[i] == "--output-deps": + if len(argv) > i + 1: dep_output_path = argv[i+1].strip(" ") + else: missing_arg_val(argv[i]) + i += 1 + elif argv[i] == "--exit-on": + if argv[i+1].strip(" ") in error_levels.keys(): + if len(argv) > i + 1: exit_on_error_level = error_levels[argv[i+1].strip(" ")] + else: missing_arg_val(argv[i]) + else: + error(f"Invalid argument for --exit-on: {argv[i+1]}. Valid are {error_levels.keys()}") + i += 1 + elif argv[i] == "--var": + if len(argv) > i + 1: + sep = argv[i+1].find('=') + if sep > 0 and sep < len(argv[i+1]): + variables[argv[i+1][:sep].strip(" ")] = argv[i+1][sep+1:].strip(" ") + else: missing_arg_val(argv[i]) + i += 1 + elif argv[i] == "--inplace": + inplace = True + elif argv[i] == "--help": + help() + exit(0) + else: + error(f"Invalid argument: {argv[i]}") + i += 1 + # sanity checks + if not target_path: missing_arg("--target") + if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)") + if inplace: output_path = target_path + if not output_path: + print("Missing output path, just printing to stdout. 
Use --output or --inplace to save the result.") + + # get html + with open(target_path, "r") as file: + target_html = file.read() + + + output_html = parse_file(target_html, variables) + + # pdebug(f"Output: {output_html}") + + # save + if output_path: + with open(output_path, "w") as file: + file.write(output_html) + else: + print(output_html) + + if dep_output_path: + if output_path != target_path: + glob_dependcies.append(target_path) + depfile = generate_dependecy_file(output_path, glob_dependcies) + pdebug(f"Writing dependency file to {os.path.abspath(dep_output_path)}: {depfile}") + with open(dep_output_path, "w") as file: + file.write(depfile) diff --git a/nginx.conf b/nginx.conf new file mode 100644 index 0000000..233b0b4 --- /dev/null +++ b/nginx.conf @@ -0,0 +1,38 @@ +worker_processes 1; +error_log stderr; +daemon off; +pid nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + sendfile on; + keepalive_timeout 65; + + access_log /dev/null; + + server { + listen 8080; + server_name localhost; + + #charset koi8-r; + + #access_log logs/host.access.log main; + + location / { + root /home/user/www/; + index de/index.html; + } + #error_page 404 /404.html; + + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } + } +}