#!/bin/python3
"""HTML preprocessor.

Reads an HTML file, executes preprocessor commands that are embedded in HTML
comments (``<!-- #command args -->``), substitutes variable usages
(``#$(name)``) and optionally generates a side navigation from headings that
carry an ``id`` attribute.

NOTE(review): the HTML fragments in the SETTINGS section, the comment
delimiters and a few regular expressions were reconstructed from how the code
uses them; confirm them against the project's real templates.
"""
import argparse
import os
import re
import sys
from collections.abc import Callable
from os import path
from sys import argv
from typing import Optional

# TODO:
# - more testing
# - reintroduce the nav_selected class on nav feature

# ************************************************************
# SETTINGS
# ************************************************************
# Template of the side navigation. The line containing "#sidenav-content" is
# replaced by the generated entries.
# NOTE(review): reconstructed markup - confirm.
sidenav_format = """\
<div class="sidenav">
    <ul>
        #sidenav-content
    </ul>
</div>
"""
# "#name" and "#link" are replaced per entry. NOTE(review): reconstructed markup.
sidenav_content_link = "<li class=\"sidenav_link\"><a href=\"#link\">#name</a></li>"
# "#name" is replaced per section. NOTE(review): reconstructed markup.
sidenav_content_section = "<li class=\"sidenav_section\">#name</li>"

exit_on_include_failure = False

# ************************************************************
# REGULAR EXPRESSIONS
# ************************************************************
# SIDENAV
# heading with id: group 1 is the id, group 2 the heading text
# NOTE(review): reconstructed pattern - confirm.
re_sidenav_heading = r"<h\d.*id=(?:\"|\')([a-zA-Z0-9_\-]+)(?:\"|\').*>(.+)</h\d>"
# custom entry: group 1 is the href, group 2 the name
re_sidenav_custom = r"href=(?:\"|\')([^\"\' ]+)(?:\"|\') +name=(?:\"|\')(.+)(?:\"|\')"

# map assignment separated by commas
re_set_map = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^,]*, *)+[a-zA-Z0-9_*]+ *: *[^,]*) *,? *\}"
# map assignment separated by semicolons
re_set_map_alt = r"([a-zA-Z0-9_]+) *\? *\{( *(?:[a-zA-Z0-9_*]+ *: *[^;]* *; *)+[a-zA-Z0-9_*]+ *: *[^;]*) *;? *\}"

# variable usage: #$(myvar)
re_variable_use = r"#\$\(([a-zA-Z0-9_]+)\)"

# preprocessor command, only recognised inside comments
re_preprocessor_command = r"#([a-zA-Z]+) *(.*) *"

COMMENT_BEGIN = "<!--"
COMMENT_END = "-->"

# ************************************************************
# GLOBALS
# ************************************************************
# every file that was included while processing (used for the dependency file)
glob_dependcies: list[str] = []

exit_codes = {
    "FileNotFound": 2,
    "MarkdownConversionError": 3,
}
error_levels = {
    "light": 0,
    "serious": 1,
    "critical": 2,
}
# errors with at least this level terminate the program
exit_on_error_level = error_levels["serious"]

# ************************************************************
# UTILITY
# ************************************************************
DEBUG = False


def pdebug(*args, **keys):
    """Print like ``print`` but only when DEBUG is enabled."""
    if DEBUG:
        print(*args, **keys)


TRACE = False


def ptrace(*args, **keys):
    """Print like ``print`` but only when TRACE is enabled."""
    if TRACE:
        print(*args, **keys)


def error(*args, level: int = exit_on_error_level, exit_code: int = 1, **keys):
    """Report a problem and exit if it is severe enough.

    Prints "ERROR:" and exits with ``exit_code`` when ``level`` is at least
    ``exit_on_error_level`` (read at call time); otherwise prints "WARNING:"
    and returns. The default of ``level`` is bound when the function is
    defined, i.e. to the "serious" level.
    """
    if level >= exit_on_error_level:
        print("ERROR:", *args, **keys)
        sys.exit(exit_code)
    else:
        print("WARNING:", *args, **keys)


def line_is_link_to_path(line, path):
    """Return True if ``line`` contains a link to an .html file named in ``path``."""
    # NOTE(review): link pattern reconstructed; group 1 is the href value.
    link = re.search(r"<a href=[\"'](.+)[\"']>.+</a>", line)
    if link:
        # get filename
        filename = re.match(r"[a-zA-Z0-9_\-]+\.html", link.group(1))
        if filename and filename.group() in path:
            return True
    return False


def pos2line(s: str, pos: int):
    """Return the 1-based line number of character position ``pos`` in ``s``."""
    return s[:pos].count('\n') + 1


def generate_dependecy_file(filename: str, deps: list[str]):
    """Return a Makefile rule line ``<filename>: <dep> <dep> ...``."""
    return f"{filename}:" + "".join(f" {dep}" for dep in deps)


def _split_on(tokens: list[str], operator: str) -> list[list[str]]:
    """Split an operand/operator token list at every occurrence of ``operator``."""
    groups: list[list[str]] = [[]]
    for index, token in enumerate(tokens):
        # odd indices are operators, even indices are operands
        if index % 2 == 1 and token == operator:
            groups.append([])
        else:
            groups[-1].append(token)
    return groups


def _compare_chain(chain: list[str]) -> bool:
    """Evaluate ``a == b != c ...``; a single operand is true when non-empty."""
    if len(chain) == 1:
        return bool(chain[0])
    return all(
        (left == right) if op == "==" else (left != right)
        for left, op, right in zip(chain[0::2], chain[1::2], chain[2::2])
    )


def evaluate_condition(input_string) -> bool:
    """Evaluate a condition such as ``a == b && c != d || e``.

    Operands are plain strings (spaces are ignored). Precedence is
    comparison, then ``&&``, then ``||``; comparisons may be chained and a
    bare operand is true when it is not empty.

    The condition text comes from the processed file, so it is evaluated
    here directly instead of being handed to ``eval``.
    """
    tokens = re.split(r"(==|!=|&&|\|\|)", input_string.replace(" ", ""))
    ptrace(f"> Evaluating condition {tokens}")
    return any(
        all(_compare_chain(chain) for chain in _split_on(alternative, "&&"))
        for alternative in _split_on(tokens, "||")
    )


# ************************************************************
# SIDENAV
# ************************************************************
class Sidenav:
    """Collects side navigation entries (class-level state) and renders them."""
    LINK = 0
    SECTION = 1
    # (entry type, name, link); type 0: link, 1: section
    entries: list[tuple[int, str, str]] = []
    skip_next = False
    custom_name = None

    @staticmethod
    def addEntry(name: str, link: str):
        """Add a link entry, honouring a pending skip or custom name."""
        if Sidenav.skip_next:
            Sidenav.skip_next = False
            return
        if Sidenav.custom_name:
            name = Sidenav.custom_name
            Sidenav.custom_name = None
        Sidenav.entries.append((Sidenav.LINK, name, link))

    @staticmethod
    def addSection(name):
        """Add a section entry."""
        Sidenav.entries.append((Sidenav.SECTION, name, ""))

    @staticmethod
    def setCustomName(name: str):
        """Use ``name`` instead of the real name for the next link entry."""
        Sidenav.custom_name = name

    @staticmethod
    def skipNext():
        """Do not add the next link entry."""
        Sidenav.skip_next = True

    @staticmethod
    def generate() -> str:
        """Render ``sidenav_format`` with all collected entries.

        Entries keep their order; if several link entries share a link only
        the last one is kept. Every line of the result ends with a newline.
        """
        pdebug(f"Sidenav.generate(): found the following entries: {Sidenav.entries}")
        sidenav: list[str] = sidenav_format.split('\n')
        # find in which line the entries need to be placed
        content_i = next(
            (i for i, line in enumerate(sidenav) if "#sidenav-content" in line), -1)
        if content_i >= 0:
            sidenav.pop(content_i)
            added_links = set()
            # insert back to front at a fixed index so the final order is preserved
            for entry_type, name, link in reversed(Sidenav.entries):
                if entry_type == Sidenav.LINK:
                    if link in added_links:
                        continue  # no duplicates
                    added_links.add(link)
                    sidenav.insert(
                        content_i,
                        sidenav_content_link.replace("#name", name).replace("#link", link))
                else:
                    sidenav.insert(content_i, sidenav_content_section.replace("#name", name))
        return "".join(line + "\n" for line in sidenav)

    @staticmethod
    def cmd_sidenav(args: str, variables: dict[str, str]) -> str:
        """Handle ``#sidenav <skip|section|name|custom|include> [args]``.

        Returns the generated sidenav for ``include`` and "" otherwise.
        """
        cmd, _, rest = args.partition(" ")
        cmd_args = rest.strip(" ")
        if cmd == "skip":
            Sidenav.skipNext()
        elif cmd == "section":
            Sidenav.addSection(cmd_args)
        elif cmd == "name":
            Sidenav.setCustomName(cmd_args)
        elif cmd == "custom":
            match = re.fullmatch(re_sidenav_custom, cmd_args)
            if match:
                Sidenav.addEntry(match.groups()[1], match.groups()[0])
            else:
                error(f"cmd_sidenav: Invalid argument for command 'custom': '{cmd_args}'", level=error_levels["light"])
        elif cmd == "include":
            return Sidenav.generate()
        else:
            error(f"cmd_sidenav: Invalid command: '{cmd}'", level=error_levels["light"])
        return ""


# ************************************************************
# COMMANDS
# ************************************************************
# All these commands take one arg with trimmed whitespaces.
# The arg may be anything.
#
# They all need to return a string, which will be placed
# into the source file at the place where the command was.
def cmd_include(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Return the content of a file, optionally only one ``#section`` of it.

    ``args`` is ``<filename> [section]``. Files ending in ".md" are converted
    to html with python-markdown. The filename is recorded in
    ``glob_dependcies``. On failure ``error`` is called and an html comment
    is returned instead of the content.
    """
    args = args.split(' ')
    pdebug(f"cmd_include: args='{args}', variables='{variables}'")
    filename = args[0]
    content = ""
    try:
        with open(filename) as file:
            content = file.read()
        if len(args) > 1:  # if section was specified
            target_section = args[1]
            p = HTMLParser(content, {})
            # start/end of the requested section; kept in pos so that edits shift them
            p.pos["start"] = p.pos["end"] = -1
            while p.i < len(p):  # at start of new line or end of comment
                p.next_line()
                ptrace(f"cmd_include: Processing at i={p.i} in line {pos2line(p.file, p.i)}")
                if not p.find_comment_begin():
                    continue
                if not p.find_comment_end():
                    continue
                match = p.find_command()
                if match:
                    command = match.groups()[0]
                    cmd_args = match.groups()[1].replace('\t', ' ').strip(' ')
                    pdebug(f"cmd_include Found command '{command}' with args '{cmd_args}'")
                    if command == "section":
                        if cmd_args.startswith(target_section):
                            p.pos["start"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
                        elif p.pos["start"] >= 0:  # next section begins: end of target
                            p.pos["end"] = max(p.pos["cmt_end"] + len(COMMENT_END), p.pos["line_end"] + 1)
                    p.replace_command_with_output("")
                p.command_end()
                if p.pos["start"] >= 0 and p.pos["end"] > 0:
                    break
            if p.pos["start"] >= 0:
                if p.pos["end"] < 0:
                    p.pos["end"] = len(p)
                content = p[p.pos["start"]:p.pos["end"]]
            else:
                error(f"cmd_include: Could not find section {target_section} in file {filename}")
    except FileNotFoundError:
        error(f"cmd_include: Could not open file '{filename}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
        # NOTE(review): placeholder comment reconstructed - confirm wording.
        content = f"<!-- Could not include '{filename}' -->"
    if filename.endswith(".md"):
        try:
            from markdown import markdown
            content = markdown(content, output_format="xhtml")
        except Exception:
            error(f"cmd_include: Could not convert markdown to html for file '{filename}'. Is python-markdown installed?", level=error_levels["critical"], exit_code=exit_codes["MarkdownConversionError"])
            # NOTE(review): placeholder comment reconstructed - confirm wording.
            content = f"<!-- Could not convert '{filename}' to html -->"
    glob_dependcies.append(filename)
    return content


def cmd_section(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Section markers produce no output; they are only read by ``cmd_include``."""
    return ""


def cmd_return(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Set a variable and return its new value.

    ``args`` is one of
    - ``<name>``: set to the empty string, returns ""
    - ``<name> <value>``: simple assignment
    - ``<name> <mapvar>?{<mapvar value>:<value>,...}`` (``,`` or ``;`` as
      separator): assign the value whose key equals the value of ``mapvar``
      or is ``*``; later matching options override earlier ones.
    """
    if variables is None:
        variables = {}
    space = args.find(' ')
    if not (space > 0 and space < len(args) - 1):
        variables[args] = ""
        pdebug(f"cmd_set: Setting to empty string: {args}")
        return ""

    varname = args[:space]
    assigned = args[space+1:].strip(' ')
    variables[varname] = ""
    # check if map assignment with either , or ;
    separator = ','
    match = re.fullmatch(re_set_map, assigned)
    if not match:
        match = re.fullmatch(re_set_map_alt, assigned)
        separator = ';'
    if match:
        pdebug(f"cmd_set: Map {match.group()}")
        depends = match.groups()[0]
        if depends not in variables:
            pdebug(f"cmd_set: Setting from map, but depends='{depends}' is not in variables")
            return ""
        depends_val = variables[depends]
        for option in match.groups()[1].split(separator):
            option = option.strip(" ")
            pdebug(f"cmd_set: Found option {option}")
            colon = option.find(':')  # we will find one, regex guarantees
            key = option[:colon].strip(" ")
            if key == depends_val or key == "*":
                variables[varname] = option[colon+1:].strip(" ")
    else:  # simple assignment
        variables[varname] = assigned
        pdebug(f"cmd_set: Assignment {varname} -> {assigned}")
    return variables[varname]


def cmd_set(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Like ``cmd_return`` but without output."""
    cmd_return(args, variables)
    return ""


def cmd_unset(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Remove a variable if it is set."""
    if variables is None:
        variables = {}
    variable = args.strip(' ')
    if variable not in variables:
        pdebug(f"unset: variable '{variable}' is not set")
    else:
        variables.pop(variable)
    return ""


def cmd_default(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Like ``cmd_set`` but only if the variable is not set yet."""
    if variables is None:
        variables = {}
    # partition keeps the whole name when args contains no space
    varname = args.partition(' ')[0]
    if varname not in variables:
        cmd_return(args, variables)
    return ""


def cmd_comment(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Return ``args`` wrapped in an html comment."""
    # NOTE(review): reconstructed format - confirm.
    return f"{COMMENT_BEGIN} {args} {COMMENT_END}"


def cmd_uncomment(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Return ``args`` so that it ends up outside of the comment."""
    return args


def cmd_error(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Raise a critical error with the given message."""
    error(f"Encounted 'error' command: {args}", level=error_levels["critical"])
    return ""


def cmd_warning(args: str, variables: Optional[dict[str, str]] = None) -> str:
    """Raise a light error (warning) with the given message."""
    error(f"Encounted 'warning' command: {args}", level=error_levels["light"])
    return ""


command2function: dict[str, Callable[[str, dict[str, str]], str]] = {
    "include": cmd_include,
    "section": cmd_section,
    "return": cmd_return,
    "set": cmd_set,
    "unset": cmd_unset,
    "default": cmd_default,
    "comment": cmd_comment,
    "uncomment": cmd_uncomment,
    "sidenav": Sidenav.cmd_sidenav,
    "warning": cmd_warning,
    "error": cmd_error,
}


# ************************************************************
# PARSING
# ************************************************************
class Parser():
    """Text buffer with named positions that are kept valid across edits."""

    def __init__(self, file):
        self.file = file
        self.pos: dict[str, int] = {}
        self.state: dict[str, bool] = {}

    def remove(self, start, stop, ignore_bounds=()):
        """remove range [start, stop) of text and update positions

        Positions at or behind ``stop`` are shifted. A position strictly
        inside the range is reported via ``error`` unless its name is listed
        in ``ignore_bounds``.
        """
        delete_length = stop - start
        nl, esl = "\n", "\\n"
        ptrace(f"- Deleting range [{start}, {stop}) of length {delete_length}: '{self.file[start:stop].replace(nl, esl)}'")
        assert(stop >= start)
        assert(stop <= len(self.file))
        self.file = self.file[:start] + self.file[stop:]
        for k, pos in self.pos.items():
            if pos >= stop:
                self.pos[k] -= delete_length
            elif pos > start and k not in ignore_bounds:
                error(f"Parser.remove: Position {k}={pos} within deleted range [{start},{stop})", level=1)

    def replace(self, start, stop, replacement):
        """replace range [start, stop) with replacement and update positions"""
        assert(stop >= start)
        assert(stop <= len(self.file))
        ptrace(f"- Replacing range [{start}, {stop}): '{self.file[start:stop]}' with '{replacement}'")
        self.file = self.file[:start] + replacement + self.file[stop:]
        length_difference = stop - start - len(replacement)
        for k, pos in self.pos.items():
            if pos >= stop:
                self.pos[k] -= length_difference
            elif pos > start:
                error(f"Parser.replace: Position {k}={pos} within replaced range [{start},{stop})", level=1)

    def __getitem__(self, key):
        return self.file[key]

    def __len__(self):
        return len(self.file)


class HTMLParser(Parser):
    """
    Parse a html file
    Each function operates the positon indicated by i until the position "line_end"
    """

    def __init__(self, file, variables: dict[str, str], remove_comments=False):
        super().__init__(file)
        self.i = 0
        self.variables = variables
        self.pos["cmt_beg"] = -1
        self.pos["cmt_end"] = -1
        self.pos["cmd_beg"] = -1
        self.pos["cmd_end"] = -1
        self.pos["line_end"] = -1
        # char pos of the first char of the last block, if waiting for elif, else or endif
        self.pos["conditional_block_beg"] = -1
        self.state["cmd_in_cmt"] = False
        self.state["last_condition"] = False  # if the last if condition was true
        # if any condition of the current if/elif chain was true; initialised
        # here so that a stray elif/else can not raise a KeyError
        self.state["any_condition"] = False
        self.remove_comments = remove_comments

    def next_line(self):
        """update line_end to the next newline behind i (or the end of the file)"""
        self.pos["line_end"] = self.file.find('\n', self.i+1)
        if self.pos["line_end"] < 0:
            self.pos["line_end"] = len(self)

    def use_variables(self):
        """replace variable usages in the current line"""
        self.replace(self.i, self.pos["line_end"], substitute_variables(self[self.i:self.pos["line_end"]], self.variables))
        ptrace("> Line after variable substitution:", self.file[self.i:self.pos["line_end"]])

    def add_sidenav_headings(self):
        """check if heading for sidenav in line"""
        match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
        if match:
            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
            ptrace("> Found heading with id:", match.groups())

    def find_comment_begin(self) -> bool:
        """
        find the beginning of a comment in the current line
        if comment begin was found, jump into the comment, return True
        if not, jump to the next line and return False
        """
        if self.pos["cmt_beg"] >= 0:
            return True  # still in previous comment
        # not in comment, find next comment
        self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
        if self.pos["cmt_beg"] < 0:
            self.i = self.pos["line_end"] + 1
            return False
        # jump to comment_begin
        old_i = self.i
        self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
        ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
        return True

    def find_comment_end(self):
        """
        call after find_comment_begin returns true to update the cmt_end
        call continue when returning false
        """
        # in comment, i at the character after COMMENT_BEGIN
        self.pos["cmt_end"] = self.file.find(COMMENT_END, self.i)
        # sanity checks
        if self.pos["cmt_end"] < 0:
            error(f"Comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
            # only reached when the error is not fatal: the rest of the file
            # belongs to the open comment, so stop instead of retrying forever
            self.pos["cmt_beg"] = -1
            self.i = len(self)
            return False
        tmp_next_begin = self.file.find(COMMENT_BEGIN, self.i)
        if 0 < tmp_next_begin and tmp_next_begin < self.pos["cmt_end"]:
            error(f"Found next comment begin before the comment starting in line {pos2line(self.file, self.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{self.file[self.i:self.pos['line_end']]}'", level=error_levels["light"])
            self.pos["cmt_beg"] = -1
            return False
        return True

    def find_command(self):
        """
        look for a command between i and the end of the line or comment
        returns the match of re_preprocessor_command or None
        """
        # either at newline (if in multiline comment) or at comment end
        self.pos["cmd_beg"] = self.i
        self.pos["cmd_end"] = min(self.pos["line_end"], self.pos["cmt_end"])
        assert self.pos["cmd_end"] >= self.i, f"cmd_end={self.pos['cmd_end']}, i={self.i}, line_end={self.pos['line_end']}, cmt_end={self.pos['cmt_end']}"
        ptrace(f"> Possible command end: {self.pos['cmd_end']}, possible command: '{self[self.i:self.pos['cmd_end']]}'")
        # find commands
        match = re.fullmatch(re_preprocessor_command, self[self.i:self.pos["cmd_end"]].strip(" "))
        if match:
            self.state["cmd_in_cmt"] = True
        return match

    def replace_command_with_output(self, command_output):
        """replace the text between i and cmd_end with the output of the command"""
        self.replace(self.i, self.pos["cmd_end"], command_output)
        ptrace(f"> After insertion, the line is now '{self.file[self.i:self.pos['line_end']]}'")

    def command_end(self):
        """
        finish the current command/line
        at the end of a comment: remove the comment tags if it contained a
        command, or the whole comment if remove_comments is set
        in a multiline comment: jump to the next line
        """
        if self.pos["cmd_end"] == self.pos["cmt_end"]:  # reached end of comment
            if self.state["cmd_in_cmt"] or self.remove_comments:
                # if the comment consumes the whole line, remove the entire line
                after_comment = self.pos["cmt_end"] + len(COMMENT_END)
                starts_line = self.pos["cmt_beg"] == 0 or self[self.pos["cmt_beg"]-1] == '\n'
                ends_line = after_comment < len(self) and self[after_comment] == '\n'
                remove_newline = 1 if starts_line and ends_line else 0
                if self.state["cmd_in_cmt"]:  # remove comment tags if a command was found
                    ptrace(f"Deleting opening comment tags")
                    self.remove(self.pos["cmt_beg"], self.pos["cmt_beg"] + len(COMMENT_BEGIN))
                    self.remove(self.pos["cmt_end"], self.pos["cmt_end"] + len(COMMENT_END) + remove_newline, ignore_bounds=["cmt_end", "cmd_end", "line_end"])
                    # process the line again, because a command might have inserted new comments
                    # (i is not part of pos, so it has to be shifted by hand)
                    self.i -= len(COMMENT_BEGIN)
                elif self.remove_comments:  # remove entire comment
                    self.remove(self.pos["cmt_beg"], self.pos["cmt_end"] + len(COMMENT_END) + remove_newline, ignore_bounds=["cmt_end", "cmd_beg", "cmd_end", "line_end"])
                    self.i = self.pos["cmt_beg"]
            self.state["cmd_in_cmt"] = False
            self.pos["cmt_beg"] = -1
            self.pos["cmd_beg"] = -1
            self.pos["cmt_end"] = -1
            self.pos["cmd_end"] = -1
        else:  # multiline comment
            self.pos["cmt_end"] = -1
            self.pos["cmd_end"] = -1
            self.i = self.pos["line_end"] + 1
            ptrace(f"> Multiline comment, jumping to next line.")
        # i is not moved behind the command output, because if something
        # containing new commands is inserted we need to parse that as well


def parse_file(_file: str, variables: dict[str, str], remove_comments):
    """Preprocess the html text ``_file`` and return the result.

    Substitutes variables, collects sidenav headings, executes the commands
    found in comments and resolves if/elif/else/endif blocks. ``variables``
    is modified by set/unset/default commands. Comments without commands are
    removed when ``remove_comments`` is true. A ``#sidenav include`` is
    filled in at the end, when all entries are known.
    """
    p = HTMLParser(_file, variables, remove_comments=remove_comments)
    sidenav_include_pos = -1

    while p.i < len(p):  # at start of new line or end of comment
        p.next_line()
        ptrace(f"Processing at i={p.i} in line {pos2line(p.file, p.i)}")
        p.use_variables()
        p.add_sidenav_headings()

        if not p.find_comment_begin():
            continue
        if not p.find_comment_end():
            continue

        match = p.find_command()
        if match:
            command = match.groups()[0]
            args = match.groups()[1].replace('\t', ' ').strip(' ')
            pdebug(f"> Found command '{command}' with args '{args}'")
            # delete from previous block if necessary
            if command in ("elif", "else", "endif"):
                if p.pos["conditional_block_beg"] < 0:
                    # no open block: there is nothing that could be deleted
                    error(f"Misplaced '{command}' in line {pos2line(p.file, p.i)}")
                elif p.state["last_condition"]:
                    # keep the previous block, delete block from here at next endif
                    p.state["last_condition"] = False
                else:
                    # delete block from last condition statement
                    ptrace(f"> Deleting block from last condition")
                    p.remove(p.pos["conditional_block_beg"], p.pos["cmt_beg"])
                    p.i = p.pos["cmd_beg"]  # i is not part of pos, re-sync it
                p.pos["conditional_block_beg"] = p.i
                if command == "endif":
                    p.pos["conditional_block_beg"] = -1
                    p.state["last_condition"] = False
                    p.state["any_condition"] = False

            # evaluate ifs
            if command == "if":
                p.pos["conditional_block_beg"] = p.i
                p.state["last_condition"] = evaluate_condition(args)
                p.state["any_condition"] = p.state["last_condition"]
                pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}")
                cmd_output = ""
            elif command == "elif":
                p.pos["conditional_block_beg"] = p.i
                p.state["last_condition"] = evaluate_condition(args) if not p.state["any_condition"] else False
                if p.state["last_condition"]:
                    p.state["any_condition"] = True
                pdebug(f"> Command {command} condition evaluated to {p.state['last_condition']}")
                cmd_output = ""
            elif command == "else":
                p.pos["conditional_block_beg"] = p.i
                p.state["last_condition"] = not p.state["any_condition"]
                cmd_output = ""
            elif p.pos["conditional_block_beg"] < 0 or p.state["last_condition"]:
                # outside of a conditional block or inside a block that is kept
                if command == "sidenav" and args == "include":  # if args contains anything else this wont work
                    sidenav_include_pos = p.pos["cmt_beg"]  # remove the comment
                    cmd_output = ""
                elif command == "endif":
                    cmd_output = ""
                elif command not in command2function:
                    error(f"Invalid command in line {pos2line(p.file, p.i)}: {command}", level=error_levels["light"])
                    cmd_output = ""
                else:
                    cmd_output = command2function[command](args, variables)
            else:
                # inside a block that will be deleted: do not execute
                cmd_output = ""
            p.replace_command_with_output(cmd_output)
        p.command_end()

    if sidenav_include_pos >= 0:
        return p.file[:sidenav_include_pos] + Sidenav.generate() + p.file[sidenav_include_pos:]
    return p.file


def substitute_variables(html: str, variables: dict[str, str]):
    """
    find usage of variables and replace them with their value
    undefined variables are replaced with the empty string
    """
    def value_of(match):
        name = match.group(1)
        pdebug(f"> Found variable usage {name}, match from {match.start()} to {match.end()}")
        if name in variables:
            return variables[name].strip(" ")
        pdebug(f"Variable {name} is used but not defined")
        return ""

    return re.sub(re_variable_use, value_of, html)


# ************************************************************
# COMMAND LINE
# ************************************************************
def main():
    """Command line entry point."""
    global DEBUG, TRACE, exit_on_error_level

    parser = argparse.ArgumentParser(prog="bUwUma html preprocessor")
    parser.add_argument("--input", action="store", help="path to the input file", required=True)
    parser.add_argument("--output", action="store", help="output to this file", default="")
    parser.add_argument("--inplace", action="store_true", help="overwrite input file")
    parser.add_argument("--var", action="append", help="set a variable --var varname=value", default=[])
    parser.add_argument("--output-deps", action="store", help="output a Makefile listing all dependencies", default="")
    parser.add_argument("--exit-on", action="store", help="exit when an error of the given severity occures", choices=["light", "serious", "critical"], default="serious")
    parser.add_argument("--debug", action="store_true", help="be more verbose", default=False)
    parser.add_argument("--trace", action="store_true", help="be extremly verbose", default=False)
    parser.add_argument("--preserve-comments", action="store_true", help="do not remove normal html comments", default=False)

    variables: dict[str, str] = {}
    args = parser.parse_args()
    for var in args.var:
        sep = var.find('=')
        if sep > 0 and sep < len(var) - 1:
            variables[var[:sep].strip(" ")] = var[sep+1:].strip(" ")
        else:
            parser.error(f"Invalid argument: --var '{var}'\n\tUsage: --var <varname>=<value>")

    args.input = args.input.strip(" ")
    args.output = args.output.strip(" ")
    args.output_deps = args.output_deps.strip(" ")

    TRACE = args.trace
    if args.trace:
        args.debug = True
    DEBUG = args.debug
    exit_on_error_level = error_levels[args.exit_on]

    # sanity checks
    if not path.isfile(args.input):
        parser.error(f"Invalid input file:: {args.input}")
    # check the conflict before --inplace fills in args.output
    if args.inplace and args.output:
        parser.error(f"Only one of --output or --inplace must be given")
    if args.output:
        # a bare filename has an empty dirname, which means the current directory
        if not path.isdir(path.dirname(args.output) or "."):
            parser.error(f"Invalid path to output file - directory does not exist: '{path.dirname(args.output)}'")
    elif args.inplace:
        args.output = args.input
    if args.output_deps:
        if not path.isdir(path.dirname(args.output_deps) or "."):
            parser.error(f"Invalid path to dependency file - directory does not exist: '{path.dirname(args.output_deps)}'")
        if not args.output:
            parser.error(f"--output-deps requires either --output or --inplace")

    # get html
    with open(args.input, "r") as file:
        target_html = file.read()

    output_html = parse_file(target_html, variables, not args.preserve_comments)

    # save
    if args.output:
        with open(args.output, "w") as file:
            file.write(output_html)
    else:
        print(output_html)

    if args.output_deps:
        if args.output != args.input:
            glob_dependcies.append(args.input)
        depfile = generate_dependecy_file(args.output, glob_dependcies)
        pdebug(f"Writing dependency file to {os.path.abspath(args.output_deps)}: {depfile}")
        with open(args.output_deps, "w") as file:
            file.write(depfile)


if __name__ == "__main__":
    main()