diff --git a/html-preprocessor b/html-preprocessor
index 46ad756..80c3611 100755
--- a/html-preprocessor
+++ b/html-preprocessor
@@ -216,6 +216,54 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
try:
with open(args) as file:
content = file.read()
+ p = Parser(content)
+ p.pos["seg_beg"] = -1
+ p.pos["seg_end"] = -1
+ i = 0
+ while i < len(p): # at start of new line or end of comment
+ # simply search for the segment begin and end
+ ptrace(f"cmd_include: Processing at i={i} in line {pos2line(p.file, i)}")
+
+ # look for comment
+ if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
+ p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
+ # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
+ if p.pos["cmt_beg"] < 0:
+ i = p.pos["line_end"] + 1
+ continue
+ else:
+ # jump to comment_begin
+ old_i = i
+ i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
+ ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
+
+ # in comment, i at the character after COMMENT_BEGIN
+ p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
+ # sanity checks
+ if p.pos["cmt_end"] < 0:
+ error(f"Comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
+ else:
+ tmp_next_begin = p.file.find(COMMENT_BEGIN, i)
+ if 0 < tmp_next_begin and tmp_next_begin < p.pos["cmt_end"]:
+ error(f"Found next comment begin before the comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{p.file[i:p.pos['line_end']]}'", level=error_levels["light"])
+ p.pos["cmt_beg"] = -1
+ continue
+
+ # either at newline (if in multiline comment) or at comment end
+ p.pos["cmd_beg"] = i
+ p.pos["cmd_end"] = min(p.pos["line_end"], p.pos["cmt_end"])
+ assert p.pos["cmd_end"] >= i, f"cmd_end={p.pos['cmd_end']}, i={i}, line_end={p.pos['line_end']}, cmt_end={p.pos['cmt_end']}"
+ ptrace(f"> Possible command end: {p.pos['cmd_end']}, possible command: '{p[i:p.pos['cmd_end']]}'")
+
+ # find commands
+ match = re.fullmatch(re_preprocessor_command, p[i:p.pos["cmd_end"]].strip(" "))
+ if match: # command comment
+ p.state["cmd_in_cmt"] = True
+ command = match.groups()[0]
+ args = match.groups()[1].replace('\t', ' ').strip(' ')
+ pdebug(f"> Found command '{command}' with args '{args}'")
+ # delete from previous block if
+ if command in ["elif", "else", "endif"]:
except:
error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
content = f""
@@ -343,9 +391,65 @@ class Parser():
return len(self.file)
+class HTMLParser(Parser):
+    """
+    Parse a html file
+    Each method operates on the position indicated by self.i until the position self.pos["line_end"]
+    """
+    def __init__(self, file, variables:dict[str, str]):
+        """
+        file: passed unchanged to Parser.__init__
+        variables: mapping handed to replace_variables() by use_variables()
+        """
+        super().__init__(file)
+        self.i = 0  # current char position in self.file
+        self.variables = variables
+        self.pos["cmt_beg"] = -1  # < 0 means: currently not inside a comment
+        self.pos["cmt_end"] = -1
+        self.pos["cmd_beg"] = -1
+        self.pos["cmd_end"] = -1  # key spelled like the "cmd_end" lookups done while parsing
+        self.pos["conditional_block_beg"] = -1  # char pos of the first char of the last block, if waiting for elif, else or endif
+        self.state["cmd_in_cmt"] = False
+        self.state["last_condition"] = False  # if the last if condition was true
+
+    def next_line(self):
+        """
+        set pos["line_end"] to the next newline at or after self.i
+        if there is no newline left, line_end is the length of the file
+        self.i itself is not changed
+        """
+        self.pos["line_end"] = self.file.find('\n', self.i)
+        if self.pos["line_end"] < 0: self.pos["line_end"] = len(self)
+
+    def use_variables(self):
+        """replace variable usages in the current line (self.i up to line_end) using self.variables"""
+        self.replace(self.i, self.pos["line_end"], replace_variables(self[self.i:self.pos["line_end"]], self.variables))
+        # NOTE(review): line_end is not recomputed here; presumably Parser.replace adjusts it if the line length changes - confirm
+        ptrace("> Line after replacing variables:", self.file[self.i:self.pos["line_end"]])
+
+    def add_sidenav_headings(self):
+        """check if heading for sidenav in the current line, if so add it as Sidenav entry"""
+        match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
+        if match:
+            # second group is passed as first argument, first group becomes the "#..." link target
+            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
+            ptrace("> Found heading with id:", match.groups())
+
+    def find_comment_begin(self) -> bool:
+        """
+        find the beginning of a comment in the current line
+        if comment begin was found, jump into the comment (self.i after COMMENT_BEGIN), return True
+        if already in a comment (pos["cmt_beg"] >= 0), change nothing, return True
+        if no comment begins in the current line, move self.i behind line_end, return False
+        """
+        # look for comment begin
+        if self.pos["cmt_beg"] < 0:  # if not in comment, find next comment
+            self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
+            if self.pos["cmt_beg"] < 0:
+                # nothing in this line: continue at the first char of the next line
+                self.i = self.pos["line_end"] + 1
+                return False
+            else:
+                # jump to comment_begin
+                old_i = self.i
+                self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
+                ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
+                return True
+        return True  # still in previous comment
+
+
+
def parse_file(_file:str, variables:dict[str,str]):
- p = Parser(_file)
+ p = HTMLParser(_file, variables)
sidenav_include_pos = -1
p.pos["cmt_beg"] = -1
p.pos["cmt_end"] = -1
@@ -358,32 +462,15 @@ def parse_file(_file:str, variables:dict[str,str]):
# if file.count(COMMENT_BEGIN) != file.count(COMMENT_END):
while i < len(p): # at start of new line or end of comment
+ p.next_line()
ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}")
- # replace variable usages in the current line
- p.pos["line_end"] = p.file.find('\n', i)
- if p.pos["line_end"] < 0: p.pos["line_end"] = len(p)
- p.replace(i, p.pos["line_end"], replace_variables(p[i:p.pos["line_end"]], variables))
- ptrace("> Line after replacing variables:", p.file[i:p.pos["line_end"]])
+ p.use_variables()
+ p.add_sidenav_headings()
+
+ if not p.find_comment_begin(): continue
- # check if heading for sidenav in line
- match = re.search(re_sidenav_heading, p[i:p.pos["line_end"]])
- if match:
- Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
- ptrace("> Found heading with id:", match.groups())
- # look for comment
- if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
- p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
- # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
- if p.pos["cmt_beg"] < 0:
- i = p.pos["line_end"] + 1
- continue
- else:
- # jump to comment_begin
- old_i = i
- i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
- ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
# in comment, i at the character after COMMENT_BEGIN
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
@@ -578,7 +665,7 @@ if __name__ == "__main__":
error(f"Invalid argument: {argv[i]}")
i += 1
# sanity checks
- if not target_path: missing_arg("--input")
+ if not target_path: missing_arg("--target")
if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)")
if inplace: output_path = target_path
if not output_path: