start move to HTMLParser class

This commit is contained in:
matthias@arch 2023-11-17 15:36:15 +01:00
parent ea8c4cc8c4
commit 26fc849ed5

View File

@ -216,6 +216,54 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
try: try:
with open(args) as file: with open(args) as file:
content = file.read() content = file.read()
p = Parser(content)
p.pos["seg_beg"] = -1
p.pos["seg_end"] = -1
i = 0
while i < len(p): # at start of new line or end of comment
# simply search for the segment begin and end
ptrace(f"cmd_include: Processing at i={i} in line {pos2line(p.file, i)}")
# look for comment
if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
if p.pos["cmt_beg"] < 0:
i = p.pos["line_end"] + 1
continue
else:
# jump to comment_begin
old_i = i
i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
# in comment, i at the character after COMMENT_BEGIN
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
# sanity checks
if p.pos["cmt_end"] < 0:
error(f"Comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
else:
tmp_next_begin = p.file.find(COMMENT_BEGIN, i)
if 0 < tmp_next_begin and tmp_next_begin < p.pos["cmt_end"]:
error(f"Found next comment begin before the comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{p.file[i:p.pos['line_end']]}'", level=error_levels["light"])
p.pos["cmt_beg"] = -1
continue
# either at newline (if in multiline comment) or at comment end
p.pos["cmd_beg"] = i
p.pos["cmd_end"] = min(p.pos["line_end"], p.pos["cmt_end"])
assert p.pos["cmd_end"] >= i, f"cmd_end={p.pos['cmd_end']}, i={i}, line_end={p.pos['line_end']}, cmt_end={p.pos['cmt_end']}"
ptrace(f"> Possible command end: {p.pos['cmd_end']}, possible command: '{p[i:p.pos['cmd_end']]}'")
# find commands
match = re.fullmatch(re_preprocessor_command, p[i:p.pos["cmd_end"]].strip(" "))
if match: # command comment
p.state["cmd_in_cmt"] = True
command = match.groups()[0]
args = match.groups()[1].replace('\t', ' ').strip(' ')
pdebug(f"> Found command '{command}' with args '{args}'")
# delete from previous block if
if command in ["elif", "else", "endif"]:
except: except:
error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"]) error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
content = f"<!-- Could not include '{args}' -->" content = f"<!-- Could not include '{args}' -->"
@ -343,9 +391,65 @@ class Parser():
return len(self.file) return len(self.file)
class HTMLParser(Parser):
    """
    Parse a html file.

    Each method operates on the file starting at the cursor position
    ``self.i`` and reaching up to the position stored in
    ``self.pos["line_end"]``.
    """

    def __init__(self, file, variables: dict[str, str]):
        """
        Set up the cursor, the variable table and the comment/command markers.

        file is handed unchanged to Parser.__init__;
        variables is the mapping later passed to replace_variables().
        """
        super().__init__(file)
        self.i = 0  # cursor: character position currently being processed
        self.variables = variables
        self.pos["cmt_beg"] = -1  # negative means "not inside a comment"
        self.pos["cmt_end"] = -1
        self.pos["cmd_beg"] = -1
        self.pos["cmd_end"] = -1  # same key name the rest of the file uses
        self.pos["conditional_block_beg"] = -1  # char pos of the first char of the last block, if waiting for elif, else or endif
        self.state["cmd_in_cmt"] = False
        self.state["last_condition"] = False  # if the last if condition was true

    def next_line(self) -> None:
        """store the end of the line that contains self.i in pos["line_end"]"""
        self.pos["line_end"] = self.file.find('\n', self.i)
        if self.pos["line_end"] < 0:
            # no further newline: the last line ends at the end of the file
            self.pos["line_end"] = len(self)

    def use_variables(self) -> None:
        """replace variable usages in the current line"""
        line_end = self.pos["line_end"]
        self.replace(self.i, line_end, replace_variables(self[self.i:line_end], self.variables))
        # NOTE(review): pos["line_end"] is read again after the replacement;
        # whether Parser.replace() keeps it up to date is not visible here.
        ptrace("> Line after replacing variables:", self.file[self.i:self.pos["line_end"]])

    def add_sidenav_headings(self) -> None:
        """check if heading for sidenav in line"""
        match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
        if match:
            # second group is handed over as the entry, first group as "#..." target
            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
            ptrace("> Found heading with id:", match.groups())

    def find_comment_begin(self) -> bool:
        """
        find the beginning of a comment in the current line
        if comment begin was found, jump into the comment, return True
        if already inside a comment, return True without moving
        if no comment begin is in the line, jump behind line_end, return False
        """
        if self.pos["cmt_beg"] >= 0:
            return True  # still in previous comment

        # not in comment, find next comment
        self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
        if self.pos["cmt_beg"] < 0:
            # nothing in this line: continue with the first char after line_end
            self.i = self.pos["line_end"] + 1
            return False

        # jump to comment_begin
        old_i = self.i
        self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
        ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
        return True
def parse_file(_file:str, variables:dict[str,str]): def parse_file(_file:str, variables:dict[str,str]):
p = Parser(_file) p = HTMLParser(_file, variables)
sidenav_include_pos = -1 sidenav_include_pos = -1
p.pos["cmt_beg"] = -1 p.pos["cmt_beg"] = -1
p.pos["cmt_end"] = -1 p.pos["cmt_end"] = -1
@ -358,32 +462,15 @@ def parse_file(_file:str, variables:dict[str,str]):
# if file.count(COMMENT_BEGIN) != file.count(COMMENT_END): # if file.count(COMMENT_BEGIN) != file.count(COMMENT_END):
while i < len(p): # at start of new line or end of comment while i < len(p): # at start of new line or end of comment
p.next_line()
ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}") ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}")
# replace variable usages in the current line p.use_variables()
p.pos["line_end"] = p.file.find('\n', i) p.add_sidenav_headings()
if p.pos["line_end"] < 0: p.pos["line_end"] = len(p)
p.replace(i, p.pos["line_end"], replace_variables(p[i:p.pos["line_end"]], variables)) if not p.find_comment_begin(): continue
ptrace("> Line after replacing variables:", p.file[i:p.pos["line_end"]])
# check if heading for sidenav in line
match = re.search(re_sidenav_heading, p[i:p.pos["line_end"]])
if match:
Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
ptrace("> Found heading with id:", match.groups())
# look for comment
if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
if p.pos["cmt_beg"] < 0:
i = p.pos["line_end"] + 1
continue
else:
# jump to comment_begin
old_i = i
i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
# in comment, i at the character after COMMENT_BEGIN # in comment, i at the character after COMMENT_BEGIN
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"]) p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
@ -578,7 +665,7 @@ if __name__ == "__main__":
error(f"Invalid argument: {argv[i]}") error(f"Invalid argument: {argv[i]}")
i += 1 i += 1
# sanity checks # sanity checks
if not target_path: missing_arg("--input") if not target_path: missing_arg("--target")
if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)") if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)")
if inplace: output_path = target_path if inplace: output_path = target_path
if not output_path: if not output_path: