start move to HTMLParser class
This commit is contained in:
parent
ea8c4cc8c4
commit
26fc849ed5
@ -216,6 +216,54 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
|
|||||||
try:
|
try:
|
||||||
with open(args) as file:
|
with open(args) as file:
|
||||||
content = file.read()
|
content = file.read()
|
||||||
|
p = Parser(content)
|
||||||
|
p.pos["seg_beg"] = -1
|
||||||
|
p.pos["seg_end"] = -1
|
||||||
|
i = 0
|
||||||
|
while i < len(p): # at start of new line or end of comment
|
||||||
|
# simply search for the segment begin and end
|
||||||
|
ptrace(f"cmd_include: Processing at i={i} in line {pos2line(p.file, i)}")
|
||||||
|
|
||||||
|
# look for comment
|
||||||
|
if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
|
||||||
|
p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
|
||||||
|
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
|
||||||
|
if p.pos["cmt_beg"] < 0:
|
||||||
|
i = p.pos["line_end"] + 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# jump to comment_begin
|
||||||
|
old_i = i
|
||||||
|
i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
|
||||||
|
ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
|
||||||
|
|
||||||
|
# in comment, i at the character after COMMENT_BEGIN
|
||||||
|
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
|
||||||
|
# sanity checks
|
||||||
|
if p.pos["cmt_end"] < 0:
|
||||||
|
error(f"Comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
|
||||||
|
else:
|
||||||
|
tmp_next_begin = p.file.find(COMMENT_BEGIN, i)
|
||||||
|
if 0 < tmp_next_begin and tmp_next_begin < p.pos["cmt_end"]:
|
||||||
|
error(f"Found next comment begin before the comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{p.file[i:p.pos['line_end']]}'", level=error_levels["light"])
|
||||||
|
p.pos["cmt_beg"] = -1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# either at newline (if in multiline comment) or at comment end
|
||||||
|
p.pos["cmd_beg"] = i
|
||||||
|
p.pos["cmd_end"] = min(p.pos["line_end"], p.pos["cmt_end"])
|
||||||
|
assert p.pos["cmd_end"] >= i, f"cmd_end={p.pos['cmd_end']}, i={i}, line_end={p.pos['line_end']}, cmt_end={p.pos['cmt_end']}"
|
||||||
|
ptrace(f"> Possible command end: {p.pos['cmd_end']}, possible command: '{p[i:p.pos['cmd_end']]}'")
|
||||||
|
|
||||||
|
# find commands
|
||||||
|
match = re.fullmatch(re_preprocessor_command, p[i:p.pos["cmd_end"]].strip(" "))
|
||||||
|
if match: # command comment
|
||||||
|
p.state["cmd_in_cmt"] = True
|
||||||
|
command = match.groups()[0]
|
||||||
|
args = match.groups()[1].replace('\t', ' ').strip(' ')
|
||||||
|
pdebug(f"> Found command '{command}' with args '{args}'")
|
||||||
|
# delete from previous block if
|
||||||
|
if command in ["elif", "else", "endif"]:
|
||||||
except:
|
except:
|
||||||
error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
|
error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
|
||||||
content = f"<!-- Could not include '{args}' -->"
|
content = f"<!-- Could not include '{args}' -->"
|
||||||
@ -343,9 +391,65 @@ class Parser():
|
|||||||
return len(self.file)
|
return len(self.file)
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLParser(Parser):
    """
    Parse a html file.

    The parser keeps a cursor ``self.i`` (character offset into ``self.file``).
    Each method operates on the text between the cursor and the position
    stored in ``self.pos["line_end"]``.

    NOTE(review): ``self.pos``, ``self.state``, ``self.file``, ``self.replace``
    and slicing via ``self[a:b]`` are presumably provided by ``Parser`` -
    confirm against the base class.
    """

    def __init__(self, file: str, variables: dict[str, str]):
        """
        Set up the cursor, the variable table and the comment/command markers.

        :param file: content of the html file to parse
        :param variables: variable names mapped to their replacement text
        """
        super().__init__(file)
        self.i = 0
        self.variables = variables
        # -1 means "not found / not inside one"
        self.pos["cmt_beg"] = -1
        self.pos["cmt_end"] = -1
        self.pos["cmd_beg"] = -1
        # key must be "cmd_end": that is the spelling the parsing code reads
        self.pos["cmd_end"] = -1
        # char pos of the first char of the last block, if waiting for elif, else or endif
        self.pos["conditional_block_beg"] = -1
        self.state["cmd_in_cmt"] = False
        # if the last if condition was true
        self.state["last_condition"] = False

    def next_line(self):
        """
        Update ``line_end`` to the end of the line the cursor is in.

        ``line_end`` is the offset of the next newline at or after ``self.i``,
        or ``len(self)`` if the rest of the file contains no newline.
        The cursor itself is not moved.
        """
        self.pos["line_end"] = self.file.find('\n', self.i)
        if self.pos["line_end"] < 0:
            self.pos["line_end"] = len(self)

    def use_variables(self):
        """
        Replace variable usages in the current line.

        NOTE(review): the replacement can change the length of the line;
        whether ``self.replace`` adjusts ``line_end`` accordingly is not
        visible here - confirm against ``Parser.replace``.
        """
        line_end = self.pos["line_end"]
        self.replace(self.i, line_end, replace_variables(self[self.i:line_end], self.variables))
        ptrace("> Line after replacing variables:", self.file[self.i:self.pos["line_end"]])

    def add_sidenav_headings(self):
        """
        Check if the current line contains a heading for the sidenav.

        If ``re_sidenav_heading`` matches, the heading is registered with
        ``Sidenav.addEntry``: the second capture group is passed as the first
        argument and the first capture group, prefixed with ``#``, as the second.
        """
        match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
        if match:
            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
            ptrace("> Found heading with id:", match.groups())

    def find_comment_begin(self) -> bool:
        """
        Find the beginning of a comment in the current line.

        If the parser is not already inside a comment (``cmt_beg`` < 0), search
        for ``COMMENT_BEGIN`` between the cursor and ``line_end``:

        - not found: move the cursor to the character after ``line_end``
          and return False
        - found: move the cursor to the character after ``COMMENT_BEGIN``
          and return True

        If the parser is still inside a previous comment, nothing is changed
        and True is returned.
        """
        # look for comment begin
        if self.pos["cmt_beg"] < 0:  # if not in comment, find next comment
            self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
            if self.pos["cmt_beg"] < 0:
                # no comment in this line, continue with the next line
                self.i = self.pos["line_end"] + 1
                return False
            # jump to comment_begin
            old_i = self.i
            self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
            ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
            return True
        return True  # still in previous comment
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_file(_file:str, variables:dict[str,str]):
|
def parse_file(_file:str, variables:dict[str,str]):
|
||||||
p = Parser(_file)
|
p = HTMLParser(_file, variables)
|
||||||
sidenav_include_pos = -1
|
sidenav_include_pos = -1
|
||||||
p.pos["cmt_beg"] = -1
|
p.pos["cmt_beg"] = -1
|
||||||
p.pos["cmt_end"] = -1
|
p.pos["cmt_end"] = -1
|
||||||
@ -358,32 +462,15 @@ def parse_file(_file:str, variables:dict[str,str]):
|
|||||||
# if file.count(COMMENT_BEGIN) != file.count(COMMENT_END):
|
# if file.count(COMMENT_BEGIN) != file.count(COMMENT_END):
|
||||||
|
|
||||||
while i < len(p): # at start of new line or end of comment
|
while i < len(p): # at start of new line or end of comment
|
||||||
|
p.next_line()
|
||||||
ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}")
|
ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}")
|
||||||
|
|
||||||
# replace variable usages in the current line
|
p.use_variables()
|
||||||
p.pos["line_end"] = p.file.find('\n', i)
|
p.add_sidenav_headings()
|
||||||
if p.pos["line_end"] < 0: p.pos["line_end"] = len(p)
|
|
||||||
p.replace(i, p.pos["line_end"], replace_variables(p[i:p.pos["line_end"]], variables))
|
if not p.find_comment_begin(): continue
|
||||||
ptrace("> Line after replacing variables:", p.file[i:p.pos["line_end"]])
|
|
||||||
|
|
||||||
# check if heading for sidenav in line
|
|
||||||
match = re.search(re_sidenav_heading, p[i:p.pos["line_end"]])
|
|
||||||
if match:
|
|
||||||
Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
|
|
||||||
ptrace("> Found heading with id:", match.groups())
|
|
||||||
|
|
||||||
# look for comment
|
|
||||||
if p.pos["cmt_beg"] < 0: # if not in comment, find next comment
|
|
||||||
p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
|
|
||||||
# ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
|
|
||||||
if p.pos["cmt_beg"] < 0:
|
|
||||||
i = p.pos["line_end"] + 1
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# jump to comment_begin
|
|
||||||
old_i = i
|
|
||||||
i = p.pos["cmt_beg"] + len(COMMENT_BEGIN) # after comment begin
|
|
||||||
ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
|
|
||||||
|
|
||||||
# in comment, i at the character after COMMENT_BEGIN
|
# in comment, i at the character after COMMENT_BEGIN
|
||||||
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
|
p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
|
||||||
@ -578,7 +665,7 @@ if __name__ == "__main__":
|
|||||||
error(f"Invalid argument: {argv[i]}")
|
error(f"Invalid argument: {argv[i]}")
|
||||||
i += 1
|
i += 1
|
||||||
# sanity checks
|
# sanity checks
|
||||||
if not target_path: missing_arg("--input")
|
if not target_path: missing_arg("--target")
|
||||||
if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)")
|
if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)")
|
||||||
if inplace: output_path = target_path
|
if inplace: output_path = target_path
|
||||||
if not output_path:
|
if not output_path:
|
||||||
|
Loading…
Reference in New Issue
Block a user