start move to HTMLParser class

2023-11-17 15:36:15 +01:00 · 2023-11-17 15:36:15 +01:00 · 26fc849ed5
commit 26fc849ed5
parent ea8c4cc8c4
1 changed files with 111 additions and 24 deletions
--- a/135
+++ b/135
@ -216,6 +216,54 @@ def cmd_include(args: str, variables:dict[str, str]={}) -> str:
    try:
        with open(args) as file:
            content = file.read()
            p = Parser(content)
            p.pos["seg_beg"] = -1
            p.pos["seg_end"] = -1
            i = 0
            while i < len(p):  # at start of new line or end of comment
                # simply search for the segment begin and end
                ptrace(f"cmd_include: Processing at i={i} in line {pos2line(p.file, i)}")
                # look for comment
                if p.pos["cmt_beg"] < 0:  # if not in comment, find next comment
                    p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
                    # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
                    if p.pos["cmt_beg"] < 0:
                        i = p.pos["line_end"] + 1
                        continue
                    else:
                        # jump to comment_begin
                        old_i = i
                        i = p.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
                        ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
                # in comment, i at the character after COMMENT_BEGIN
                p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
                # sanity checks
                if p.pos["cmt_end"] < 0:
                    error(f"Comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is never ended.", level=error_levels["serious"])
                else:
                    tmp_next_begin = p.file.find(COMMENT_BEGIN, i)
                    if 0 < tmp_next_begin and  tmp_next_begin < p.pos["cmt_end"]:
                        error(f"Found next comment begin before the comment starting in line {pos2line(p.file, p.pos['cmt_beg'])} is ended! Skipping comment. Comment without proper closing tags: '{p.file[i:p.pos['line_end']]}'", level=error_levels["light"])
                        p.pos["cmt_beg"] = -1
                        continue
                # either at newline (if in multiline comment) or at comment end
                p.pos["cmd_beg"] = i
                p.pos["cmd_end"] = min(p.pos["line_end"], p.pos["cmt_end"])
                assert p.pos["cmd_end"] >= i, f"cmd_end={p.pos['cmd_end']}, i={i}, line_end={p.pos['line_end']}, cmt_end={p.pos['cmt_end']}"
                ptrace(f"> Possible command end: {p.pos['cmd_end']}, possible command: '{p[i:p.pos['cmd_end']]}'")
                # find commands
                match = re.fullmatch(re_preprocessor_command, p[i:p.pos["cmd_end"]].strip(" "))
                if match:  # command comment
                    p.state["cmd_in_cmt"] = True
                    command = match.groups()[0]
                    args = match.groups()[1].replace('\t', ' ').strip(' ')
                    pdebug(f"> Found command '{command}' with args '{args}'")
                    # delete from previous block if
                    if command in ["elif", "else", "endif"]:
    except:
        error(f"cmd_include: Could not open file '{args}'", level=error_levels["serious"], exit_code=exit_codes["FileNotFound"])
        content = f"<!-- Could not include '{args}' -->"
@ -343,9 +391,65 @@ class Parser():
        return len(self.file)
 class HTMLParser(Parser):
    """
    Parse a html file.

    Each method operates at the position indicated by ``self.i`` and looks
    no further than the position stored in ``self.pos["line_end"]``.

    Attributes set here (in addition to whatever ``Parser.__init__`` sets):
        i:          current character position in ``self.file``
        variables:  mapping handed to ``replace_variables`` in ``use_variables``
        pos[...]:   character positions; ``-1`` means "not set / not found"
        state[...]: boolean flags describing the parser state

    NOTE(review): ``self.file``, ``self.pos``, ``self.state``, ``self.replace``,
    slicing (``self[a:b]``) and ``len(self)`` are presumably provided by
    ``Parser`` - confirm against the base class.
    """

    def __init__(self, file, variables:dict[str, str]):
        """
        Set up the parser state.

        :param file:      forwarded unchanged to ``Parser.__init__``
                          (presumably the file content as a string - the methods
                          below call ``self.file.find``; TODO confirm)
        :param variables: variables used by ``use_variables``
        """
        super().__init__(file)
        self.i = 0
        self.variables = variables
        self.pos["cmt_beg"] = -1
        self.pos["cmt_end"] = -1
        self.pos["cmd_beg"] = -1
        # was "cmdend": every other user of the position dict reads "cmd_end"
        self.pos["cmd_end"] = -1
        self.pos["conditional_block_beg"] = -1  # char pos of the first char of the last block, if waiting for elif, else or endif
        self.state["cmd_in_cmt"] = False
        self.state["last_condition"] = False  # if the last if condition was true

    def next_line(self):
        """
        Update ``pos["line_end"]`` to the end of the line containing ``self.i``.

        ``line_end`` becomes the index of the next newline at or after
        ``self.i``, or ``len(self)`` if there is no further newline.
        ``self.i`` itself is not changed.
        """
        # use self.i: a bare `i` would resolve to an unrelated module-level name
        self.pos["line_end"] = self.file.find('\n', self.i)
        if self.pos["line_end"] < 0:
            self.pos["line_end"] = len(self)

    def use_variables(self):
        """Replace variable usages in the current line (``self.i`` up to ``line_end``)."""
        line_end = self.pos["line_end"]
        self.replace(self.i, line_end, replace_variables(self[self.i:line_end], self.variables))
        # NOTE(review): pos["line_end"] is read again after the replacement;
        # whether Parser.replace adjusts it when the length changes is not
        # visible here - confirm.
        ptrace("> Line after replacing variables:", self.file[self.i:self.pos["line_end"]])

    def add_sidenav_headings(self):
        """Check the current line for a sidenav heading and register it with ``Sidenav``."""
        match = re.search(re_sidenav_heading, self[self.i:self.pos["line_end"]])
        if match:
            # group 0 is used as the anchor id, group 1 as the entry's first argument
            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
            ptrace("> Found heading with id:", match.groups())

    def find_comment_begin(self) -> bool:
        """
        Find the beginning of a comment in the current line.

        If the parser is already inside a comment (``pos["cmt_beg"] >= 0``)
        nothing is changed and True is returned.
        Otherwise COMMENT_BEGIN is searched between ``self.i`` and ``line_end``:
        - found:     ``self.i`` jumps to the character after COMMENT_BEGIN,
                     returns True
        - not found: ``self.i`` jumps to the character after ``line_end``,
                     returns False
        """
        if self.pos["cmt_beg"] >= 0:
            return True  # still in previous comment

        # not in comment, find next comment
        self.pos["cmt_beg"] = self.file.find(COMMENT_BEGIN, self.i, self.pos["line_end"])
        if self.pos["cmt_beg"] < 0:
            # no comment in this line, continue with the next line
            self.i = self.pos["line_end"] + 1
            return False

        # jump to comment_begin
        old_i = self.i
        self.i = self.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
        ptrace(f"> Found comment begin, jumping from pos {old_i} to {self.i}")
        return True
 def parse_file(_file:str, variables:dict[str,str]):
-    p = Parser(_file)
+    p = HTMLParser(_file, variables)
    sidenav_include_pos = -1
    p.pos["cmt_beg"] = -1
    p.pos["cmt_end"] = -1
@ -358,32 +462,15 @@ def parse_file(_file:str, variables:dict[str,str]):
    # if file.count(COMMENT_BEGIN) != file.count(COMMENT_END):
    while i < len(p):  # at start of new line or end of comment
        p.next_line()
        ptrace(f"Processing at i={i} in line {pos2line(p.file, i)}")
-        # replace variable usages in the current line
+        p.use_variables()
-        p.pos["line_end"] = p.file.find('\n', i)
+        p.add_sidenav_headings()
-        if p.pos["line_end"] < 0: p.pos["line_end"] = len(p)
+
-        p.replace(i, p.pos["line_end"], replace_variables(p[i:p.pos["line_end"]], variables))
+        if not p.find_comment_begin(): continue
        ptrace("> Line after replacing variables:", p.file[i:p.pos["line_end"]])
        # check if heading for sidenav in line
        match = re.search(re_sidenav_heading, p[i:p.pos["line_end"]])
        if match:
            Sidenav.addEntry(match.groups()[1], f"#{match.groups()[0]}")
            ptrace("> Found heading with id:", match.groups())
        # look for comment
        if p.pos["cmt_beg"] < 0:  # if not in comment, find next comment
            p.pos["cmt_beg"] = p.file.find(COMMENT_BEGIN, i, p.pos["line_end"])
            # ptrace(f"i={i}, line_end={line_end}, comment_begin={comment_begin}")
            if p.pos["cmt_beg"] < 0:
                i = p.pos["line_end"] + 1
                continue
            else:
                # jump to comment_begin
                old_i = i
                i = p.pos["cmt_beg"] + len(COMMENT_BEGIN)  # after comment begin
                ptrace(f"> Found comment begin, jumping from pos {old_i} to {i}")
        # in comment, i at the character after COMMENT_BEGIN
        p.pos["cmt_end"] = p.file.find(COMMENT_END, i) #, p.pos["line_end"])
@ -578,7 +665,7 @@ if __name__ == "__main__":
            error(f"Invalid argument: {argv[i]}")
        i += 1
    # sanity checks
-    if not target_path: missing_arg("--input")
+    if not target_path: missing_arg("--target")
    if not os.path.isfile(target_path): error(f"Invalid target: {target_path} (does not exist)")
    if inplace: output_path = target_path
    if not output_path: