import re
import sys
from typing import Callable

filename = "unknown"

# Storage directives that may follow a label.  "asciiz" must come before
# "ascii": regex alternation takes the first alternative that matches, so the
# shorter name would otherwise win and leave a stray "z" behind.
_STORAGE_DIRECTIVES = r"byte|res|dbyte|word|addr|faraddr|dword|asciiz|ascii"
# One directive argument: hex, binary, decimal, identifier or quoted string.
# The string part is lazy so that '"a", "b"' is read as two arguments.
_STORAGE_ARG = r'(?:0x[a-fA-F0-9]+|0b[01]+|\d+|\w+|"[^\n]*?[^\\\n]")'
# Zero or more comma/space separated arguments.
_STORAGE_ARG_LIST = r"(?:[, ]+" + _STORAGE_ARG + r"[ \n]*)*"


def pdebug(*args, **k):
    """Print a debug message to stderr, prefixed with the module-level filename."""
    print(f"DEBUG ({filename}):", *args, file=sys.stderr, **k)


def _convert_numeric_literals(m):
    """Rewrite $HEX as 0xHEX and %BIN as 0bBIN in the code part of one line.

    The match passed in is the text of a line up to (not including) the first
    ';', so anything inside a comment is left untouched.
    """
    code = re.sub(r"\$([0-9A-Fa-f]+)", r"0x\1", m.group(0))
    return re.sub(r"%([01]+)", r"0b\1", code)


def parse_custom_language(file_content: str):
    """Rewrite assembler-style source text into C++-like declarations.

    A fixed, ordered list of regex substitutions is applied to the whole text:
    scopes become namespaces, Export lines are recorded (and removed) so that
    exported procedures and labels can be wrapped in their namespace,
    .proc/.macro/.enum blocks become declarations, labelled storage directives
    become variable declarations, and ';;' comments become '//!' comments.

    Args:
        file_content: Complete text of the input file.

    Returns:
        The transformed text.

    Raises:
        NotImplementedError: If a matched block has a type other than proc,
            macro or enum.
    """
    # procedure_name: scope_name
    exported_names = {}

    def handle_export(m):
        """Record every name on an Export line under its scope; emit nothing."""
        scope = m.group(2)
        functions = m.group(3).replace(" ", "").strip(",")
        for f in functions.split(","):
            pdebug(f"Add Exported function: '{f}' in '{scope}'")
            exported_names[f] = scope
        return ""

    def handle_procedure(m):
        """Turn a documented .proc/.macro/.enum block into a declaration."""
        p_docs = m.group(1).strip("\n")
        p_type = m.group(2)
        p_name = m.group(3)
        p_args = m.group(4).strip(" ")
        p_code = m.group(5)

        in_namespace = p_name in exported_names
        if in_namespace:
            # The namespace opener shares a line with the first doc line.
            s = f"namespace {exported_names[p_name]}" + " {" + p_docs + "\n"
        else:
            s = p_docs + "\n"

        if p_type == "proc":
            # Procedures take no argument list; parameters come from the
            # "@param name:" / "\param name:" tags in the documentation.
            names = re.findall(r"[\@\\]param +(.+?) *:", p_docs)
            s += f"proc {p_name}(" + ",".join(f"Param {n}" for n in names) + ");\n"
        elif p_type == "macro":
            # Drop empty entries so a macro without parameters is declared as
            # "macro name();" instead of "macro name(Param );".
            params = [p for p in p_args.replace(" ", "").split(",") if p]
            joined = "TXT".join(params)
            pdebug(f"Processing macro '{p_name}' with args '{joined}'")
            s += f"macro {p_name}(" + ",".join("Param " + p for p in params) + ");\n"
            pdebug("Found macro", p_name, s)
        elif p_type == "enum":
            # Put a comma after each enumerator, before any trailing comment.
            p_code = re.sub(r"( *(?:;;.*)?\n)", r",\1", p_code)
            s += f"enum {p_name}" + "{\n" + p_code + "};"
        else:
            raise NotImplementedError(
                f"handle_procedure not implemented for procedure type {p_type}"
            )

        # Blank out the body but keep its newlines.
        s += re.sub(".*", "", p_code)
        s += "} // namespace" if in_namespace else "\n"
        return s

    storage_alloc = (
        r"\.(" + _STORAGE_DIRECTIVES + r")(" + _STORAGE_ARG_LIST + r")"
    )

    def handle_storage_label(m):
        """Turn 'label: .directive args...' into a variable declaration."""
        l_docs = m.group(1).strip("\n")
        l_name = m.group(2)
        l_allocs = m.group(3)

        directives = []
        args = []
        for alloc in re.finditer(storage_alloc, l_allocs):
            directives.append(alloc.group(1))
            # Group 2 wraps the whole argument list.  A repeated capture group
            # would only remember its last repetition, i.e. the last argument.
            args += re.findall(_STORAGE_ARG, alloc.group(2))

        if len(args) > 1:
            if all(arg.startswith('"') for arg in args):
                text = "".join(arg.strip('"') for arg in args)
                decl = f'char* {l_name} = "{text}"'
            else:
                decl = f"bytes[] {l_name} = " + "{" + ",".join(args) + "}"
        else:
            # The calling pattern requires at least one directive.
            l_type = directives[0]
            l_arg = args[0] if args else None
            if l_type == "res":
                # .res N reserves N bytes: the argument is a size, not a value.
                l_type = f"bytes[{l_arg}]"
                l_arg = None
            else:
                l_type += "*"
            decl = f"{l_type} {l_name}"
            if l_arg:
                decl += f" = {l_arg}"
        decl += ";"

        in_namespace = l_name in exported_names
        s = ""
        if in_namespace:
            s += f"namespace {exported_names[l_name]}" + " {"
        s += l_docs
        if l_docs:
            s += "\n"
        s += decl
        if in_namespace:
            s += "} // namespace"
        # Pad so the replacement has as many newlines as the matched text,
        # counting the ones already emitted with the documentation.
        s += "\n" * max(0, m.group().count("\n") - s.count("\n"))
        return s

    # Order matters: Export lines must be seen before procedures and labels,
    # and numeric literals must be converted before labels are matched,
    # because the label pattern only knows the 0x / 0b spellings.
    patterns: dict[str, str | Callable[[re.Match], str]] = {
        r"\@(?:macro|function)": "@brief",
        r"^\.scope ([a-zA-Z0-9_]+)": r"namespace \1 {",
        r"^\.end(?:scope)": "}",
        r"^\.(include)": r"#\1",
        r"^(Export(?:Zp)?) (\w+)((?: *, *\w+)+)": handle_export,
        r"^(Import(?:Zp)?) (\w+)((?: *, *\w+)+)": "",
        # $HEX to 0xHEX and %BIN to 0bBIN, except in comments.
        # NOTE(review): the two original patterns were lost from the source;
        # this per-line callable is a reconstruction of the documented intent.
        # A ';' inside a string literal is treated as a comment start.
        r"^[^;\n]+": _convert_numeric_literals,
        r"^((?:;;.*\n)*)^\.(proc|enum|macro) (\w+)(.*?)\n((?:.|\n)*?)\.end(proc|enum|macro).*": handle_procedure,
        r"^((?:;;.*\n)*) *(\w+):((?:\s*\.(?:"
        + _STORAGE_DIRECTIVES
        + r")"
        + _STORAGE_ARG_LIST
        + r")+)": handle_storage_label,
        r";;": "//!",  # C++ comments
        # NOTE(review): one more entry followed here, introduced by the remark
        # "TODO this is currently case sensitive".  Its pattern and replacement
        # could not be recovered from the damaged source; restore it from
        # version control.
    }

    # NOTE(review): the original application loop was lost as well.
    # re.MULTILINE is required because the patterns use '^' in mid-text.
    for pattern, replacement in patterns.items():
        file_content = re.sub(pattern, replacement, file_content, flags=re.MULTILINE)
    return file_content


def main():
    """Transform the file named on the command line and print it to stdout."""
    # Without this declaration the assignment below would create a local and
    # pdebug would keep reporting "unknown".
    global filename
    if len(sys.argv) != 2:
        # NOTE(review): the original usage text was lost; wording is a guess.
        print(f"Usage: {sys.argv[0]} FILENAME")
        sys.exit(1)
    filename = sys.argv[1]
    with open(filename, 'r') as file:
        file_content = file.read()
    transformed_content = parse_custom_language(file_content)
    print(transformed_content)


if __name__ == "__main__":
    main()