import sys
import re
from typing import Callable

##
# @defgroup documentation Documentation
# @brief
#

##
# @file
# @brief Doxygen filter for ca65 assembler files
# @details
# This filter converts ca65 to C++ statements that doxygen can parse.
#
# Doxygen comments are double semicolons `;;`
# - turns procedures `.proc` into function statements with `proc` as return type
#   Parameters documented with the @param command are put into the parentheses (function arguments) with `Param` as type (@ref handle_procedure)
# - turns macros `.macro` into function statements with `macro` as return type with the parameter macros
#   as function arguments with `Param` type (@ref handle_procedure)
# - enums become ... enums, documentation of enum members after their name is also handled when using ;;< (@ref handle_procedure)
# - labeled storage allocations with `.byte`, `.res`, `.ascii` etc. are turned into variable declarations with the label as variable name (@ref handle_storage_label)
#   - if allocations are strings, they are concatenated together to `char * LABEL_NAME = "";`
#   - if there are multiple non-string allocations: `bytes LABEL_NAME[] = {alloc1, alloc2, ...};`
#   - if there is one non-string allocation: `alloc_type * LABEL_NAME = alloc;`
#   - if the allocation is not initialized: `alloc_type * LABEL_NAME;`
#   - if the allocation type is `res SIZE`: `bytes LABEL_NAME[SIZE];`
#   - the sizes of the arrays may be wrong!
# - include statements are kept
# - all other preprocessor macros are removed
#
# @todo Handle structs
# @todo for storage allocators, check in which segment they are in and apply `const` where necessary
# @ingroup documentation

# Name of the file being filtered; only used to tag debug output.
filename = "unknown"


##
# @brief Print a debug message to stderr, prefixed with the current file name.
# @details stdout is reserved for the filtered source, so diagnostics go to stderr.
# @param args Values to print (forwarded to print()).
# @param k Extra keyword arguments forwarded to print().
def pdebug(*args, **k):
    print(f"DEBUG ({filename}):", *args, file=sys.stderr, **k)


##
# @brief Translate ca65 assembler source into C++-like text that doxygen can parse.
# @details
# Applies an ordered table of regex substitutions to the whole file. The order
# matters: `Export` statements are consumed first so that the procedure and
# storage handlers know which names have to be wrapped in a namespace, and the
# `;;` -> `//!` rewrite runs after the handlers because they match on `;;`.
# @param file_content Complete text of the ca65 source file.
# @return The transformed text.
def parse_custom_language(file_content: str):
    # procedure_name: scope_name
    exported_names = {}

    ##
    # @brief Record every name of an `Export scope, name, ...` line and remove the line.
    # @param m Match with groups (export keyword, scope, comma separated names).
    # @return Always the empty string.
    def handle_export(m):
        scope = m.group(2)
        functions = m.group(3).replace(" ", "").strip(",")
        for f in functions.split(","):
            exported_names[f] = scope
        return ""

    ##
    # @brief Turn a `.proc` / `.macro` / `.enum` block into a C++-like declaration.
    # @param m Match with groups (doc lines, block type, name, rest of header line, body, end type).
    # @return Replacement text for the whole block.
    # @throws NotImplementedError for a block type other than proc, macro or enum.
    def handle_procedure(m):
        p_docs = m.group(1).strip("\n")
        p_type = m.group(2)
        p_name = m.group(3)
        p_args = m.group(4).strip(" ")
        p_code = m.group(5)

        # Exported names are wrapped in `namespace SCOPE { ... }`. The opening
        # brace shares a line with the first doc line so no extra line is added.
        namespace = exported_names.get(p_name)
        if namespace is not None:
            s = f"namespace {namespace}" + " {" + p_docs + "\n"
        else:
            s = p_docs + "\n"

        # One newline per body line keeps later line numbers aligned with the input.
        blank_body = "\n" * p_code.count("\n")

        if p_type == "proc":
            # Function arguments come from the `@param NAME:` entries of the doc block.
            params = ",".join(
                f"Param {pm.group(1)}"
                for pm in re.finditer(r"[\@\\]param +(.+?) *:", p_docs)
            )
            s += f"proc {p_name}({params});\n" + blank_body
        elif p_type == "macro":
            # Skip empty entries: a macro without parameters must give `macro name();`
            # and not a dangling `Param ` argument.
            names = [a for a in p_args.replace(" ", "").split(",") if a]
            s += f"macro {p_name}(" + ",".join("Param " + a for a in names) + ");\n"
            s += blank_body
        elif p_type == "enum":
            # Append a comma to every `NAME = VALUE` line, in front of a trailing `;;` comment.
            p_code = re.sub(r"(.*=.*?)( *(?:;;.*)?\n)", r"\1,\2", p_code)
            # The body itself is emitted here, so it is not padded with blank lines again.
            s += f"enum {p_name}" + "{\n" + p_code + "};"
        else:
            raise NotImplementedError(f"handle_procedure not implemented for procedure type {p_type}")

        s += "} // namespace" if namespace is not None else "\n"
        return s

    ##
    # @brief Turn a label followed by storage allocations into a variable declaration.
    # @param m Match with groups (doc lines, label name, allocation text, same-line `;;` doc).
    # @return Replacement text containing as many newlines as the matched text.
    def handle_storage_label(m):
        l_docs = m.group(1)
        l_name = m.group(2)
        l_allocs = m.group(3)
        l_docs2 = m.group(4)  # if doc was in the same line as the label

        storage_alloc = r"\.(byte|res|dbyte|word|addr|faraddr|dword|asciiz?)(([, ]+(?:0x[a-fA-F0-9]+|0b[01]+|\d+|\w+|\"[^\n]*?[^\\\n]\")[ \n]*)*)"
        # The string alternative is lazy, like in storage_alloc: a greedy match
        # would swallow `"a", "b"` as a single argument.
        storage_alloc_arg = r"(0x[a-fA-F0-9]+|0b[01]+|\d+|\w+|\"[^\n]*?[^\\\n]\")"

        allocs = list(re.finditer(storage_alloc, l_allocs))
        args = []
        for alloc in allocs:
            alloc_args = alloc.group(2)
            if alloc_args:
                args += re.findall(storage_alloc_arg, alloc_args)

        s = ""
        # if the label is exported, put it in a namespace
        namespace = exported_names.get(l_name)
        if namespace is not None:
            s += f"namespace {namespace}" + " {"
        # docs after the namespace, otherwise they document the namespace
        if l_docs:
            s += l_docs
        # put the single line comment into a /** */ comment in front of the declaration
        if l_docs2:
            s += "/** "
            if "brief" not in l_docs2:
                s += "@brief "
            s += f"{l_docs2.strip(';')} */ "

        # completely ignoring the type of the storage allocation here
        if len(args) > 1:
            if all(arg.startswith("\"") for arg in args):
                s += f'char* {l_name} = "' + "".join(arg.strip('"') for arg in args) + '"'
            else:
                s += f"bytes {l_name}[{len(args)}] = " + "{" + ",".join(args) + "}"
        else:
            l_type = allocs[0].group(1)
            l_arg = args[0] if args else None
            if l_type == "res":
                # `.res SIZE` reserves SIZE bytes: declare `bytes NAME[SIZE]`
                # as described in the file header.
                s += f"bytes {l_name}[{l_arg if l_arg is not None else ''}]"
            else:
                # every other allocator: use type* as type
                s += f"{l_type}* {l_name}"
                if l_arg:
                    s += f" = {l_arg}"
        s += ";"

        if namespace is not None:
            s += "} // namespace"
        s += m.group().count('\n') * '\n'  # make sure the #lines is the same
        return s

    ##
    # @brief Rewrite `$HEX` to `0xHEX` and `%BIN` to `0bBIN` in the code part of a line.
    # @param m Match covering one line up to (not including) a `;;` comment.
    # @return The matched text with converted number literals.
    def convert_literals(m):
        code = re.sub(r"(?<!\w)\$([0-9a-fA-F]+)", r"0x\1", m.group())
        return re.sub(r"(?<!\w)%([01]+)", r"0b\1", code)

    patterns: dict[str, str|Callable[[re.Match], str]] = {
        r"\@(?:macro|function)": "@brief",
        r"^\.scope ([a-zA-Z0-9_]+)": r"namespace \1 {",
        r"^\.end(?:scope)": "}",
        r"^\.(include)": r"#\1",
        r"^(Export(?:Zp)?) (\w+)((?: *, *\w+)+)": handle_export,
        r"^(Import(?:Zp)?) (\w+)((?: *, *\w+)+)": "",
        # $HEX -> 0xHEX and %BIN -> 0bBIN, except in comments.
        # NOTE(review): the two original lookbehind patterns for this step were
        # lost when the file was mangled; this single entry re-implements the
        # behaviour their remaining comments describe. Confirm against the
        # pristine file.
        r"^(?:(?!;;).)+": convert_literals,
        r"^((?:;;.*\n)*) *\.(proc|enum|macro) (\w+)(.*?)\n((?:.|\n)*?)\.end(proc|enum|macro).*": handle_procedure,
        r"^((?:;;.*\n)*) *(\w+):((?:\s*\.(?:byte|res|dbyte|word|addr|faraddr|dword|asciiz?)(?:[, ]+(?:0x[a-fA-F0-9]+|0b[01]+|\d+|\w+|\"[^\n]*[^\\\n]\")[ \n]*)*)+)(;;.*)?": handle_storage_label,
        r"^INCLUDE_[A-Z0-9_]+ *= *1$": r"",  # Include guard variables
        r";;": "//!",  # C++ comments
        # TODO this is currently case sensitive
        # NOTE(review): at least one more entry followed here (it started with
        # a `(?<...` lookbehind) but its text is unrecoverable; restore it from
        # the pristine file.
    }

    # NOTE(review): the original substitution loop was lost as well. The `^`/`$`
    # anchors in the table require MULTILINE; entries are applied in table order.
    for pattern, replacement in patterns.items():
        file_content = re.sub(pattern, replacement, file_content, flags=re.MULTILINE)
    return file_content


##
# @brief Command line entry point: filter the file named by the only argument to stdout.
# @details Exits with status 1 if the argument count is wrong.
def main():
    # Without `global`, pdebug() would keep reporting "unknown" as the file name.
    global filename
    if len(sys.argv) != 2:
        # NOTE(review): the original usage text was lost; only the exit code is certain.
        print(f"Usage: {sys.argv[0]} <filename>", file=sys.stderr)
        sys.exit(1)
    filename = sys.argv[1]
    with open(filename, 'r') as file:
        file_content = file.read()
    transformed_content = parse_custom_language(file_content)
    print(transformed_content)


if __name__ == "__main__":
    main()