2024-08-08 00:11:15 +02:00
import sys
import re
from typing import Callable
##
# @defgroup documentation Documentation
# @brief
#
##
# @file
# @brief Doxygen filter for ca65 assembler files
# @details
# This filter converts ca65 to C++ statements that doxygen can parse.
#
# Doxygen comments are double semicolons `;;`
# - turns procedures `.proc` into function statements with `proc` as return type
# Parameters documented with the @param command are put into the parentheses (function arguments) with `Param` as type (@ref handle_procedure)
# - turns macros `.macro` into function statements with `macro` as return type with the parameter macros
# as function arguments with `Param` type (@ref handle_procedure)
# - enums become ... enums, documentation of enum members after their name is also handled when using ;;< (@ref handle_procedure)
# - labeled storage allocations with `.byte`, `.res`, `.ascii` etc. are turned into variable declarations with the label as variable name (@ref handle_procedure)
# - if allocations are strings, they are concatenated together to `char * LABEL_NAME = "<string(s)>";`
# - if there are multiple non-string allocations: `bytes LABEL_NAME[] = {alloc1, alloc2, ...};`
# - if there is one non-string allocation: `alloc_type * LABEL_NAME = alloc;`
# - if the allocation is not initialized: `alloc_type * LABEL_NAME;`
# - if the allocation type is `res SIZE`: `bytes LABEL_NAME[SIZE];`
# - the sizes of the arrays may be wrong!
# - include statements are kept
# - all other preprocessor macros are removed
#
# @todo Handle structs
# @todo for storage allocators, check in which segment they are in and apply `const` where necessary
# @ingroup documentation
# Name of the file currently being filtered. main() overwrites it with the
# path given on the command line; pdebug() uses it to label its output.
filename = "unknown"


def pdebug(*args, **k):
    """Print a debug message prefixed with the current file name.

    The message is written to stderr by default so that it never mixes with
    the filtered source, which is printed on stdout.

    Args:
        *args: Values to print, forwarded to print() after the prefix.
        **k: Extra keyword arguments forwarded to print(). A caller-supplied
            ``file`` overrides the stderr default.
    """
    # setdefault instead of a hard-coded file= so that an explicit file=
    # from the caller does not raise "multiple values for keyword argument".
    k.setdefault("file", sys.stderr)
    print(f"DEBUG ({filename}):", *args, **k)
def parse_custom_language(file_content: str):
    """Convert ca65 assembler source into C++-like text that doxygen can parse.

    The text is rewritten in two passes of multi-line regex substitutions:

    1. Normalisation: collapse runs of blanks, remove ordinary `;` comments,
       turn `;;***` decoration lines into plain `;;` and strip trailing blanks.
    2. Translation, applied in the order the patterns are listed: scopes
       become namespaces, `Export` lines are recorded and removed, `$HEX` and
       `%BIN` literals become `0x`/`0b` literals, `.proc`/`.macro`/`.enum`
       blocks become declarations, labelled storage allocations become
       variable declarations, `;;` becomes `//!`, assignments get a trailing
       semicolon and every remaining `.directive` is commented out.

    Args:
        file_content: Complete text of the assembler source file.

    Returns:
        The transformed text.

    Raises:
        NotImplementedError: If a matched block has a type other than
            proc, macro or enum.
    """
    # procedure_name: scope_name
    exported_names = {}

    # Building blocks for the storage-allocation regexes. They are defined
    # once so that the label pattern, the per-directive pattern and the
    # per-argument pattern always agree. The string alternative is lazy so
    # that `"a", "b"` is seen as two arguments rather than one.
    alloc_directive = r"byte|res|dbyte|word|addr|faraddr|dword|asciiz?"
    alloc_value = r"0x[a-fA-F0-9]+|0b[01]+|\d+|\w+|\"[^\n]*?[^\\\n]\""
    storage_alloc = r"\.(" + alloc_directive + r")(([, ]+(?:" + alloc_value + r")[ \n]*)*)"
    storage_alloc_arg = "(" + alloc_value + ")"
    storage_label = (
        r"^((?:;;.*\n)*) *(\w+):((?:\s*\.(?:" + alloc_directive + r")(?:[, ]+(?:"
        + alloc_value + r")[ \n]*)*)+)(;;.*)?"
    )

    def handle_export(m):
        """Remember the scope of every name on an `Export scope, a, b` line."""
        scope = m.group(2)
        names = m.group(3).replace(" ", "").strip(",")
        for name in names.split(","):
            exported_names[name] = scope
        # The export line itself is removed from the output.
        return ""

    def handle_procedure(m):
        """Turn a `.proc`/`.macro`/`.enum` block into a C++-like declaration."""
        p_docs = m.group(1).strip("\n")
        p_type = m.group(2)
        p_name = m.group(3)
        p_args = m.group(4).strip(" ")
        p_code = m.group(5)

        s = ""
        in_namespace = p_name in exported_names
        if in_namespace:
            # Wrap the declaration in `namespace {}`. The first docstring
            # line ends up on the `namespace` line.
            namespace = exported_names[p_name]
            s += f"namespace {namespace}" + " {" + p_docs
            s += "\n"
        else:
            s += p_docs + "\n"

        if p_type == "proc":
            # Parameters come from the `@param name:` entries of the docs.
            params = [pm.group(1) for pm in re.finditer(r"[\@\\]param +(.+?) *:", p_docs)]
            s += f"proc {p_name}(" + ",".join(f"Param {p}" for p in params) + ");\n"
        elif p_type == "macro":
            # Parameters come from the macro header. Empty entries are
            # skipped so that a macro without arguments becomes `name()`
            # instead of `name(Param )`.
            params = [p for p in p_args.replace(" ", "").split(",") if p]
            s += f"macro {p_name}(" + ",".join(f"Param {p}" for p in params) + ");\n"
        elif p_type == "enum":
            # Terminate every `NAME = value` member with a comma, keeping a
            # trailing `;;` comment after the comma.
            p_code = re.sub(r"(.*=.*?)( *(?:;;.*)?\n)", r"\1,\2", p_code)
            s += f"enum {p_name}" + " {\n" + p_code + "};"
        else:
            raise NotImplementedError(f"handle_procedure not implemented for procedure type {p_type}")

        # Replace the body by one empty line per body line.
        # NOTE(review): for enums the body was already emitted above, so the
        # body lines are counted twice here - confirm whether that is wanted.
        s += "\n" * p_code.count("\n")
        if in_namespace:
            s += "} // namespace"
        else:
            s += "\n"
        return s

    def handle_storage_label(m):
        """Turn a labelled storage allocation into a variable declaration."""
        l_docs = m.group(1)
        l_name = m.group(2)
        l_allocs = m.group(3)
        l_docs2 = m.group(4)  # if doc was in the same line as the label

        # Collect every allocation directive and the flat list of all values.
        args = []
        allocs = []
        for alloc_match in re.finditer(storage_alloc, l_allocs):
            allocs.append(alloc_match)
            alloc_args = alloc_match.group(2)
            if alloc_args:
                args += re.findall(storage_alloc_arg, alloc_args)

        s = ""
        # if the label is exported, put it in a namespace
        in_namespace = l_name in exported_names
        if in_namespace:
            namespace = exported_names[l_name]
            s += f"namespace {namespace}" + " {"
        # docs after the namespace, otherwise they document the namespace
        if l_docs:
            s += l_docs
        # put the single line comment into a /** */ comment in front of the declaration
        if l_docs2:
            s += "/** "
            if "brief" not in l_docs2:
                s += "@brief "
            s += f"{l_docs2.strip(';')} */ "

        # The type of the storage allocation is ignored when there are
        # several values.
        if len(args) > 1:
            if all(arg.startswith("\"") for arg in args):
                # Only strings: concatenate them into one string.
                s += f'char* {l_name} = "' + "".join(arg.strip('"') for arg in args) + '"'
            else:
                s += f"bytes {l_name}[{len(args)}] = " + "{" + ",".join(args) + "}"
        else:
            l_type = allocs[0].group(1)
            l_arg = args[0] if args else None
            if l_type == "res":
                # Reserved storage: the single value is the size, emitted as
                # `bytes NAME[SIZE]` as described in the file header.
                size = l_arg if l_arg is not None else ""
                s += f"bytes {l_name}[{size}]"
            else:
                # Any other directive: use `type*` and the value, if any.
                s += f"{l_type}* {l_name}"
                if l_arg:
                    s += f" = {l_arg}"
        s += ";"
        if in_namespace:
            s += "} // namespace"
        # make sure the number of lines is the same as in the matched text
        s += m.group().count("\n") * "\n"
        return s

    # Translation patterns. Order matters: exports must be recorded before
    # procedures and labels are handled, and `;;` must still be present when
    # the procedure and label handlers run.
    patterns: dict[str, str | Callable[[re.Match], str]] = {
        r"\@(?:macro|function)": "@brief",
        r"^\.scope ([a-zA-Z0-9_]+)": r"namespace \1 {",
        r"^\.end(?:scope)": "}",
        r"^\.(include)": r"#\1",
        r"^(Export(?:Zp)?) (\w+)((?: *, *\w+)+)": handle_export,
        r"^(Import(?:Zp)?) (\w+)((?: *, *\w+)+)": "",
        r"(?<!^;)\$([A-Fa-f0-9_]+)": r"0x\1",  # $HEX -> 0xHEX except in comments
        r"(?<!^;)%([01_]+)": r"0b\1",  # %BIN -> 0bBIN except in comments
        r"^((?:;;.*\n)*) *\.(proc|enum|macro) (\w+)(.*?)\n((?:.|\n)*?)\.end(proc|enum|macro).*": handle_procedure,
        storage_label: handle_storage_label,
        r"^INCLUDE_[A-Z0-9_]+ *= *1$": r"",  # Include guard variables
        r";;": "//!",  # C++ comments
        # TODO this is currently case sensitive
        # semicolons after assignments, except in comments and when they
        # already end with a comma or semicolon. Also preserve comments after
        # the assignment
        r"(?<!^;)( *\w+ *= *[^;,\n]+?) *(//.*)?$": r"\1; \2",
        r"^([^\n;]*)(?<!\w)\.(\w+)": r"\1// #\2",  # all .preprocessor commands
    }

    # Normalisation patterns, applied before the translation patterns.
    resub_patterns: dict[str, str | Callable[[re.Match], str]] = {
        # turn all spaces into single whitespace except if in doxygen comment
        r"(?<!^;;)[ \t\r\f\v]+": " ",
        # remove normal comments, detect strings
        r"^((?:[^\"\n;]||[^\"\n;]*\"(?:[^\"\n]|\\\")+\")+);(?!;).*": r"\1",
        r"^;;\*+": ";;",  # remove ;;*** comments
        r"[ \t\r\f\v]+$": "",  # remove trailing spaces
    }

    # count/flags are passed by keyword: passing them positionally is
    # deprecated since Python 3.13.
    for pat, subst in resub_patterns.items():
        file_content = re.sub(pat, subst, file_content, count=0, flags=re.MULTILINE)
    for pat, subst in patterns.items():
        file_content = re.sub(pat, subst, file_content, count=0, flags=re.MULTILINE)

    return file_content
def main():
    """Filter the file named on the command line and print the result.

    Expects exactly one command line argument, the path of the input file.
    The transformed text is printed on stdout. With a wrong number of
    arguments a usage message is printed and the process exits with status 1.
    """
    global filename
    if len(sys.argv) != 2:
        # stdout carries the filtered source, so the usage text goes to stderr.
        print("Usage: python doxy-asm65.py <input_file>", file=sys.stderr)
        sys.exit(1)
    filename = sys.argv[1]
    # Explicit encoding so the result does not depend on the locale.
    # Undecodable bytes are replaced instead of aborting the run.
    with open(filename, "r", encoding="utf-8", errors="replace") as file:
        file_content = file.read()
    transformed_content = parse_custom_language(file_content)
    print(transformed_content)


if __name__ == "__main__":
    main()