Tim Edwards | 55f4d0e | 2020-07-05 15:41:02 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | #-------------------------------------------------------------------- |
| 3 | # |
| 4 | # preproc.py |
| 5 | # |
| 6 | # General purpose macro preprocessor |
| 7 | # |
| 8 | #-------------------------------------------------------------------- |
| 9 | # Usage: |
| 10 | # |
| 11 | # preproc.py input_file [output_file] [-D<variable> ...] |
| 12 | # |
| 13 | # Where <variable> may be a keyword or a key=value pair |
| 14 | # |
| 15 | # Syntax: Basically like cpp. However, this preprocessor handles |
| 16 | # only a limited set of keywords, so it does not otherwise mangle |
| 17 | # the file in the belief that it must be C code. Handling of boolean |
| 18 | # relations is important, so these are thoroughly defined (see below) |
| 19 | # |
| 20 | # #if defined(<variable>) [...] |
| 21 | # #ifdef <variable> |
| 22 | # #ifndef <variable> |
| 23 | # #elseif <variable> |
| 24 | # #else |
| 25 | # #endif |
| 26 | # |
| 27 | # #define <variable> [...] |
| 28 | # #define <variable>(<parameters>) [...] |
| 29 | # #undef <variable> |
| 30 | # |
| 31 | # #include <filename> |
| 32 | # |
| 33 | # <variable> may be |
| 34 | # <keyword> |
| 35 | # <keyword>=<value> |
| 36 | # |
| 37 | # <keyword> without '=' is effectively the same as <keyword>=1 |
| 38 | # Lack of a keyword is equivalent to <keyword>=0, in a conditional. |
| 39 | # |
| 40 | # Boolean operators (in order of precedence): |
| 41 | # ! NOT |
| 42 | # && AND |
| 43 | # || OR |
| 44 | # |
| 45 | # Comments: |
| 46 | # Most comments (C-like or Tcl-like) are output as-is. A |
| 47 | # line beginning with "###" is treated as a preprocessor |
| 48 | # comment and is not copied to the output. |
| 49 | # |
| 50 | # Examples: |
| 51 | # #if defined(X) || defined(Y) |
| 52 | # #else |
| 53 | # #if defined(Z) |
| 54 | # #endif |
| 55 | #-------------------------------------------------------------------- |
| 56 | |
| 57 | import re |
| 58 | import sys |
| 59 | |
# Matches "defined(K)".  By the time a condition is evaluated, a defined
# keyword K is expected to have been replaced by its value already.
_DEFINED_REX = re.compile(r'defined[ \t]*\(([^\)]+)\)')

# Matches an innermost parenthesized group (one containing no other
# parentheses), so nested groups are resolved from the inside out.
_PAREN_REX = re.compile(r'\(([^()]+)\)')


def solve_statement(condition):
    """Evaluate a boolean preprocessor expression given as a string.

    The expression is expected to have had its keywords substituted by
    their values beforehand.  Supported syntax, from highest to lowest
    precedence: "defined(K)", parentheses, "!" (NOT), "&&" (AND) and
    "||" (OR).  An operand counts as true only when it is exactly '1'.

    Returns '1' or '0' when the expression contains an operator or a
    "defined()" term.  A bare operand is returned with surrounding
    whitespace removed, so callers should compare the result to '1'.
    An "&&" or "||" with an empty operand is malformed and yields '0'.
    """

    def _defined_value(match):
        # Each defined(K) term is judged on its own keyword; a shared
        # replacement would give every term the value of the first one.
        return '1' if match.group(1).strip() == '1' else '0'

    condition = _DEFINED_REX.sub(_defined_value, condition)

    # Collapse parenthesized groups, innermost first, until none remain.
    # Every pass removes at least one pair of parentheses, and a solved
    # group never contains parentheses, so this loop terminates.
    while True:
        condition, replaced = _PAREN_REX.subn(
            lambda match: solve_statement(match.group(1)), condition)
        if not replaced:
            break

    # Split on the lowest-precedence operator first so that "&&" binds
    # more tightly than "||".
    for operator, combine in (('||', any), ('&&', all)):
        if operator in condition:
            terms = [solve_statement(term) for term in condition.split(operator)]
            if '' in terms:
                # Missing operand, e.g. "1 ||": treat as false.
                return '0'
            return '1' if combine(term == '1' for term in terms) else '0'

    condition = condition.strip()

    # A lone "!" has no operand and is left as-is (which is not true).
    if condition.startswith('!') and len(condition) > 1:
        return '0' if solve_statement(condition[1:]) == '1' else '1'

    return condition
| 138 | |
def solve_condition(condition, keys, defines, keyrex):
    """Expand known keywords in a conditional and evaluate it.

    Each keyword in 'keys' is replaced, in list order, using its compiled
    pattern from 'keyrex' and its replacement text from 'defines'.  The
    expanded text is then passed to solve_statement().

    Returns the integer 1 if the statement evaluates to '1', otherwise 0.
    """
    expanded = condition
    for name in keys:
        expanded = keyrex[name].sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0
| 149 | |
def sortkeys(keys):
    """Return a new list of keys in which longer names precede their substrings.

    Each keyword is inserted just before the first already-placed keyword
    that occurs inside it as a substring; if there is none, it is appended.
    Substitution walks the list in order, so this keeps a short keyword
    from being replaced inside a longer keyword that contains it.

    The input sequence is not modified.
    """
    newkeys = []
    for keyword in keys:
        for position, inword in enumerate(newkeys):
            if inword in keyword:
                # Insert keyword before the shorter word it contains
                newkeys.insert(position, keyword)
                break
        else:
            newkeys.append(keyword)
    return newkeys
| 165 | |
def runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to ofile.

    Parameters:
        keys      -- list of defined keywords, in substitution order.
                     Updated in place by #define and #undef so that the
                     caller (including a parent file doing an #include)
                     sees the changes.
        keyrex    -- dict mapping each keyword to its compiled search
                     pattern.  Updated in place.
        defines   -- dict mapping each keyword to its replacement text.
                     Updated in place.
        ccomm     -- if true, remove C-style /* ... */ comments.
        incdirs   -- directories searched, in order, when inputfile
                     cannot be opened as given.
        inputfile -- name of the file to read.
        ofile     -- writable text stream that receives the output.

    Problems (unreadable input, malformed or unbalanced conditionals) are
    reported on stderr; nothing is raised for them.  Returns None.
    """

    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')         # C-style comment start
    ccendrex = re.compile(r'\*/')           # C-style comment end
    contrex = re.compile(r'.*\\$')          # Backslash continuation line

    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # This code is not designed to operate on huge files.  Neither is it
    # designed to be efficient.

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    # Try the name as given first, then each include directory in order.
    ifile = None
    candidates = [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]
    for candidate in candidates:
        try:
            ifile = open(candidate, 'r')
        except FileNotFoundError:
            continue
        break

    if ifile is None:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # The whole file is read up front, so the handle can be released
    # immediately, even if processing fails later.
    with ifile:
        filetext = ifile.readlines()

    ccblock = -1
    ifblock = -1
    ifstack = []
    lineno = 0
    lastline = ''

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # end(0) already points just past the closing "*/"
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line
        if lastline:
            # Strip the trailing backslash and the newline that follows it
            # before joining with the current line.
            line = lastline[0:-2] + line
            lastline = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.

        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        pmatch = commentrex.match(line)
        if pmatch:
            continue

        # Handle ifdef
        pmatch = ifdefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                # Enclosing block is not being output; skip this one entirely
                ifblock = 2
            continue

        # Handle ifndef
        pmatch = ifndefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
                ifblock = 1 if ifblock == 0 else 0
            else:
                ifblock = 2
            continue

        # Handle if
        pmatch = ifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        pmatch = badelserex.match(line)
        if pmatch:
            print("Error: Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture all
                # the parameters individually for substitution

                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separated values.
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses; e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.

                pcondition = condition + r'\('
                for param in parameters[0:-1]:
                    pcondition += '(.*),'
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                idx = 1
                for param in parameters:
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')
                    idx = idx + 1

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                value = pmatch.group(2)
                # Note: Need to check for infinite recursion here, but it's tricky.
                defines[condition] = value
                keyrex[condition] = re.compile(condition)

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Re-sort in place: the list is shared with the caller, and
                # rebinding the local name would hide later changes from it.
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.  A definition whose value keeps
        # producing new matches for itself is not detected here.

        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        print(line, file=ofile, end='')

    if ifblock != -1 or ifstack != []:
        print("Error: input file ended with an unterminated #if block.", file=sys.stderr)

    return
| 498 | |
def printusage(progname):
    """Print the command synopsis and the list of options to standard output.

    progname is the program name shown in the "Usage:" line.
    """
    usage_lines = (
        'Usage: ' + progname + ' input_file [output_file] [-options]',
        ' Options are:',
        ' -help Print this help text.',
        ' -ccomm Remove C comments in /* ... */ delimiters.',
        ' -D<def> Define word <def> and set its value to 1.',
        ' -D<def>=<val> Define word <def> and set its value to <val>.',
        ' -I<dir> Add <dir> to search path for input files.',
    )
    for text in usage_lines:
        print(text)
    return
| 508 | |
if __name__ == '__main__':

    # Parse command line for options and arguments.  Anything starting
    # with '-' is an option; everything else is a positional argument.
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    # No output file name means the result is written to standard output
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    for item in options:
        # Split at the first '=' only, so that values and directory names
        # may themselves contain '='.
        name, equals, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif item.startswith('-I'):
            incdirs.append(item[2:])
        elif name.startswith('-D') and len(name) > 2:
            keyword = name[2:]
            # "-D<def>" with no '=' is the same as "-D<def>=1"
            defines[keyword] = value if equals else '1'
            keyrex[keyword] = re.compile(keyword)
            # A repeated -D overrides the value but must not duplicate the key
            if keyword not in keys:
                keys.append(keyword)
                keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -ccomm, -D<def> -I<dir>\n', file=sys.stderr)
            sys.exit(1)

    if outputfile:
        try:
            ofile = open(outputfile, 'w')
        except OSError:
            print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last; otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.

    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile)
    finally:
        # Never close standard output; only a file opened here
        if ofile is not sys.stdout:
            ofile.close()
    sys.exit(0)