blob: 96129217d1b5577a8d25800d1e96710fbd964083 [file] [log] [blame]
#!/usr/bin/env python3
#--------------------------------------------------------------------
#
# preproc.py
#
# General purpose macro preprocessor
#
#--------------------------------------------------------------------
# Usage:
#
#    preproc.py input_file [output_file] [-D<variable> ...]
#
# Where <variable> may be a keyword or a key=value pair
#
# Syntax:  Basically like cpp.  However, this preprocessor handles
# only a limited set of keywords, so it does not otherwise mangle
# the file in the belief that it must be C code.  Handling of boolean
# relations is important, so these are thoroughly defined (see below)
#
#    #if defined(<variable>) [...]
#    #ifdef <variable>
#    #ifndef <variable>
#    #elseif <variable>
#    #else
#    #endif
#
#    #define <variable> [...]
#    #define <variable>(<parameters>) [...]
#    #undef <variable>
#
#    #include <filename>
#
# <variable> may be
#    <keyword>
#    <keyword>=<value>
#
#    <keyword> without '=' is effectively the same as <keyword>=1
#    Lack of a keyword is equivalent to <keyword>=0, in a conditional.
#
# Boolean operators (in order of precedence):
#    !    NOT
#    &&   AND
#    ||   OR
#
# Comments:
#    Most comments (C-like or Tcl-like) are output as-is.  A
#    line beginning with "###" is treated as a preprocessor
#    comment and is not copied to the output.
#
# Examples:
#    #if defined(X) || defined(Y)
#    #else
#    #if defined(Z)
#    #endif
#--------------------------------------------------------------------
56
Tim Edwards9134eed2021-02-13 14:42:59 -050057import os
Tim Edwards55f4d0e2020-07-05 15:41:02 -040058import re
59import sys
60
def solve_statement(condition):
    """Evaluate a preprocessor boolean expression given as text.

    The expression is expected to have had its keywords substituted
    already, so that a defined keyword appears as '1'.  Supported
    syntax, from highest to lowest precedence:

        defined(K)   '1' if K is the text '1', otherwise '0'
        ( ... )      grouping (may be nested)
        !X           NOT
        A && B       AND
        A || B       OR

    Returns a string.  Any expression containing an operator evaluates
    to '1' or '0'.  A bare operand is returned with its surrounding
    whitespace removed, so an unresolved keyword such as 'X' comes back
    as 'X'; callers treat anything other than '1' as false.
    """
    defrex = re.compile(r'defined[ \t]*\(([^\)]+)\)')
    # Matches only an innermost group, i.e. one with no nested parentheses.
    parenrex = re.compile(r'\(([^()]+)\)')

    def solve_defined(match):
        # A keyword that was defined has already been replaced by 1.
        return '1' if match.group(1).strip() == '1' else '0'

    def solve_group(match):
        return solve_statement(match.group(1))

    # Each defined(K) is evaluated on its own; a callback is used so that
    # one occurrence's result is not pasted over every other occurrence.
    condition = defrex.sub(solve_defined, condition)

    # Collapse parenthesized groups from the inside out.  Every pass removes
    # at least one pair of parentheses, so the loop terminates.
    replaced = 1
    while replaced:
        condition, replaced = parenrex.subn(solve_group, condition)

    condition = condition.strip()

    # Split on the LOWEST precedence operator first, so that && binds more
    # tightly than ||, and ! binds more tightly than both.
    if '||' in condition:
        terms = [solve_statement(term) for term in condition.split('||')]
        return '1' if '1' in terms else '0'

    if '&&' in condition:
        terms = [solve_statement(term) for term in condition.split('&&')]
        return '1' if all(term == '1' for term in terms) else '0'

    if condition.startswith('!'):
        return '0' if solve_statement(condition[1:]) == '1' else '1'

    return condition
139
def solve_condition(condition, keys, defines, keyrex):
    """Expand known definitions in a conditional, then evaluate it.

    Every keyword in 'keys' is substituted (in list order) using its
    compiled pattern from 'keyrex' and its replacement text from
    'defines'.  The expanded text is handed to solve_statement().

    Returns the integer 1 if the expression evaluates to '1', else 0.
    """
    expanded = condition
    for name in keys:
        expanded = keyrex[name].sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0
150
def sortkeys(keys):
    """Return a new list in which longer keywords precede their substrings.

    Each keyword is placed immediately before the first keyword already
    in the result that occurs inside it as a substring; if there is no
    such keyword it goes at the end.  The input list is not modified.
    """
    ordered = []
    for word in keys:
        # Position of the first already-placed keyword contained in 'word';
        # falling back to len(ordered) makes insert() behave like append().
        position = next(
            (idx for idx, placed in enumerate(ordered) if placed in word),
            len(ordered),
        )
        ordered.insert(position, word)
    return ordered
166
def runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to 'ofile'.

    Parameters:
        keys      -- list of defined keywords, in substitution order.
                     Updated IN PLACE by #define / #undef so that the
                     caller (including a parent file doing an #include)
                     sees the same definitions afterwards.
        keyrex    -- dict mapping keyword -> compiled search pattern.
        defines   -- dict mapping keyword -> replacement text.
        ccomm     -- if true, strip C-style /* ... */ comments.
        incdirs   -- directories searched when 'inputfile' cannot be
                     opened as given.
        inputfile -- path of the file to read.
        ofile     -- writable text stream receiving the output.

    Returns None.  Problems (unreadable input, malformed or unbalanced
    conditionals) are reported on stderr and processing carries on where
    possible.

    This code is not designed to operate on huge files, nor to be
    efficient: the whole input is read into memory.
    """

    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')        # C-style comment start
    ccendrex = re.compile(r'\*/')          # C-style comment end
    contrex = re.compile(r'.*\\$')         # Backslash continuation line

    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # Try the name as given first, then each include directory in order.
    ifile = None
    candidates = [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]
    for candidate in candidates:
        try:
            ifile = open(candidate, 'r')
        except FileNotFoundError:
            continue
        break

    if ifile is None:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # Read everything up front and release the handle immediately, so that
    # nested #include processing does not keep a chain of files open.
    with ifile:
        filetext = ifile.readlines()

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    ccblock = -1        # 1 while inside a multi-line C comment
    ifblock = -1
    ifstack = []        # saved ifblock states of enclosing conditionals
    lastline = ''       # pending directive line that ended in a backslash

    for lineno, line in enumerate(filetext, start=1):

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # end(0) already points just past the closing "*/";
                    # keep everything that follows it.
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line.  The saved line
        # ends with a backslash followed by the newline, so strip both.
        if lastline:
            line = lastline[0:-2] + line
            lastline = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.

        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        if commentrex.match(line):
            continue

        # Handle ifdef
        pmatch = ifdefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                # Enclosing block is inactive, so this one is skipped whole.
                ifblock = 2
            continue

        # Handle ifndef
        pmatch = ifndefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
                ifblock = 1 if ifblock == 0 else 0
            else:
                ifblock = 2
            continue

        # Handle if
        pmatch = ifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        # NOTE: elserex above accepts every line that badelserex accepts, so
        # this branch is not expected to trigger; it is kept as a safety net.
        pmatch = badelserex.match(line)
        if pmatch:
            print("Error: Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture all
                # the parameters individually for substitution

                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separate values
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses; e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.

                pcondition = condition + r'\('
                pcondition += '(.*),' * (len(parameters) - 1)
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                for idx, param in enumerate(parameters, start=1):
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                value = pmatch.group(2)
                # Note: Need to check for infinite recursion here, but it's tricky.
                defines[condition] = value
                keyrex[condition] = re.compile(condition)

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Re-sort in place: rebinding the local name would leave the
                # caller's list (e.g. the file that #include'd this one)
                # without the new ordering and without later additions.
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.

        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        print(line, file=ofile, end='')

    if ifblock != -1 or ifstack != []:
        print("Error: input file ended with an unterminated #if block.", file=sys.stderr)

    return
499
def printusage(progname):
    """Print the command-line help text for 'progname' to stdout."""
    help_lines = (
        'Usage: ' + progname + ' input_file [output_file] [-options]',
        ' Options are:',
        ' -help Print this help text.',
        ' -quiet Stop without error if input file is not found.',
        ' -ccomm Remove C comments in /* ... */ delimiters.',
        ' -D<def> Define word <def> and set its value to 1.',
        ' -D<def>=<val> Define word <def> and set its value to <val>.',
        ' -I<dir> Add <dir> to search path for input files.',
    )
    for text in help_lines:
        print(text)
    return
510
if __name__ == '__main__':

    # Parse command line for options and arguments.  Anything starting
    # with '-' is an option; everything else is a positional argument.
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    # No output file name means "write to stdout".
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    quiet = False
    for item in options:
        # Split on the FIRST '=' only, so that values (and paths) which
        # themselves contain '=' are kept intact.
        name, has_value, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif name == '-quiet':
            quiet = True
        elif name[0:2] == '-I':
            incdirs.append(item[2:])
        elif name[0:2] == '-D':
            keyword = name[2:]
            # A keyword given without '=' is the same as <keyword>=1
            defines[keyword] = value if has_value else '1'
            keyrex[keyword] = re.compile(keyword)
            # Repeating -D for the same word overrides the value but must
            # not list the keyword twice.
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -quiet, -ccomm, -D<def> -I<dir>\n', file=sys.stderr)
            sys.exit(1)

    # Check the input BEFORE opening the output, so that a missing input
    # file never truncates an existing output file.  With -quiet a missing
    # input is not an error; otherwise report it and fail.
    if not os.path.isfile(inputfile):
        if quiet:
            sys.exit(0)
        print("Error: No input file " + inputfile + " found.", file=sys.stderr)
        sys.exit(1)

    if outputfile:
        try:
            ofile = open(outputfile, 'w')
        except OSError:
            print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last;  otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.

    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile)
    finally:
        # Close the output even if preprocessing raised.
        if ofile is not sys.stdout:
            ofile.close()

    # Set mode of outputfile to be equal to that of inputfile (if not stdout)
    if outputfile:
        statinfo = os.stat(inputfile)
        mode = statinfo.st_mode
        os.chmod(outputfile, mode)

    sys.exit(0)