blob: 1fca5beffdbbd6af98a65fc0cfd38a9a583addce [file] [log] [blame]
Tim Edwards55f4d0e2020-07-05 15:41:02 -04001#!/usr/bin/env python3
2#--------------------------------------------------------------------
3#
4# preproc.py
5#
6# General purpose macro preprocessor
7#
8#--------------------------------------------------------------------
9# Usage:
10#
11# preproc.py input_file [output_file] [-D<variable> ...]
12#
13# Where <variable> may be a keyword or a key=value pair
14#
15# Syntax: Basically like cpp. However, this preprocessor handles
16# only a limited set of keywords, so it does not otherwise mangle
17# the file in the belief that it must be C code. Handling of boolean
18# relations is important, so these are thoroughly defined (see below)
19#
20# #if defined(<variable>) [...]
21# #ifdef <variable>
22# #ifndef <variable>
23# #elseif <variable>
24# #else
25# #endif
26#
27# #define <variable> [...]
28# #define <variable>(<parameters>) [...]
29# #undef <variable>
30#
31# #include <filename>
32#
33# <variable> may be
34# <keyword>
35# <keyword>=<value>
36#
37# <keyword> without '=' is effectively the same as <keyword>=1
38# Lack of a keyword is equivalent to <keyword>=0, in a conditional.
39#
40# Boolean operators (in order of precedence):
41# ! NOT
42# && AND
43# || OR
44#
45# Comments:
46# Most comments (C-like or Tcl-like) are output as-is. A
47# line beginning with "###" is treated as a preprocessor
48# comment and is not copied to the output.
49#
50# Examples:
51# #if defined(X) || defined(Y)
52# #else
53# #if defined(Z)
54# #endif
55#--------------------------------------------------------------------
56
57import re
58import sys
59
# Patterns used by solve_statement().  Raw strings are used so that the
# regular-expression escapes are not mistaken for (invalid) string escapes.
_DEFINED_REX = re.compile(r'defined[ \t]*\(([^)]+)\)')
_PAREN_REX = re.compile(r'\(([^()]+)\)')        # innermost parentheses only
_NOT_REX = re.compile(r'!([^&|]+)')
_OR_REX = re.compile(r'(.+)\|\|(.+)')
_AND_REX = re.compile(r'(.+)&&(.+)')


def _solve_defined(match):
    """Return '1' if the argument of defined(...) was substituted by 1, else '0'."""
    return '1' if match.group(1).strip(' \t') == '1' else '0'


def _solve_paren(match):
    """Return the solved contents of one innermost parenthesized group."""
    return solve_statement(match.group(1))


def _solve_not(match):
    """Return the logical negation ('0' or '1') of the solved operand of '!'."""
    return '0' if solve_statement(match.group(1)) == '1' else '1'


def _solve_or(match):
    """Return '1' if either solved operand of '||' is '1', else '0'."""
    left = solve_statement(match.group(1))
    right = solve_statement(match.group(2))
    return '1' if left == '1' or right == '1' else '0'


def _solve_and(match):
    """Return '1' if both solved operands of '&&' are '1', else '0'."""
    left = solve_statement(match.group(1))
    right = solve_statement(match.group(2))
    return '1' if left == '1' and right == '1' else '0'


def solve_statement(condition):
    """Reduce a boolean preprocessor expression to a string result.

    The expression is expected to have had its keywords already replaced by
    their values, so that a defined keyword reads as "1".  Supported syntax
    is defined(K), parentheses, and the operators !, && and || (listed from
    highest to lowest precedence).

    Parameters:
        condition: the expression text.

    Returns:
        The reduced expression with surrounding spaces and tabs removed.
        A fully solved expression is '1' or '0'; text that contains no
        recognized operator is returned unchanged (apart from the strip).
    """
    while True:
        previous = condition

        # defined(K): K must be a single keyword.  If the keyword was
        # defined, it has already been replaced by 1.  Every occurrence is
        # evaluated on its own argument.
        condition = _DEFINED_REX.sub(_solve_defined, condition)

        # Resolve parentheses from the inside out, and finish all of them
        # before looking at '!', so that "!((A) || B)" negates the whole
        # group rather than a fragment of it.
        while _PAREN_REX.search(condition):
            condition = _PAREN_REX.sub(_solve_paren, condition)

        # '!' binds tightest; each occurrence is negated individually.
        condition = _NOT_REX.sub(_solve_not, condition)

        # Split on the lowest-precedence operator first ('||'), so that
        # '&&' groups more tightly: A || B && C == A || (B && C).
        condition = _OR_REX.sub(_solve_or, condition)
        condition = _AND_REX.sub(_solve_and, condition)

        # Every substitution shortens the text, so this terminates.
        if condition == previous:
            break

    # Remove leading and trailing whitespace
    return condition.strip(' \t')
138
def solve_condition(condition, keys, defines, keyrex):
    """Evaluate a conditional after expanding all known definitions.

    Parameters:
        condition: text of the conditional expression.
        keys: keywords to expand, applied in list order.
        defines: mapping of keyword to its replacement text.
        keyrex: mapping of keyword to the compiled pattern that finds it.

    Returns:
        1 if the expanded expression solves to '1', otherwise 0.
    """
    expanded = condition
    for name in keys:
        # Do definition replacement on the conditional
        expanded = keyrex[name].sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0
149
def sortkeys(keys):
    """Return a new list in which any keyword containing another precedes it.

    Each keyword is placed immediately before the first already-placed
    keyword that occurs inside it as a substring; if there is none, it goes
    at the end.  The input list is not modified.
    """
    ordered = []
    for candidate in keys:
        # Position of the first placed keyword that is contained in the
        # candidate; falls back to the end of the list when there is none.
        position = next(
            (idx for idx, placed in enumerate(ordered) if placed in candidate),
            len(ordered))
        ordered.insert(position, candidate)
    return ordered
165
def _open_input(inputfile, incdirs):
    """Open inputfile for reading, trying each include directory if needed.

    Returns the open file object, or None if the file was not found either
    directly or under any directory in incdirs.
    """
    try:
        return open(inputfile, 'r')
    except FileNotFoundError:
        pass
    for incdir in incdirs:
        try:
            return open(incdir + '/' + inputfile, 'r')
        except FileNotFoundError:
            continue
    return None


def _register_key(keys, name, front=False):
    """Add name to keys (if new) and re-sort the list in place.

    The list is updated in place (slice assignment) rather than rebound so
    that the caller's list stays in step with the shared defines/keyrex
    dictionaries, including across nested #include processing.
    """
    if name not in keys:
        if front:
            keys.insert(0, name)
        else:
            keys.append(name)
        keys[:] = sortkeys(keys)


def runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to ofile.

    Parameters:
        keys: list of defined keywords; updated in place by #define/#undef.
        keyrex: dict mapping each keyword to its compiled search pattern;
            updated in place.
        defines: dict mapping each keyword to its replacement text; updated
            in place.
        ccomm: if true, C-style /* ... */ comments are removed.
        incdirs: directories searched when a file cannot be opened directly.
        inputfile: name of the file to process.
        ofile: writable text stream receiving the output.

    Returns:
        None.  Problems (unreadable file, malformed or unbalanced
        conditionals) are reported on stderr.

    Note: this code is not designed to operate on huge files, nor to be
    efficient.  A definition whose value contains its own keyword is not
    detected and makes the substitution loop run forever.
    """
    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')     # C-style comment start
    ccendrex = re.compile(r'\*/')       # C-style comment end
    contrex = re.compile(r'.*\\$')      # Backslash continuation line

    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # The three block-opening directives share one handler; the flag says
    # whether the solved condition must be inverted (#ifndef).
    openers = ((ifdefrex, False), (ifndefrex, True), (ifrex, False))

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    ifile = _open_input(inputfile, incdirs)
    if ifile is None:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # Read everything up front; the file is closed even if reading fails.
    with ifile:
        filetext = ifile.readlines()

    ccblock = -1        # 1 while inside an unterminated C comment
    ifblock = -1
    ifstack = []        # ifblock states of the enclosing conditionals
    lineno = 0
    lastline = ''       # pending directive line that ended in a backslash

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        # ematch offsets are relative to the text after "/*"
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # end(0) already points just past "*/"; keep all text
                    # that follows the comment terminator.
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line: drop the trailing
        # backslash and newline, then join with the current line.
        if lastline:
            line = lastline[0:-2] + line
            lastline = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.
        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        if commentrex.match(line):
            continue

        # Handle ifdef, ifndef and if
        pmatch = None
        negate = False
        for rex, inverted in openers:
            pmatch = rex.match(line)
            if pmatch:
                negate = inverted
                break
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = solve_condition(pmatch.group(1), keys, defines, keyrex)
                if negate:
                    ifblock = 1 if ifblock == 0 else 0
            else:
                # Enclosing block is not being output; skip this one entirely
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                ifblock = solve_condition(pmatch.group(1), keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        pmatch = badelserex.match(line)
        if pmatch:
            print("Error: Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            runpp(keys, keyrex, defines, ccomm, incdirs, pmatch.group(1), ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ...
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture
                # all the parameters individually for substitution
                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separated values.
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses; e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.
                pcondition = condition + r'\('
                pcondition += '(.*),' * (len(parameters) - 1)
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                for idx, param in enumerate(parameters, start=1):
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                # Note: Need to check for infinite recursion here, but it's tricky.
                defines[condition] = pmatch.group(2)
                keyrex[condition] = re.compile(condition)

            # Parameterized keys go to the front of the list
            _register_key(keys, condition, front=bool(parameters))
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            _register_key(keys, condition)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything).
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.
        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        print(line, file=ofile, end='')

    if ifblock != -1 or ifstack != []:
        print("Error: input file ended with an unterminated #if block.", file=sys.stderr)

    return
498
def printusage(progname):
    """Print the command-line usage summary for progname to standard output."""
    usage_lines = (
        'Usage: ' + progname + ' input_file [output_file] [-options]',
        ' Options are:',
        ' -help Print this help text.',
        ' -ccomm Remove C comments in /* ... */ delimiters.',
        ' -D<def> Define word <def> and set its value to 1.',
        ' -D<def>=<val> Define word <def> and set its value to <val>.',
        ' -I<dir> Add <dir> to search path for input files.',
    )
    for text in usage_lines:
        print(text)
    return
508
if __name__ == '__main__':

    # Parse command line for options (anything starting with '-') and
    # positional arguments (input file, optional output file).
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    # No output file means the result is written to standard output
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    for item in options:
        # Split on the first '=' only, so that values may contain '='
        name, separator, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif item[0:2] == '-I':
            # Use the whole option text: a directory name may contain '='
            incdirs.append(item[2:])
        elif name[0:2] == '-D':
            keyword = name[2:]
            # A keyword given without '=' is defined with the value 1
            defines[keyword] = value if separator else '1'
            keyrex[keyword] = re.compile(keyword)
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -ccomm, -D<def> -I<dir>\n', file=sys.stderr)
            sys.exit(1)

    if outputfile:
        try:
            ofile = open(outputfile, 'w')
        except OSError:
            print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last; otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.
    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile)
    finally:
        # Never close standard output; only a file opened here
        if ofile is not sys.stdout:
            ofile.close()
    sys.exit(0)