blob: 15f0ecf9fa42bfe3c33643ce4b4945d09c632670 [file] [log] [blame]
Tim Edwards55f4d0e2020-07-05 15:41:02 -04001#!/usr/bin/env python3
2#--------------------------------------------------------------------
3#
4# preproc.py
5#
6# General purpose macro preprocessor
7#
8#--------------------------------------------------------------------
9# Usage:
10#
11# preproc.py input_file [output_file] [-D<variable> ...]
12#
13# Where <variable> may be a keyword or a key=value pair
14#
15# Syntax: Basically like cpp. However, this preprocessor handles
16# only a limited set of keywords, so it does not otherwise mangle
17# the file in the belief that it must be C code. Handling of boolean
18# relations is important, so these are thoroughly defined (see below)
19#
20# #if defined(<variable>) [...]
21# #ifdef <variable>
22# #ifndef <variable>
23# #elseif <variable>
24# #else
25# #endif
26#
27# #define <variable> [...]
28# #define <variable>(<parameters>) [...]
29# #undef <variable>
30#
31# #include <filename>
32#
33# <variable> may be
34# <keyword>
35# <keyword>=<value>
36#
37# <keyword> without '=' is effectively the same as <keyword>=1
38# Lack of a keyword is equivalent to <keyword>=0, in a conditional.
39#
40# Boolean operators (in order of precedence):
41# ! NOT
42# && AND
43# || OR
44#
45# Comments:
46# Most comments (C-like or Tcl-like) are output as-is. A
47# line beginning with "###" is treated as a preprocessor
48# comment and is not copied to the output.
49#
50# Examples:
51# #if defined(X) || defined(Y)
52# #else
53# #if defined(Z)
54# #endif
55#--------------------------------------------------------------------
56
Tim Edwards9134eed2021-02-13 14:42:59 -050057import os
Tim Edwards55f4d0e2020-07-05 15:41:02 -040058import re
59import sys
60
def solve_statement(condition):
    """Reduce a boolean preprocessor expression to a result string.

    The expression is expected to have had all defined keywords replaced
    by their values already, so that a defined keyword appears as "1".

    Supported syntax, from highest to lowest precedence:
        defined(K)   "1" if K is the literal 1, otherwise "0"
        ( ... )      grouping (may be nested)
        !X           NOT
        A && B       AND
        A || B       OR

    Parameters:
        condition: the expression text.

    Returns:
        "1" or "0" when the expression contains an operator or a
        defined() test;  otherwise the expression itself with the
        surrounding whitespace removed.  Callers treat anything other
        than "1" as false.
    """
    defrex = re.compile(r'defined[ \t]*\(([^)]+)\)')
    # Only the innermost groups are matched, so nesting resolves inside-out.
    parenrex = re.compile(r'\(([^()]+)\)')

    def _defined(match):
        # A defined keyword has been replaced by "1" before this call.
        return '1' if match.group(1).strip() == '1' else '0'

    # Each defined(K) is evaluated on its own;  substituting one result
    # for every occurrence would give the wrong answer for expressions
    # such as "defined(X) || defined(1)".
    condition = defrex.sub(_defined, condition)

    # Collapse parenthesized groups until none remain.  Every substitution
    # removes a pair of parentheses, so the loop always terminates.
    while True:
        condition, count = parenrex.subn(
                lambda match: solve_statement(match.group(1)), condition)
        if not count:
            break

    # Split on the lowest-precedence operator first so that "&&" binds
    # more tightly than "||".
    if '||' in condition:
        results = [solve_statement(term) for term in condition.split('||')]
        return '1' if '1' in results else '0'

    if '&&' in condition:
        results = [solve_statement(term) for term in condition.split('&&')]
        return '1' if all(result == '1' for result in results) else '0'

    # strip() also removes a trailing carriage return left by CRLF input.
    condition = condition.strip()

    if len(condition) > 1 and condition.startswith('!'):
        return '0' if solve_statement(condition[1:]) == '1' else '1'

    return condition
139
def solve_condition(condition, keys, defines, keyrex):
    """Evaluate a conditional after expanding the known definitions.

    Parameters:
        condition: text of the conditional expression.
        keys: defined keywords, in the order they are to be substituted.
        defines: maps each keyword to its replacement text.
        keyrex: maps each keyword to the compiled pattern that finds it.

    Returns:
        1 if the expanded expression solves to "1", otherwise 0.
    """
    expanded = condition
    # Expand every definition, one keyword at a time, in list order.
    for name in keys:
        pattern = keyrex[name]
        expanded = pattern.sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0
150
def sortkeys(keys):
    """Return a copy of keys ordered so that containing words come first.

    Each keyword is placed immediately before the first keyword already
    in the result that occurs inside it;  a keyword that contains none
    of them goes to the end.  The input list is not modified.
    """
    ordered = []
    for candidate in keys:
        # Position of the first already-placed word found inside candidate
        position = next(
            (idx for idx, placed in enumerate(ordered) if placed in candidate),
            None)
        if position is None:
            ordered.append(candidate)
        else:
            ordered.insert(position, candidate)
    return ordered
166
def runpp(keys, keyrex, defines, ccomm, utf, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to ofile.

    Parameters:
        keys: list of defined keywords in substitution order.  Updated in
            place by #define and #undef so that the caller (including a
            file that #includes this one) sees the changes.
        keyrex: dict mapping each keyword to its compiled search pattern
            (updated in place).
        defines: dict mapping each keyword to its replacement text
            (updated in place).
        ccomm: if true, remove C-style /* ... */ comments.
        utf: if true, read the input as UTF-8 (undecodable bytes are
            replaced) and write UTF-8 encoded bytes to ofile, which must
            then be a binary stream.  Otherwise ofile is a text stream.
        incdirs: directories searched, in order, when inputfile cannot be
            opened as given.
        inputfile: name of the file to process.
        ofile: open output stream.

    Returns:
        None.  Problems are reported on stderr;  if the input file cannot
        be opened, nothing is written.
    """

    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')		# C-style comment start
    ccendrex = re.compile(r'\*/')		# C-style comment end
    contrex = re.compile(r'.*\\$')		# Backslash continuation line

    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # This code is not designed to operate on huge files.  Neither is it
    # designed to be efficient.

    # ifblock state:
    # -1 :  not in an if/else block
    #  0 :  no condition satisfied yet
    #  1 :  condition satisfied
    #  2 :  condition was handled, waiting for endif

    # Use the same decoding for the file as given and for any copy found
    # through the include search path.
    openargs = {'encoding': 'utf-8', 'errors': 'replace'} if utf else {}

    ifile = None
    for candidate in [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]:
        try:
            ifile = open(candidate, 'r', **openargs)
        except FileNotFoundError:
            continue
        break

    if ifile is None:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # The whole file is read up front, so the handle can be released
    # before any (possibly recursive) processing starts.
    with ifile:
        filetext = ifile.readlines()

    ccblock = -1
    ifblock = -1
    ifstack = []
    lineno = 0
    lastline = []

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        # ematch offsets are relative to the end of "/*"
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # ematch.end(0) already points just past "*/"
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line
        if lastline:
            # Strip the backslash and the newline that follows it from
            # the saved line before joining.
            line = lastline[0:-2] + line
            lastline = []

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.

        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        pmatch = commentrex.match(line)
        if pmatch:
            continue

        # Handle ifdef
        pmatch = ifdefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                # Enclosing block is inactive, so this one is too
                ifblock = 2
            continue

        # Handle ifndef
        pmatch = ifndefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
                ifblock = 1 if ifblock == 0 else 0
            else:
                ifblock = 2
            continue

        # Handle if
        pmatch = ifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        pmatch = badelserex.match(line)
        if pmatch:
            print("Error: Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, utf, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture all
                # the parameters individually for substitution

                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separated values
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses;  e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.

                pcondition = condition + r'\('
                pcondition += '(.*),' * (len(parameters) - 1)
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                for idx, param in enumerate(parameters, 1):
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                value = pmatch.group(2)
                # Note:  Need to check for infinite recursion here, but it's tricky.
                defines[condition] = value
                keyrex[condition] = re.compile(condition)

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Reorder in place so the caller's list stays in step
                # with 'defines' and 'keyrex'.
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.

        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        if not utf:
            print(line, file=ofile, end='')
        else:
            ofile.write(line.encode('utf-8'))

    if ifblock != -1 or ifstack != []:
        print("Error:  input file ended with an unterminated #if block.", file=sys.stderr)

    return
505
def printusage(progname):
    """Print the command-line usage summary to standard output.

    Parameters:
        progname: program name to show in the usage line.
    """
    print('Usage: ' + progname + ' input_file [output_file] [-options]')
    print(' Options are:')
    print(' -help Print this help text.')
    print(' -quiet Stop without error if input file is not found.')
    print(' -ccomm Remove C comments in /* ... */ delimiters.')
    # -utf8 is accepted by the option parser, so list it here as well.
    print(' -utf8 Read and write files using UTF-8 encoding.')
    print(' -D<def> Define word <def> and set its value to 1.')
    print(' -D<def>=<val> Define word <def> and set its value to <val>.')
    print(' -I<dir> Add <dir> to search path for input files.')
    return
516
if __name__ == '__main__':

    # Parse command line for options and arguments
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    quiet = False
    utf = False
    for item in options:
        # Split at the first '=' only, so a value may itself contain '='.
        name, separator, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif name == '-quiet':
            quiet = True
        elif name == '-utf8':
            utf = True
        elif item[0:2] == '-I':
            # Take the whole remainder;  a directory name may contain '='.
            incdirs.append(item[2:])
        elif name[0:2] == '-D':
            keyword = name[2:]
            if not separator:
                value = '1'
            defines[keyword] = value
            keyrex[keyword] = re.compile(keyword)
            # A repeated -D only replaces the value.  A duplicate entry in
            # 'keys' would outlive an #undef of the keyword.
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -quiet, -ccomm, -utf8, -D<def> -I<dir>\n', file=sys.stderr)
            sys.exit(1)

    # Locate the input file the same way runpp() does:  as given first,
    # then in each include directory in order.
    sourcepath = None
    for candidate in [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]:
        if os.path.isfile(candidate):
            sourcepath = candidate
            break

    if sourcepath is None:
        if quiet:
            sys.exit(0)
        # Stop before the output file is created or truncated.
        print("Error: No input file " + inputfile + " found.", file=sys.stderr)
        sys.exit(1)

    if outputfile:
        try:
            # runpp() writes encoded bytes in UTF-8 mode
            ofile = open(outputfile, 'wb' if utf else 'w')
        except OSError:
            print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    elif utf:
        # Bytes must go to the binary layer underneath sys.stdout
        ofile = sys.stdout.buffer
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last;  otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.

    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, utf, incdirs, inputfile, ofile)
    finally:
        if outputfile:
            ofile.close()
        else:
            ofile.flush()

    # Set mode of outputfile to be equal to that of inputfile (if not stdout)
    if outputfile:
        statinfo = os.stat(sourcepath)
        mode = statinfo.st_mode
        os.chmod(outputfile, mode)

    sys.exit(0)