blob: 15f0ecf9fa42bfe3c33643ce4b4945d09c632670 [file] [log] [blame]
#!/usr/bin/env python3
#--------------------------------------------------------------------
#
# preproc.py
#
# General purpose macro preprocessor
#
#--------------------------------------------------------------------
# Usage:
#
# preproc.py input_file [output_file] [-D<variable> ...]
#
# Where <variable> may be a keyword or a key=value pair
#
# Syntax: Basically like cpp. However, this preprocessor handles
# only a limited set of keywords, so it does not otherwise mangle
# the file in the belief that it must be C code. Handling of boolean
# relations is important, so these are thoroughly defined (see below)
#
# #if defined(<variable>) [...]
# #ifdef <variable>
# #ifndef <variable>
# #elseif <variable>
# #else
# #endif
#
# #define <variable> [...]
# #define <variable>(<parameters>) [...]
# #undef <variable>
#
# #include <filename>
#
# <variable> may be
# <keyword>
# <keyword>=<value>
#
# <keyword> without '=' is effectively the same as <keyword>=1
# Lack of a keyword is equivalent to <keyword>=0, in a conditional.
#
# Boolean operators (in order of precedence):
# ! NOT
# && AND
# || OR
#
# Comments:
# Most comments (C-like or Tcl-like) are output as-is. A
# line beginning with "###" is treated as a preprocessor
# comment and is not copied to the output.
#
# Examples:
# #if defined(X) || defined(Y)
# #else
# #if defined(Z)
# #endif
#--------------------------------------------------------------------
import os
import re
import sys
def solve_statement(condition):
    """Reduce a boolean preprocessor expression to its simplest form.

    The expression is expected to have had all defined keywords replaced
    by their values already, so ``defined(1)`` is true and ``defined()``
    of anything else is false.

    Reductions are applied in order of precedence, restarting from the
    top after every successful substitution:

        defined(K)   ->  '1' if K is '1', otherwise '0'
        (X)          ->  value of X (innermost parentheses first)
        !X           ->  '0' if X is '1', otherwise '1'
        A || B       ->  split first, so that && binds tighter than ||
        A && B

    Every match is replaced by its own value (not by the value computed
    for the first match on the line).

    Parameters:
        condition: the expression text (a string).

    Returns:
        The reduced expression with surrounding whitespace removed.  For
        a well-formed boolean expression this is '1' or '0'; text that
        cannot be reduced is returned as-is.
    """
    defrex = re.compile(r'defined[ \t]*\(([^)]+)\)')
    # Match only parentheses that contain no other parentheses, so that
    # nested groups are resolved from the inside out.
    parenrex = re.compile(r'\(([^()]+)\)')
    notrex = re.compile(r'!([^&|]+)')
    orrex = re.compile(r'(.+)\|\|(.+)')
    andrex = re.compile(r'(.+)&&(.+)')

    def defined_value(match):
        # A defined keyword has already been replaced by its value.
        return '1' if match.group(1).strip() == '1' else '0'

    def paren_value(match):
        return solve_statement(match.group(1))

    def negated_value(match):
        return '0' if solve_statement(match.group(1)) == '1' else '1'

    # Every substitution strictly shortens the expression, so this loop
    # always terminates.
    while True:
        # Search for defined(K) (K must be a single keyword)
        condition, count = defrex.subn(defined_value, condition)
        if count:
            continue

        # Search for (X) recursively
        condition, count = parenrex.subn(paren_value, condition)
        if count:
            continue

        # Search for !X recursively
        condition, count = notrex.subn(negated_value, condition)
        if count:
            continue

        # Search for A||B recursively.  This is the lowest-precedence
        # operator, so the expression is split on it first.
        lmatch = orrex.search(condition)
        if lmatch:
            first = solve_statement(lmatch.group(1))
            second = solve_statement(lmatch.group(2))
            repl = '1' if first == '1' or second == '1' else '0'
            condition = condition[:lmatch.start()] + repl + condition[lmatch.end():]
            continue

        # Search for A&&B recursively
        lmatch = andrex.search(condition)
        if lmatch:
            first = solve_statement(lmatch.group(1))
            second = solve_statement(lmatch.group(2))
            repl = '1' if first == '1' and second == '1' else '0'
            condition = condition[:lmatch.start()] + repl + condition[lmatch.end():]
            continue

        break

    # Remove surrounding whitespace (including a stray carriage return)
    return condition.strip()
def solve_condition(condition, keys, defines, keyrex):
    """Evaluate a conditional after substituting the known definitions.

    Each keyword in 'keys' is replaced, in list order, using its compiled
    pattern from 'keyrex' and its replacement text from 'defines'.  The
    resulting expression is handed to solve_statement().

    Returns 1 if the expression reduces to '1', otherwise 0.
    """
    expanded = condition
    for name in keys:
        pattern = keyrex[name]
        expanded = pattern.sub(defines[name], expanded)

    return int(solve_statement(expanded) == '1')
def sortkeys(keys):
    """Return a new list of keys in which longer names precede their substrings.

    Each keyword is placed immediately before the first already-placed
    keyword that occurs inside it; if there is none, it goes at the end.
    The input list is not modified.
    """
    ordered = []
    for word in keys:
        # Position of the first placed key contained in 'word'; default
        # to the end of the list, which makes insert() act like append().
        position = next(
            (idx for idx, placed in enumerate(ordered) if placed in word),
            len(ordered),
        )
        ordered.insert(position, word)
    return ordered
def runpp(keys, keyrex, defines, ccomm, utf, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to 'ofile'.

    Parameters:
        keys:      list of defined keywords, in substitution order.
                   Updated in place by #define and #undef so that
                   definitions made in an included file remain visible
                   to the including file.
        keyrex:    dict mapping each keyword to its compiled search pattern.
        defines:   dict mapping each keyword to its replacement text.
        ccomm:     if true, remove C-style /* ... */ comments.
        utf:       if true, read the input as UTF-8 and write each output
                   line to 'ofile' as UTF-8 encoded bytes; otherwise
                   'ofile' receives text.
        incdirs:   list of directories searched when 'inputfile' cannot
                   be opened directly.
        inputfile: name of the file to process.
        ofile:     open output stream.

    Problems in the input are reported on stderr; nothing is raised for
    them and nothing is returned.
    """
    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')       # C-style comment start
    ccendrex = re.compile(r'\*/')         # C-style comment end
    contrex = re.compile(r'.*\\$')        # Backslash continuation line
    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # This code is not designed to operate on huge files.  Neither is it
    # designed to be efficient.

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    # Use the same decoding whether the file is found directly or through
    # the include search path.
    if utf:
        open_args = {'encoding': 'utf-8', 'errors': 'replace'}
    else:
        open_args = {}

    ifile = None
    try:
        ifile = open(inputfile, 'r', **open_args)
    except FileNotFoundError:
        for incdir in incdirs:
            try:
                ifile = open(incdir + '/' + inputfile, 'r', **open_args)
            except FileNotFoundError:
                continue
            break

    if not ifile:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # The whole file is read up front, so it can be closed immediately.
    with ifile:
        filetext = ifile.readlines()

    ccblock = -1
    ifblock = -1
    ifstack = []
    lineno = 0
    lastline = []

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # end(0) already points just past the closing "*/"
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line
        if lastline:
            # Strip the trailing backslash and the newline that follows it
            # from the saved line before joining.
            line = lastline[0:-2] + line
            lastline = []

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.
        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        pmatch = commentrex.match(line)
        if pmatch:
            continue

        # Handle ifdef
        pmatch = ifdefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)
            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                # Enclosing block is not active; skip to the matching endif
                ifblock = 2
            continue

        # Handle ifndef
        pmatch = ifndefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)
            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
                ifblock = 1 if ifblock == 0 else 0
            else:
                ifblock = 2
            continue

        # Handle if
        pmatch = ifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)
            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0
            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0
            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)
            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        pmatch = badelserex.match(line)
        if pmatch:
            print("Error: Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, utf, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture all
                # the parameters individually for substitution
                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separate values
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses; e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.
                pcondition = condition + r'\('
                for param in parameters[0:-1]:
                    pcondition += '(.*),'
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                for idx, param in enumerate(parameters, start=1):
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                value = pmatch.group(2)
                # Note: Need to check for infinite recursion here, but it's tricky.
                defines[condition] = value
                keyrex[condition] = re.compile(condition)

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Re-sort in place so the caller's list sees the new order
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.
        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break
            if line == origline:
                break

        # Output the line
        if not utf:
            print(line, file=ofile, end='')
        else:
            ofile.write(line.encode('utf-8'))

    if ifblock != -1 or ifstack != []:
        print("Error: input file ended with an unterminated #if block.", file=sys.stderr)
def printusage(progname):
    """Print the command-line help text to standard output.

    'progname' is the program name shown on the usage line.  Every option
    accepted by the command-line parser is listed, including -utf8.
    """
    print('Usage: ' + progname + ' input_file [output_file] [-options]')
    print(' Options are:')
    print(' -help Print this help text.')
    print(' -quiet Stop without error if input file is not found.')
    print(' -ccomm Remove C comments in /* ... */ delimiters.')
    print(' -utf8 Read the input as UTF-8 and write UTF-8 encoded output.')
    print(' -D<def> Define word <def> and set its value to 1.')
    print(' -D<def>=<val> Define word <def> and set its value to <val>.')
    print(' -I<dir> Add <dir> to search path for input files.')
if __name__ == '__main__':
    # Command-line entry point:
    #   preproc.py input_file [output_file] [-options]
    # Exit status is 0 on success (or when -quiet is given and the input
    # file is missing) and 1 on a bad option or an unusable file.

    # Parse command line for options and arguments
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    quiet = False
    utf = False

    for item in options:
        # Split on the first '=' only, so that values may contain '='
        name, has_value, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif name == '-quiet':
            quiet = True
        elif name == '-utf8':
            utf = True
        elif item.startswith('-I'):
            # Take the path from the whole option; it may contain '='
            incdirs.append(item[2:])
        elif name.startswith('-D') and len(name) > 2:
            keyword = name[2:]
            if not has_value:
                value = '1'
            defines[keyword] = value
            keyrex[keyword] = re.compile(keyword)
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            # Includes a bare "-D": an empty keyword would compile to a
            # pattern that matches everywhere.
            print('Bad option ' + item + ', options are -help, -quiet, -ccomm, -utf8, -D<def> -I<dir>\n', file=sys.stderr)
            sys.exit(1)

    if not os.path.isfile(inputfile):
        if quiet:
            sys.exit(0)
        # Stop here rather than truncating the output file and failing later
        print("Error: No input file " + inputfile + " found.", file=sys.stderr)
        sys.exit(1)

    if outputfile:
        try:
            # In UTF-8 mode runpp() writes encoded bytes
            ofile = open(outputfile, 'wb' if utf else 'w')
        except OSError:
            print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    elif utf:
        # Bytes must go to the binary layer underneath stdout
        ofile = sys.stdout.buffer
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last; otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.
    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, utf, incdirs, inputfile, ofile)
    finally:
        # Only close a file opened here, never stdout
        if outputfile:
            ofile.close()

    # Set mode of outputfile to be equal to that of inputfile (if not stdout)
    if outputfile:
        statinfo = os.stat(inputfile)
        mode = statinfo.st_mode
        os.chmod(outputfile, mode)

    sys.exit(0)