#!/usr/bin/env python3
#--------------------------------------------------------------------
#
# preproc.py
#
# General purpose macro preprocessor
#
#--------------------------------------------------------------------
# Usage:
#
#	preproc.py input_file [output_file] [-D<variable> ...]
#
# Where <variable> may be a keyword or a key=value pair
#
# Syntax:  Basically like cpp.  However, this preprocessor handles
# only a limited set of keywords, so it does not otherwise mangle
# the file in the belief that it must be C code.  Handling of boolean
# relations is important, so these are thoroughly defined (see below)
#
#	#if defined(<variable>) [...]
#	#ifdef <variable>
#	#ifndef <variable>
#	#elseif <variable>
#	#else
#	#endif
#
#	#define <variable> [...]
#	#define <variable>(<parameters>) [...]
#	#undef <variable>
#
#	#include <filename>
#
# <variable> may be
#	<keyword>
#	<keyword>=<value>
#
#	<keyword> without '=' is effectively the same as <keyword>=1
#	Lack of a keyword is equivalent to <keyword>=0, in a conditional.
#
# Boolean operators (in order of precedence):
#	!	NOT
#	&&	AND
#	||	OR	
#
# Comments:
#       Most comments (C-like or Tcl-like) are output as-is.  A
#	line beginning with "###" is treated as a preprocessor
#	comment and is not copied to the output.
#
# Examples:
#	#if defined(X) || defined(Y)
#	#else
#	#if defined(Z)
#	#endif
#--------------------------------------------------------------------

import os
import re
import sys

# Patterns used by solve_statement(); compiled once rather than on every
# (recursive) call.
_DEFINED_CALL_REX = re.compile(r'defined[ \t]*\(([^)]+)\)')
_INNER_PAREN_REX = re.compile(r'\(([^()]+)\)')


def solve_statement(condition):
    """Reduce a boolean preprocessor expression to its result string.

    The expression is expected to have had its keywords substituted
    already, so a defined keyword normally appears as '1'.  Supported
    syntax, from highest to lowest precedence:

        defined(K)   '1' if K is '1', otherwise '0'
        ( ... )      grouping
        !X           NOT
        A && B       AND
        A || B       OR

    Returns '1' or '0' when an operator was evaluated.  An expression
    without operators is returned with surrounding whitespace removed
    (for example an unsubstituted keyword name), which callers treat as
    false unless it is exactly '1'.  A missing operand counts as false.
    """
    # Each defined(K) is evaluated on its own; replacing every occurrence
    # with the result of the first one would give the wrong answer for
    # expressions such as "defined(A) || defined(B)".
    condition = _DEFINED_CALL_REX.sub(
        lambda m: '1' if m.group(1).strip() == '1' else '0', condition)

    # Collapse parenthesized groups from the innermost outward.  Every
    # pass removes at least one pair of parentheses, so this terminates.
    while _INNER_PAREN_REX.search(condition):
        condition = _INNER_PAREN_REX.sub(
            lambda m: solve_statement(m.group(1)), condition)

    # Split on the lowest-precedence operator first so that "&&" binds
    # more tightly than "||".
    if '||' in condition:
        results = [solve_statement(term) for term in condition.split('||')]
        return '1' if '1' in results else '0'

    if '&&' in condition:
        results = [solve_statement(term) for term in condition.split('&&')]
        return '1' if all(result == '1' for result in results) else '0'

    condition = condition.strip()

    # A lone "!" has no operand and is left alone (evaluates false).
    if condition.startswith('!') and len(condition) > 1:
        return '0' if solve_statement(condition[1:]) == '1' else '1'

    return condition

def solve_condition(condition, keys, defines, keyrex):
    """Evaluate a conditional expression against the current definitions.

    Each keyword in 'keys' is substituted into the expression, in list
    order, using its pattern from 'keyrex' and its replacement from
    'defines'.  The substituted expression is then reduced by
    solve_statement().  Returns 1 when the result is exactly '1' and 0
    otherwise.
    """
    expanded = condition
    for name in keys:
        expanded = keyrex[name].sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0

def sortkeys(keys):
    """Return a new list of keywords ordered for safe substitution.

    Keywords are taken in their given order.  Each one is inserted just
    before the first already-placed keyword that occurs inside it as a
    substring, or appended when there is none, so that a longer name is
    tried before a shorter name it contains.  The input list is not
    modified.
    """
    ordered = []
    for keyword in keys:
        slot = next(
            (pos for pos, placed in enumerate(ordered) if placed in keyword),
            len(ordered))
        ordered.insert(slot, keyword)
    return ordered

# Directive patterns used by runpp().  They are compiled once so that
# recursive #include processing does not rebuild them for every file.
_INCLUDE_REX = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
# The macro name may carry a parenthesized parameter list, which is allowed
# to contain spaces, e.g. "#define F(a, b) ...".
_DEFINE_VALUE_REX = re.compile(
    r'^[ \t]*#define[ \t]+([^ \t\n\r(]+(?:\([^)]*\))?)[ \t]+(.+)')
_MACRO_PARAM_REX = re.compile(r'^([^(]+)\(([^)]+)\)')
_DEFINE_REX = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
_UNDEF_REX = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
_IFDEF_REX = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
_IFNDEF_REX = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
_IF_REX = re.compile(r'^[ \t]*#if[ \t]+(.+)')
_ELSEIF_REX = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
_ELSE_REX = re.compile(r'^[ \t]*#else')
_ENDIF_REX = re.compile(r'^[ \t]*#endif')
_PP_COMMENT_REX = re.compile(r'^###[^#]*$')
_CONTINUATION_REX = re.compile(r'.*\\$')	# Backslash continuation line
_BAD_IF_REX = re.compile(r'^[ \t]*#if[ \t]*.*')
_BAD_ELSE_REX = re.compile(r'^[ \t]*#else[ \t]*.*')

# (pattern, negate) for the three directives that open a conditional block.
_IF_FORMS = ((_IFDEF_REX, False), (_IFNDEF_REX, True), (_IF_REX, False))


def _open_input(inputfile, utf, incdirs):
    """Open inputfile for reading, trying each directory of incdirs in turn
    when it is not found as given.  Returns None if it is found nowhere."""
    candidates = [inputfile]
    candidates.extend(os.path.join(incdir, inputfile) for incdir in incdirs)
    for path in candidates:
        try:
            if utf:
                return open(path, 'r', encoding='utf-8', errors='replace')
            return open(path, 'r')
        except FileNotFoundError:
            continue
    return None


def _strip_c_comments(line, in_comment):
    """Remove /* ... */ text from line.  'in_comment' tells whether the line
    starts inside a comment.  Returns (remaining text, still in comment)."""
    kept = []
    pos = 0
    while True:
        if in_comment:
            end = line.find('*/', pos)
            if end < 0:
                break
            pos = end + 2
            in_comment = False
        else:
            start = line.find('/*', pos)
            if start < 0:
                kept.append(line[pos:])
                break
            kept.append(line[pos:start])
            pos = start + 2
            in_comment = True
    return ''.join(kept), in_comment


def _macro_template(body, parameters):
    """Turn a macro body into an re.sub() template in which every whole-word
    use of the n-th parameter is replaced by the group reference \\g<n>."""
    groupref = {}
    for idx, name in enumerate(parameters, 1):
        if name:
            groupref.setdefault(name, '\\g<' + str(idx) + '>')
    if not groupref:
        return body

    # All parameters are replaced in a single pass so that a group
    # reference inserted for one parameter is never rewritten by another.
    names = sorted(groupref, key=len, reverse=True)
    token = re.compile(
        r'(?<![A-Za-z0-9_])(?:' + '|'.join(re.escape(name) for name in names)
        + r')(?![A-Za-z0-9_])')
    return token.sub(lambda m: groupref[m.group(0)], body)


def runpp(keys, keyrex, defines, ccomm, utf, incdirs, inputfile, ofile):
    """Preprocess 'inputfile' and write the result to 'ofile'.

    keys      -- list of defined macro names in substitution order (see
                 sortkeys()); updated in place by #define and #undef so
                 that definitions made in an included file stay visible
                 to the file that included it
    keyrex    -- dict mapping macro name to the compiled pattern matching
                 its use; updated in place
    defines   -- dict mapping macro name to its re.sub() replacement
                 template; updated in place
    ccomm     -- when true, remove C comments in /* ... */ delimiters
    utf       -- when true, read the input as UTF-8 and write each output
                 line to 'ofile' as UTF-8 encoded bytes
    incdirs   -- directories searched when a file is not found as named
    inputfile -- name of the file to process
    ofile     -- open output stream

    Problems in the input are reported on standard error and processing
    continues where possible.  Nothing is returned.

    This code is not designed to operate on huge files.  Neither is it
    designed to be efficient.
    """

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    ifile = _open_input(inputfile, utf, incdirs)
    if ifile is None:
        print("Error:  Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    # The whole file is read up front, so it can be closed before any
    # nested #include is processed.
    with ifile:
        filetext = ifile.readlines()

    in_comment = False
    ifblock = -1
    ifstack = []
    lineno = 0
    lastline = ''

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            began_inside = in_comment
            line, in_comment = _strip_c_comments(line, in_comment)
            if began_inside and in_comment and not line:
                # The entire line lies inside a comment.
                continue

        # Handle continuation detected in previous line:  drop the
        # backslash (and the newline after it) and join the two lines.
        if lastline:
            line = lastline.rstrip('\n')[:-1] + line
            lastline = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.

        cmatch = _CONTINUATION_REX.match(line)

        # Ignore lines beginning with "###"
        if _PP_COMMENT_REX.match(line):
            continue

        # Handle ifdef, ifndef and if
        for rex, negate in _IF_FORMS:
            pmatch = rex.match(line)
            if pmatch:
                break
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                result = solve_condition(pmatch.group(1), keys, defines, keyrex)
                ifblock = (1 - result) if negate else result
            else:
                # Nested inside a block that is being skipped.
                ifblock = 2
            continue

        # Handle elseif
        pmatch = _ELSEIF_REX.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                ifblock = solve_condition(pmatch.group(1), keys, defines, keyrex)
            continue

        # Handle else
        if _ELSE_REX.match(line):
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        if _ENDIF_REX.match(line):
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error:  #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        if _BAD_IF_REX.match(line):
            print("Error:  Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        # NOTE: every line this pattern matches is already caught by the
        # "#else" test above, so this branch is only a safety net.
        if _BAD_ELSE_REX.match(line):
            print("Error:  Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = _INCLUDE_REX.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, utf, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = _DEFINE_VALUE_REX.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = _MACRO_PARAM_REX.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)
                parameters = [param.strip() for param in rmatch.group(2).split(',')]

                # The search pattern must capture all the arguments
                # individually for substitution.  Note that this is based
                # on the cpp preprocessor, which apparently allows commas
                # in arguments if surrounded by parentheses;  e.g.,
                # "def(a, b, (c1,c2))".  This is NOT handled.
                pcondition = (re.escape(condition) + r'\('
                              + '(.*),' * (len(parameters) - 1) + r'(.*)\)')

                defines[condition] = _macro_template(pmatch.group(2), parameters)
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                # Note:  Need to check for infinite recursion here, but it's tricky.
                defines[condition] = pmatch.group(2)
                keyrex[condition] = re.compile(re.escape(condition))

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Slice assignment keeps the caller's list in step.
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = _DEFINE_REX.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(re.escape(condition))
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = _UNDEF_REX.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition, None)
                keyrex.pop(condition, None)
                keys[:] = [name for name in keys if name != condition]
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.

        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        if not utf:
            print(line, file=ofile, end='')
        else:
            ofile.write(line.encode('utf-8'))

    if ifblock != -1 or ifstack != []:
        print("Error:  input file ended with an unterminated #if block.", file=sys.stderr)

def printusage(progname):
    """Print the command-line help text to standard output.

    progname -- program name shown on the usage line
    """
    print('Usage: ' + progname + ' input_file [output_file] [-options]')
    print('   Options are:')
    print('      -help         Print this help text.')
    print('      -quiet        Stop without error if input file is not found.')
    print('      -ccomm        Remove C comments in /* ... */ delimiters.')
    print('      -utf8         Read the input as UTF-8 and write UTF-8 output.')
    print('      -D<def>       Define word <def> and set its value to 1.')
    print('      -D<def>=<val> Define word <def> and set its value to <val>.')
    print('      -I<dir>       Add <dir> to search path for input files.')

def main(argv=None):
    """Command-line entry point.

    argv -- argument list including the program name; defaults to sys.argv

    Returns the process exit status:  0 on success (also for -help, and
    for a missing input file under -quiet), 1 for a bad option, a missing
    input file, or an output file that cannot be opened.
    """
    if argv is None:
        argv = sys.argv

    # Parse command line for options and arguments
    options = [item for item in argv[1:] if item.startswith('-')]
    arguments = [item for item in argv[1:] if not item.startswith('-')]

    if not arguments:
        printusage(argv[0])
        return 0

    inputfile = arguments[0]
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    quiet = False
    utf = False
    for item in options:
        # Split on the first '=' only, so a value may itself contain '='.
        name, has_value, value = item.partition('=')
        if name == '-help':
            printusage(argv[0])
            return 0
        elif name == '-ccomm':
            ccomm = True
        elif name == '-quiet':
            quiet = True
        elif name == '-utf8':
            utf = True
        elif item.startswith('-I'):
            incdirs.append(item[2:])
        elif item.startswith('-D') and name[2:]:
            keyword = name[2:]
            defines[keyword] = value if has_value else '1'
            keyrex[keyword] = re.compile(re.escape(keyword))
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -quiet, -ccomm, -utf8, -D<def> -I<dir>\n',
                  file=sys.stderr)
            return 1

    # Locate the input file, as named or in one of the -I directories.
    candidates = [inputfile] + [os.path.join(incdir, inputfile) for incdir in incdirs]
    sourcepath = next((path for path in candidates if os.path.isfile(path)), None)
    if sourcepath is None:
        if quiet:
            return 0
        print("Error:  No input file " + inputfile + " found.", file=sys.stderr)
        return 1

    if outputfile:
        try:
            ofile = open(outputfile, 'wb' if utf else 'w')
        except OSError:
            print("Error:  Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            return 1
    else:
        # With -utf8 the preprocessor writes encoded bytes, which need the
        # binary layer underneath standard output.
        ofile = sys.stdout.buffer if utf else sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last;  otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.

    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, utf, incdirs, sourcepath, ofile)
    finally:
        if outputfile:
            ofile.close()
        else:
            ofile.flush()

    # Set mode of outputfile to be equal to that of inputfile (if not stdout)
    if outputfile:
        os.chmod(outputfile, os.stat(sourcepath).st_mode)

    return 0


if __name__ == '__main__':
    sys.exit(main())
