"""
Copyright 2013, Shinya Takamaeda-Yamazaki and Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import absolute_import
from __future__ import print_function
import sys
import re

from ply.lex import lex, TOKEN

class VerilogLexer(object):
    """ Verilog HDL Lexical Analyzer """

    def __init__(self, error_func):
        self.filename = ''
        self.error_func = error_func
        self.directives = []
        self.default_nettype = 'wire'

    def build(self, **kwargs):
        self.lexer = lex(object=self, **kwargs)

    def input(self, data):
        self.lexer.input(data)

    def reset_lineno(self):
        self.lexer.lineno = 1

    def get_directives(self):
        return tuple(self.directives)

    def get_default_nettype(self):
        return self.default_nettype

    def token(self):
        return self.lexer.token()

    keywords = (
        'MODULE', 'ENDMODULE', 'BEGIN', 'END', 'GENERATE', 'ENDGENERATE', 'GENVAR',
        'FUNCTION', 'ENDFUNCTION', 'TASK', 'ENDTASK',
        'INPUT', 'INOUT', 'OUTPUT', 'TRI', 'REG', 'LOGIC', 'WIRE', 'INTEGER', 'REAL', 'SIGNED',
        'PARAMETER', 'LOCALPARAM', 'SUPPLY0', 'SUPPLY1',
        'ASSIGN', 'ALWAYS', 'ALWAYS_FF', 'ALWAYS_COMB', 'ALWAYS_LATCH', 'SENS_OR', 'POSEDGE', 'NEGEDGE', 'INITIAL',
        'IF', 'ELSE', 'FOR', 'WHILE', 'CASE', 'CASEX', 'CASEZ', 'UNIQUE', 'ENDCASE', 'DEFAULT',
        'WAIT', 'FOREVER', 'DISABLE', 'FORK', 'JOIN',
    )

    reserved = {}
    for keyword in keywords:
        if keyword == 'SENS_OR':
            # 'or' in a sensitivity list is a keyword, distinct from the '|' operator
            reserved['or'] = keyword
        else:
            reserved[keyword.lower()] = keyword
    operators = (
        'PLUS', 'MINUS', 'POWER', 'TIMES', 'DIVIDE', 'MOD',
        'NOT', 'OR', 'NOR', 'AND', 'NAND', 'XOR', 'XNOR',
        'LOR', 'LAND', 'LNOT',
        'LSHIFTA', 'RSHIFTA', 'LSHIFT', 'RSHIFT',
        'LT', 'GT', 'LE', 'GE', 'EQ', 'NE', 'EQL', 'NEL',
        'COND',  # ?
        'EQUALS',
    )

    tokens = keywords + operators + (
        'ID',
        'AT', 'COMMA', 'COLON', 'SEMICOLON', 'DOT',
        'PLUSCOLON', 'MINUSCOLON',
        'FLOATNUMBER', 'STRING_LITERAL',
        'INTNUMBER_DEC', 'SIGNED_INTNUMBER_DEC',
        'INTNUMBER_HEX', 'SIGNED_INTNUMBER_HEX',
        'INTNUMBER_OCT', 'SIGNED_INTNUMBER_OCT',
        'INTNUMBER_BIN', 'SIGNED_INTNUMBER_BIN',
        'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE',
        'DELAY', 'DOLLER',
    )

    skipped = (
        'COMMENTOUT', 'LINECOMMENT', 'DIRECTIVE',
    )
    # Ignore
    t_ignore = ' \t'

    # Directive
    directive = r"""\`.*?\n"""

    @TOKEN(directive)
    def t_DIRECTIVE(self, t):
        self.directives.append((self.lexer.lineno, t.value))
        t.lexer.lineno += t.value.count("\n")
        m = re.match(r"^`default_nettype\s+(.+)\n", t.value)
        if m:
            self.default_nettype = m.group(1)
        # no return value: the directive token is discarded

    # Comment
    linecomment = r"""//.*?\n"""
    commentout = r"""/\*(.|\n)*?\*/"""

    @TOKEN(linecomment)
    def t_LINECOMMENT(self, t):
        t.lexer.lineno += t.value.count("\n")

    @TOKEN(commentout)
    def t_COMMENTOUT(self, t):
        t.lexer.lineno += t.value.count("\n")
    # Operator
    t_LOR = r'\|\|'
    t_LAND = r'\&\&'

    t_NOR = r'~\|'
    t_NAND = r'~\&'
    t_XNOR = r'~\^'
    t_OR = r'\|'
    t_AND = r'\&'
    t_XOR = r'\^'

    t_LNOT = r'!'
    t_NOT = r'~'

    t_LSHIFTA = r'<<<'
    t_RSHIFTA = r'>>>'
    t_LSHIFT = r'<<'
    t_RSHIFT = r'>>'

    t_EQL = r'==='
    t_NEL = r'!=='
    t_EQ = r'=='
    t_NE = r'!='
    t_LE = r'<='
    t_GE = r'>='
    t_LT = r'<'
    t_GT = r'>'

    t_POWER = r'\*\*'
    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_MOD = r'%'

    t_COND = r'\?'
    t_EQUALS = r'='

    t_PLUSCOLON = r'\+:'
    t_MINUSCOLON = r'-:'

    t_AT = r'@'
    t_COMMA = r','
    t_SEMICOLON = r';'
    t_COLON = r':'
    t_DOT = r'\.'

    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'\{'
    t_RBRACE = r'\}'

    t_DELAY = r'\#'
    t_DOLLER = r'\$'
    bin_number = '[0-9]*\'[bB][0-1xXzZ?][0-1xXzZ?_]*'
    signed_bin_number = '[0-9]*\'[sS][bB][0-1xXzZ?][0-1xXzZ?_]*'
    octal_number = '[0-9]*\'[oO][0-7xXzZ?][0-7xXzZ?_]*'
    signed_octal_number = '[0-9]*\'[sS][oO][0-7xXzZ?][0-7xXzZ?_]*'
    hex_number = '[0-9]*\'[hH][0-9a-fA-FxXzZ?][0-9a-fA-FxXzZ?_]*'
    signed_hex_number = '[0-9]*\'[sS][hH][0-9a-fA-FxXzZ?][0-9a-fA-FxXzZ?_]*'
    decimal_number = '([0-9]*\'[dD][0-9xXzZ?][0-9xXzZ?_]*)|([0-9][0-9_]*)'
    signed_decimal_number = '[0-9]*\'[sS][dD][0-9xXzZ?][0-9xXzZ?_]*'

    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    float_number = '((((' + fractional_constant + ')' + \
        exponent_part + '?)|([0-9]+' + exponent_part + ')))'

    simple_escape = r"""([a-zA-Z\\?'"])"""
    octal_escape = r"""([0-7]{1,3})"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    escape_sequence = r"""(\\(""" + simple_escape + '|' + octal_escape + '|' + hex_escape + '))'
    string_char = r"""([^"\\\n]|""" + escape_sequence + ')'
    string_literal = '"' + string_char + '*"'

    identifier = r"""(([a-zA-Z_])([a-zA-Z_0-9$])*)|((\\\S)(\S)*)"""
    @TOKEN(string_literal)
    def t_STRING_LITERAL(self, t):
        return t

    @TOKEN(float_number)
    def t_FLOATNUMBER(self, t):
        return t

    @TOKEN(signed_bin_number)
    def t_SIGNED_INTNUMBER_BIN(self, t):
        return t

    @TOKEN(bin_number)
    def t_INTNUMBER_BIN(self, t):
        return t

    @TOKEN(signed_octal_number)
    def t_SIGNED_INTNUMBER_OCT(self, t):
        return t

    @TOKEN(octal_number)
    def t_INTNUMBER_OCT(self, t):
        return t

    @TOKEN(signed_hex_number)
    def t_SIGNED_INTNUMBER_HEX(self, t):
        return t

    @TOKEN(hex_number)
    def t_INTNUMBER_HEX(self, t):
        return t

    @TOKEN(signed_decimal_number)
    def t_SIGNED_INTNUMBER_DEC(self, t):
        return t

    @TOKEN(decimal_number)
    def t_INTNUMBER_DEC(self, t):
        return t

    @TOKEN(identifier)
    def t_ID(self, t):
        t.type = self.reserved.get(t.value, 'ID')
        return t
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    def t_error(self, t):
        msg = 'Illegal character %s' % repr(t.value[0])
        self._error(msg, t)

    def _error(self, msg, token):
        location = self._make_tok_location(token)
        self.error_func(msg, location[0], location[1])
        self.lexer.skip(1)

    def _find_tok_column(self, token):
        # Walk backwards from the token to the previous newline
        i = token.lexpos
        while i > 0:
            if self.lexer.lexdata[i] == '\n':
                break
            i -= 1
        return (token.lexpos - i) + 1

    def _make_tok_location(self, token):
        return (token.lineno, self._find_tok_column(token))

def dump_tokens(text):
    def my_error_func(msg, line, column):
        sys.stderr.write(msg + "\n")
        sys.exit()

    lexer = VerilogLexer(error_func=my_error_func)
    lexer.build()
    lexer.input(text)

    ret = []

    # Tokenize
    while True:
        tok = lexer.token()
        if not tok:
            break  # No more input
        ret.append("%s %s %d %s %d\n" %
                   (tok.value, tok.type, tok.lineno, lexer.filename, tok.lexpos))

    return ''.join(ret)
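
# Minimal usage sketch (not part of the original module): feed a small piece
# of Verilog source through dump_tokens() and print one line per token
# (value, type, line number, filename, lexpos). The sample module below is
# made up for illustration; any syntactically plausible Verilog text works.
if __name__ == '__main__':
    sample = """
module blinker(input CLK, input RST, output reg LED);
  always @(posedge CLK) begin
    if (RST) LED <= 0;
    else LED <= ~LED;
  end
endmodule
"""
    print(dump_tokens(sample))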