| # |
| # core.py |
| # |
| import os |
| import typing |
| from typing import ( |
| NamedTuple, |
| Union, |
| Callable, |
| Any, |
| Generator, |
| Tuple, |
| List, |
| TextIO, |
| Set, |
| Sequence, |
| ) |
| from abc import ABC, abstractmethod |
| from enum import Enum |
| import string |
| import copy |
| import warnings |
| import re |
| import sys |
| from collections.abc import Iterable |
| import traceback |
| import types |
| from operator import itemgetter |
| from functools import wraps |
| from threading import RLock |
| from pathlib import Path |
| |
| from .util import ( |
| _FifoCache, |
| _UnboundedCache, |
| __config_flags, |
| _collapse_string_to_ranges, |
| _escape_regex_range_chars, |
| _bslash, |
| _flatten, |
| LRUMemo as _LRUMemo, |
| UnboundedMemo as _UnboundedMemo, |
| ) |
| from .exceptions import * |
| from .actions import * |
| from .results import ParseResults, _ParseResultsWithOffset |
| from .unicode import pyparsing_unicode |
| |
# Largest native index value on this platform.
# NOTE(review): presumably used elsewhere as the "no upper bound" sentinel - confirm.
_MAX_INT = sys.maxsize
# Types accepted wherever a "string" is expected: text and raw bytes.
str_type: Tuple[type, ...] = (str, bytes)
| |
| # |
| # Copyright (c) 2003-2022 Paul T. McGuire |
| # |
| # Permission is hereby granted, free of charge, to any person obtaining |
| # a copy of this software and associated documentation files (the |
| # "Software"), to deal in the Software without restriction, including |
| # without limitation the rights to use, copy, modify, merge, publish, |
| # distribute, sublicense, and/or sell copies of the Software, and to |
| # permit persons to whom the Software is furnished to do so, subject to |
| # the following conditions: |
| # |
| # The above copyright notice and this permission notice shall be |
| # included in all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| # |
| |
| |
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for :class:`functools.cached_property` on Python < 3.8.

        The wrapped function is evaluated once per instance; the result is stored
        in the instance ``__dict__`` under the function's name, so later attribute
        lookups find the stored value directly and never reach this descriptor.
        """

        def __init__(self, func):
            self._func = func
            self.__doc__ = func.__doc__

        def __get__(self, instance, owner=None):
            # Class-level access (``Owner.attr``) passes instance=None; return the
            # descriptor itself, as functools.cached_property does, instead of
            # failing on ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
| |
| |
class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing features scheduled for a
    future release. Setting a value here to True turns the corresponding feature
    on in an earlier version, for compatibility development and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body so far
    _all_names = [_nm for _nm in locals() if not _nm.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]
| |
| |
class __diag__(__config_flags):
    # Holder for the global diagnostic switches; every switch starts disabled.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public switch names, then the subsets selected by their name prefix
    _all_names = [_nm for _nm in locals() if not _nm.startswith("_")]
    _warning_names = [_nm for _nm in _all_names if _nm.startswith("warn")]
    _debug_names = [_nm for _nm in _all_names if _nm.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # switch on each "warn*" flag in turn
        for warning_name in cls._warning_names:
            cls.enable(warning_name)
| |
| |
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings when the ``'<<'``
      operator is combined with a ``'|'`` (:class:`MatchFirst`) expression; going by its name
      this targets the operator-precedence pitfall (the code emitting the warning lives
      outside this section)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # The member *names* are what enable_diag()/disable_diag() pass on to the
    # flag holder; the integer values only keep the members distinct.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
| |
| |
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is selected by the enum member's name.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
| |
| |
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is selected by the enum member's name.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
| |
| |
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Delegates to the flag holder, which enables each flag whose name starts
    with ``warn``.
    """
    return __diag__.enable_all_warnings()
| |
| |
# hide abstract class: both flag holders above have already been built from it,
# so the imported base name is removed from this module's namespace
del __config_flags
| |
| |
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    ``warn_env_var`` supplies the starting answer (any non-empty value means
    "enable"). Each entry of ``cmd_line_warn_options`` is then read as an
    ``action:message:category:module:line`` warning filter, in order, and may
    override the answer:

    - a non-"ignore" action that names no message, category or module, or that
      names module ``pyparsing``, enables the warnings
    - an "ignore" action with module ``pyparsing`` or no module disables them

    Later options win over earlier ones. Returns the final decision.
    """
    decision = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with empty fields so a short option still unpacks into five parts
        fields = (option + "::::").split(":")[:5]
        w_action, w_message, w_category, w_module, w_line = fields
        is_ignore = w_action.lower().startswith("i")
        if is_ignore:
            if w_module in ("pyparsing", ""):
                decision = False
        else:
            unfiltered = not (w_message or w_category or w_module)
            if unfiltered or w_module == "pyparsing":
                decision = True
    return decision
| |
| |
# Import-time side effect: enable all diagnostic warnings when the interpreter's
# -W options and/or the PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
| |
| |
# Set of builtins that take a single argument. When one of these is used as a
# parse action, _trim_arity calls it with just the parsed tokens.
_single_arg_builtins = {
    sum, len, sorted, reversed,
    list, tuple, set,
    any, all, min, max,
}
| |
_generatorType = types.GeneratorType
# A parse action may accept 0 to 3 trailing arguments of (s, loc, toks);
# the return value, if any, can replace the matched tokens.
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# Same call shapes as ParseAction, but the callable returns a bool verdict.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# Called as fn(s, loc, expr, err) when an expression fails to match.
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# Debug hooks; the trailing bool is the cache_hit flag.
# try: (instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# match: (instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# fail: (instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
| |
| |
# Convenience character sets for building grammars.
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every printable ASCII character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)
| |
# Cached frame record for the reference line inside _trim_arity. It is None until
# _trim_arity first runs, then holds the single FrameSummary taken from
# traceback.extract_stack(limit=2)[-1] (not a whole StackSummary).
_trim_arity_call_line: typing.Optional[traceback.FrameSummary] = None
| |
| |
def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full ``(s, loc, toks)``
    argument list and forwards only as many trailing arguments as ``func``
    accepts. The accepted count is discovered lazily: the wrapper first passes
    all arguments and, each time the call itself fails with a ``TypeError``,
    drops one more leading argument (at most ``max_limit`` times). Once a call
    succeeds, the discovered count is kept and later ``TypeError`` exceptions
    propagate unchanged.

    Builtins listed in ``_single_arg_builtins`` skip the probing and are
    called with the tokens only.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments currently being dropped, and whether a call
    # has already succeeded with that number (both shared with wrapper)
    limit = 0
    found_arity = False

    def extract_tb(tb, limit=0):
        # reduce the traceback to the (filename, lineno) of its last extracted frame
        frames = traceback.extract_tb(tb, limit=limit)
        frame_summary = frames[-1]
        return [frame_summary[:2]]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # the error counts as an arity mismatch only when the last
                    # extracted frame is the func(...) call line above, i.e. the
                    # TypeError was not raised from deeper inside func
                    tb = te.__traceback__
                    trim_arity_type_error = (
                        extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
| |
| |
def condition_as_parse_action(
    fn: ParseCondition, message: str = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate returning ``True`` or ``False`` so that it can be used as a
    parse action. Useful where a parse action is required and
    :class:`ParserElement.add_condition` cannot be used (such as when adding a condition
    to an operator level in :class:`infix_notation`).

    The returned action raises when the predicate's result is falsy and
    otherwise returns nothing.

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    """
    if message is None:
        failure_text = "failed user-defined condition"
    else:
        failure_text = message

    if fatal:
        failure_cls = ParseFatalException
    else:
        failure_cls = ParseException

    # normalise the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise failure_cls(s, l, failure_text)

    return pa
| |
| |
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default "about to try a match" debug hook.

    Prints the expression, the location (with line and column), the source
    line, and a caret under the column. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    row = lineno(loc, instring)
    column = col(loc, instring)
    text = line(loc, instring)
    caret_pad = " " * (column - 1)
    print(
        f"{marker}Match {expr} at loc {loc}({row},{column})\n {text}\n {caret_pad}^"
    )
| |
| |
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug hook.

    Prints the expression and the matched tokens as a list. A leading ``*``
    marks a cache hit. The string and location arguments are not used.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print(f"{marker}Matched {expr} -> {matched}")
| |
| |
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug hook.

    Prints the expression together with the type name and text of the raised
    exception. A leading ``*`` marks a cache hit. The string and location
    arguments are not used.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")
| |
| |
def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns ``None``.
    """
    return None
| |
| |
| class ParserElement(ABC): |
| """Abstract base level parser element class.""" |
| |
# characters skipped as whitespace by default; each new element copies them
# into its own whiteChars set in __init__
DEFAULT_WHITE_CHARS: str = " \n\t\r"
# NOTE(review): not read in this section - presumably toggles fuller tracebacks; confirm
verbose_stacktrace: bool = False
# class used to wrap inline string literals; changed via inline_literals_using()
_literalStringClass: typing.Optional[type] = None
| |
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    The new default applies to elements created afterwards; the expressions
    predefined in this module that still follow the default are updated too.

    Example::

        # default whitespace chars are space, <TAB>, newline and carriage return
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # push the new setting into the module's predefined expressions, skipping
    # any that have opted out of following the default
    for builtin_expr in _builtin_exprs:
        if not builtin_expr.copyDefaultWhiteChars:
            continue
        builtin_expr.whiteChars = set(chars)
| |
| @staticmethod |
| def inline_literals_using(cls: type) -> None: |
| """ |
| Set class to be used for inclusion of string literals into a parser. |
| |
| Example:: |
| |
| # default literal class used is Literal |
| integer = Word(nums) |
| date_str = integer("year") + '/' + integer("month") + '/' + integer("day") |
| |
| date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] |
| |
| |
| # change to Suppress |
| ParserElement.inline_literals_using(Suppress) |
| date_str = integer("year") + '/' + integer("month") + '/' + integer("day") |
| |
| date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] |
| """ |
| ParserElement._literalStringClass = cls |
| |
class DebugActions(NamedTuple):
    # Bundle of the three optional debug hooks of an element; a None entry
    # means that hook is not installed.
    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when the match attempt raises
    debug_fail: typing.Optional[DebugExceptionAction]
| |
| def __init__(self, savelist: bool = False): |
| self.parseAction: List[ParseAction] = list() |
| self.failAction: typing.Optional[ParseFailAction] = None |
| self.customName = None |
| self._defaultName = None |
| self.resultsName = None |
| self.saveAsList = savelist |
| self.skipWhitespace = True |
| self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) |
| self.copyDefaultWhiteChars = True |
| # used when checking for left-recursion |
| self.mayReturnEmpty = False |
| self.keepTabs = False |
| self.ignoreExprs: List["ParserElement"] = list() |
| self.debug = False |
| self.streamlined = False |
| # optimize exception handling for subclasses that don't advance parse index |
| self.mayIndexError = True |
| self.errmsg = "" |
| # mark results names as modal (report only last) or cumulative (list all) |
| self.modalResults = True |
| # custom debug actions |
| self.debugActions = self.DebugActions(None, None, None) |
| # avoid redundant calls to preParse |
| self.callPreparse = True |
| self.callDuringTry = False |
| self.suppress_warnings_: List[Diagnostics] = [] |
| |
| def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": |
| """ |
| Suppress warnings emitted for a particular diagnostic on this expression. |
| |
| Example:: |
| |
| base = pp.Forward() |
| base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) |
| |
| # statement would normally raise a warning, but is now suppressed |
| print(base.parseString("x")) |
| |
| """ |
| self.suppress_warnings_.append(warning_type) |
| return self |
| |
| def copy(self) -> "ParserElement": |
| """ |
| Make a copy of this :class:`ParserElement`. Useful for defining |
| different parse actions for the same parsing pattern, using copies of |
| the original parse element. |
| |
| Example:: |
| |
| integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) |
| integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") |
| integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") |
| |
| print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) |
| |
| prints:: |
| |
| [5120, 100, 655360, 268435456] |
| |
| Equivalent form of ``expr.copy()`` is just ``expr()``:: |
| |
| integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") |
| """ |
| cpy = copy.copy(self) |
| cpy.parseAction = self.parseAction[:] |
| cpy.ignoreExprs = self.ignoreExprs[:] |
| if self.copyDefaultWhiteChars: |
| cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) |
| return cpy |
| |
| def set_results_name( |
| self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False |
| ) -> "ParserElement": |
| """ |
| Define name for referencing matching tokens as a nested attribute |
| of the returned parse results. |
| |
| Normally, results names are assigned as you would assign keys in a dict: |
| any existing value is overwritten by later values. If it is necessary to |
| keep all values captured for a particular results name, call ``set_results_name`` |
| with ``list_all_matches`` = True. |
| |
| NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; |
| this is so that the client can define a basic element, such as an |
| integer, and reference it in multiple places with different names. |
| |
| You can also set results names using the abbreviated syntax, |
| ``expr("name")`` in place of ``expr.set_results_name("name")`` |
| - see :class:`__call__`. If ``list_all_matches`` is required, use |
| ``expr("name*")``. |
| |
| Example:: |
| |
| date_str = (integer.set_results_name("year") + '/' |
| + integer.set_results_name("month") + '/' |
| + integer.set_results_name("day")) |
| |
| # equivalent form: |
| date_str = integer("year") + '/' + integer("month") + '/' + integer("day") |
| """ |
| listAllMatches = listAllMatches or list_all_matches |
| return self._setResultsName(name, listAllMatches) |
| |
| def _setResultsName(self, name, listAllMatches=False): |
| if name is None: |
| return self |
| newself = self.copy() |
| if name.endswith("*"): |
| name = name[:-1] |
| listAllMatches = True |
| newself.resultsName = name |
| newself.modalResults = not listAllMatches |
| return newself |
| |
| def set_break(self, break_flag: bool = True) -> "ParserElement": |
| """ |
| Method to invoke the Python pdb debugger when this element is |
| about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to |
| disable. |
| """ |
| if break_flag: |
| _parseMethod = self._parse |
| |
| def breaker(instring, loc, doActions=True, callPreParse=True): |
| import pdb |
| |
| # this call to pdb.set_trace() is intentional, not a checkin error |
| pdb.set_trace() |
| return _parseMethod(instring, loc, doActions, callPreParse) |
| |
| breaker._originalParseMethod = _parseMethod |
| self._parse = breaker |
| else: |
| if hasattr(self._parse, "_originalParseMethod"): |
| self._parse = self._parse._originalParseMethod |
| return self |
| |
def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with the given callables, which are
    run when the expression matches successfully.

    Parse actions can be called to perform data conversions, do extra validation,
    update external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable method with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The parsed tokens are passed to the parse action as ParseResults. They can be
    modified in place using list-style append, extend, and pop operations to update
    the parsed list elements; and with dictionary-style item set and del operations
    to add, update, or remove any named results. If the tokens are modified in place,
    it is not necessary to return them with a return statement.

    Parse actions can also completely replace the given tokens, with another ``ParseResults``
    object, or with some entirely different object (common for parse actions that perform data
    conversions). A convenient way to build a new parse result is to define the values
    using a dict, and then create the return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse actions for this
    expression are cleared.

    Raises ``TypeError`` if any of the given actions is not callable.

    Optional keyword arguments:

    - call_during_try = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. For parse actions that have side effects, it is
      important to only call the parse action once it is determined that it is being
      called as part of a successful parse. For parse actions that perform additional
      validation, then call_during_try should be passed as True, so that the validation
      code is included in the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None clears the actions and leaves callDuringTry as it was
    if list(fns) == [None]:
        self.parseAction = []
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction = list(map(_trim_arity, fns))
    # the legacy camelCase keyword is honoured when the new one is absent
    legacy_value = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_value)
    return self
| |
def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to this expression's existing list.
    See :class:`set_parse_action`.

    ``call_during_try`` (or legacy ``callDuringTry``) can switch the
    call-during-try behaviour on, but never switches it off once set.

    See examples in :class:`copy`.
    """
    trimmed = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(trimmed)
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self
| |
def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Append boolean predicate functions to this expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - call_during_try = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    failure_message = kwargs.get("message")
    is_fatal = kwargs.get("fatal", False)
    for predicate in fns:
        action = condition_as_parse_action(
            predicate, message=failure_message, fatal=is_fatal
        )
        self.parseAction.append(action)

    # can only switch call-during-try on, never off
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self
| |
| def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": |
| """ |
| Define action to perform if parsing fails at this expression. |
| Fail acton fn is a callable function that takes the arguments |
| ``fn(s, loc, expr, err)`` where: |
| |
| - s = string being parsed |
| - loc = location where expression match was attempted and failed |
| - expr = the parse expression that failed |
| - err = the exception thrown |
| |
| The function returns no value. It may throw :class:`ParseFatalException` |
| if it is desired to stop parsing immediately.""" |
| self.failAction = fn |
| return self |
| |
| def _skipIgnorables(self, instring, loc): |
| exprsFound = True |
| while exprsFound: |
| exprsFound = False |
| for e in self.ignoreExprs: |
| try: |
| while 1: |
| loc, dummy = e._parse(instring, loc) |
| exprsFound = True |
| except ParseException: |
| pass |
| return loc |
| |
| def preParse(self, instring, loc): |
| if self.ignoreExprs: |
| loc = self._skipIgnorables(instring, loc) |
| |
| if self.skipWhitespace: |
| instrlen = len(instring) |
| white_chars = self.whiteChars |
| while loc < instrlen and instring[loc] in white_chars: |
| loc += 1 |
| |
| return loc |
| |
| def parseImpl(self, instring, loc, doActions=True): |
| return loc, [] |
| |
| def postParse(self, instring, loc, tokenlist): |
| return tokenlist |
| |
def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Run one match attempt of this element at ``loc``, without memoization.

    Steps: optional ``preParse`` (skip ignorables/whitespace), ``parseImpl``,
    ``postParse``, wrapping of the tokens in a :class:`ParseResults`, then the
    parse actions (only when ``doActions`` is true or ``callDuringTry`` is set).
    Debug hooks and the fail action are invoked along the way when configured.

    Returns ``(end_loc, results)``. An ``IndexError`` from ``parseImpl`` or from
    a parse action is converted to :class:`ParseException`; other exceptions
    propagate after the failure hooks have run.
    """
    debugging = self.debug
    len_instring = len(instring)

    if debugging or self.failAction:
        # Instrumented path. Give the failure hooks a usable location even if
        # preParse itself raises before the real token start is known;
        # otherwise the handler would fail with UnboundLocalError and hide
        # the original exception.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            # only pay for the IndexError guard when it can actually trigger
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # Fast path: same steps without any hook handling.
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        # The action loop is deliberately duplicated so that the non-debug
        # path does not carry the extra try/except.
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a new (non-None) return value replaces the results;
                    # None or the same object means "modified in place"
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
| |
| def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int: |
| try: |
| return self._parse(instring, loc, doActions=False)[0] |
| except ParseFatalException: |
| if raise_fatal: |
| raise |
| raise ParseException(instring, loc, self.errmsg, self) |
| |
| def can_parse_next(self, instring: str, loc: int) -> bool: |
| try: |
| self.try_parse(instring, loc) |
| except (ParseException, IndexError): |
| return False |
| else: |
| return True |
| |
# memo table (and its lock) for left-recursion handling in Forward references
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

# Argument cache for optimizing repeated calls when backtracking through
# recursive expressions. The empty dict is only a placeholder so that
# reset_cache() works before packrat parsing is enabled; _parseCache expects
# the real cache object, which is installed later.
packrat_cache = {}
packrat_cache_lock = RLock()
# [hits, misses], indexed by HIT / MISS in _parseCache
packrat_cache_stats = [0, 0]
| |
# This method gets called repeatedly during backtracking with the same arguments;
# caching on those arguments saves re-parsing the contained expression.
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache`` (installed as ``_parse`` by
    ``enable_packrat``).

    The cache key is ``(self, instring, loc, callPreParse, doActions)``.  On a
    miss the real parse runs and its outcome is stored: either a traceback-free
    copy of the raised :class:`ParseBaseException`, or the tuple
    ``(returned location, copy of the tokens, loc the parse started from)``.
    On a hit the stored exception is re-raised, or the stored location and a
    fresh copy of the tokens are returned.  Debug actions, when enabled, are
    invoked with ``cache_hit=True`` on hits.

    :returns: ``(location, tokens)`` exactly as ``_parseNoCache`` would return
    :raises ParseBaseException: when the (possibly cached) parse attempt failed
    """
    HIT, MISS = 0, 1  # indexes into ParserElement.packrat_cache_stats
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored layout: (location returned by the parse, tokens, starting loc)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)
                except TypeError:
                    # presumably tolerates debug actions that do not accept
                    # ``cache_hit`` -- TODO confirm
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True
                        )
                    except TypeError:
                        pass
                raise value

            # hand out a copy so callers cannot mutate the cached tokens
            endloc, result, startloc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # success actions take (start_location, end_location); the
                    # cached start is the ``loc`` the original parse was given
                    self.debugActions.debug_match(
                        instring, startloc, endloc, self, result, cache_hit=True
                    )
                except TypeError:
                    pass

            return endloc, result
| |
# Parse entry point used by callers such as parse_string(); enable_packrat()
# rebinds it to _parseCache and disable_memoization() restores _parseNoCache.
_parse = _parseNoCache
| |
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and zero the
    packrat hit/miss counters in place.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so that existing references to the stats list see the reset
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
| |
# Which memoization mode is active.  Both start off; they are switched by
# enable_packrat(), enable_left_recursion() and disable_memoization().
_packratEnabled = False
_left_recursion_enabled = False
| |
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized data.

    Calling this when neither mode is enabled is harmless, so it can be used
    to clear any previous settings before activating Packrat or Left Recursion.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    # go back to the uncached parse entry point
    ParserElement._parse = ParserElement._parseNoCache
| |
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports direct and indirect
    left-recursion. While parsing, a left-recursive :class:`Forward` element is
    matched repeatedly with a fixed recursion depth that is raised step by step
    until the longest match has been found.

    Example::

        from pip._vendor import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-run during backtracking. Programs whose parse actions
    depend on a strict ordering of side-effects may break.

    Parameters:

    - cache_size_limit - (default=``None``) - upper bound on the number of
      ``Forward`` elements memoized during matching; ``None`` memoizes all
      of them. Values that are not greater than zero raise
      ``NotImplementedError``.

    Bounded Recursion parsing is similar to, but not the same as, Packrat
    parsing, so the two cannot be active together; a ``RuntimeError`` is raised
    if Packrat is enabled. Pass ``force=True`` to disable any previous,
    conflicting settings first.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo
    ParserElement._left_recursion_enabled = True
| |
@staticmethod
def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
    """
    Turn on "packrat" parsing, i.e. memoization of parse attempts.
    Many complex grammars retry the same expression at the same string
    location; with packrat enabled those retries return a cached value
    instead of re-running the parsing/validating code. Both successful
    results and parsing exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    Because cached results skip re-execution, programs whose parse actions
    have side-effects may break. Packrat parsing is therefore off when
    pyparsing is first imported and must be activated by calling the class
    method :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` immediately after importing pyparsing. If packrat
    is already enabled, the call leaves the existing cache in place.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, so the two cannot be active together; a ``RuntimeError`` is raised
    if Bounded Recursion is enabled. Pass ``force=True`` to disable any
    previous, conflicting settings first.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache
| |
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` according to this parser definition. This is the main
    entry point intended for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``; this is equivalent
    to ending the grammar with :class:`StringEnd`().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in which
    every tab has been expanded to spaces (8 spaces per tab, the default of ``string.expandtabs``). When the
    input contains tabs and parse actions index into the string with their ``loc`` argument, keep a
    consistent view of the input by doing one of the following:

    - call ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - write the parse action with the full ``(s,loc,toks)`` signature and index into its ``s`` argument, or
    - expand the tabs in the input yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    Parsing behavior depends on the concrete subclass of this abstract class; see the subclasses
    for further examples.

    With ``parse_all`` set, an exception is raised when ``instring`` does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            (Empty() + StringEnd())._parse(instring, end_loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens
| |
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields the
    matching tokens, start location, and end location. The optional
    ``max_matches`` argument clips scanning after 'n' matches are found
    (``maxMatches`` is the pre-PEP8 spelling; the smaller of the two is used).
    If ``overlap`` is specified, then overlapping matches will be reported.
    With ``debug`` set, each match is also printed as a dict of its tokens,
    start and end.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # re-run preParse from the old loc: if it skips anything,
                        # continue from the end of this match, otherwise advance
                        # by a single character
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the parse did not move past loc: step forward so the
                    # loop makes progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
| |
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    tokens returned for each match (typically modified by a parse action).
    To use ``transform_string``, define a grammar and attach a parse action
    that changes the returned token list. ``transform_string()`` then scans
    the target string for matches, substitutes each matched span with its
    tokens, and returns the resulting string.

    Note that this sets ``keepTabs`` on the expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(non_empty))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
| |
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match of this expression into a single :class:`ParseResults`.
    The optional ``max_matches`` argument clips searching after 'n' matches
    are found (``maxMatches`` is the pre-PEP8 spelling; the smaller of the
    two is used).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
| |
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.
    ``maxsplit`` limits the number of splits, and ``include_separators``
    (default= ``False``; ``includeSeparators`` is the pre-PEP8 spelling)
    controls whether the matched separator text is yielded between the pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly an empty string)
    yield instring[cursor:]
| |
def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand is
    first converted with this element's ``_literalStringClass`` (a
    :class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'; so that a
    parser may contain several skips, the value is a list of all skipped text.

    Raises ``TypeError`` if ``other`` is neither a string, ``...``, nor a
    :class:`ParserElement`.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
| |
def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields ``SkipTo(expr)("_skipped*") + expr``; a string operand
    is converted with ``_literalStringClass``; anything else that is not a
    :class:`ParserElement` raises ``TypeError``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
| |
def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        # the error-stop marker sits between the two operands
        return self + And._ErrorStop() + rhs
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
| |
def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
| |
def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, so that ``expr * 3`` can be written in
    place of ``expr + expr + expr``. The multiplier may also be a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.
    ``None`` (or ``...``) may appear in the tuple:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``TypeError`` for unsupported multiplier types and ``ValueError``
    for a negative count or a maximum smaller than the minimum.
    """
    # normalize the Ellipsis shorthands into (min, max) tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        min_count, extra_count = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad/clip to exactly two entries
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if isinstance(lower, int) and upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)
        if not (isinstance(lower, int) and isinstance(upper, int)):
            raise TypeError(
                "cannot multiply ParserElement and ({}) objects".format(
                    ",".join(type(item).__name__ for item in (lower, upper))
                )
            )
        min_count, extra_count = lower, upper - lower
    else:
        raise TypeError(
            "cannot multiply ParserElement and {} objects".format(
                type(other).__name__
            )
        )

    if min_count < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if extra_count < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if min_count == extra_count == 0:
        return And([])

    def optional_tail(count):
        # builds Opt(self + Opt(self + ... Opt(self))) nested ``count`` deep
        tail = Opt(self)
        for _ in range(count - 1):
            tail = Opt(self + tail)
        return tail

    if not extra_count:
        # fixed repetition only
        return self if min_count == 1 else And([self] * min_count)
    if not min_count:
        return optional_tail(extra_count)
    required = self if min_count == 1 else And([self] * min_count)
    return required + optional_tail(extra_count)
| |
def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when the multiplier is the left operand;
    delegates to ``__mul__``.
    """
    product = self.__mul__(other)
    return product
| |
def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` with ``must_skip=True``; a string
    operand is converted with ``_literalStringClass``; anything else that is
    not a :class:`ParserElement` raises ``TypeError``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
| |
def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
| |
def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
| |
def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
| |
def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
| |
def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; anything else
    that is not a :class:`ParserElement` raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
| |
def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
| |
# Setting __iter__ to None disables iteration over expressions; it overrides
# the legacy behavior of iterating a sequence through successive calls to
# __getitem__ (which this class defines for the repetition notation).
__iter__ = None
| |
def __getitem__(self, key):
    """
    ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
         to ``expr*n + ZeroOrMore(expr)``
         (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
         (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Raises ``TypeError`` when more than two index arguments are given.
    """
    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    else:
        try:
            iter(key)
        except TypeError:
            # a non-iterable key such as an int means "exactly this many"
            key = (key, key)

    count = len(key)
    if count > 2:
        overflow = "... [{}]".format(count) if count > 5 else ""
        raise TypeError(
            "only 1 or 2 index arguments supported ({}{})".format(key[:5], overflow)
        )

    # the actual repetition logic lives in __mul__
    return self * tuple(key[:2])
| |
def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")

    :param name: results name to assign, or ``None`` (the default) to just copy
    :returns: the result of ``_setResultsName(name)``, or of ``copy()`` when
        ``name`` is ``None``
    """
    if name is not None:
        return self._setResultsName(name)
    else:
        return self.copy()
| |
def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so that its output is
    suppressed; handy for keeping punctuation out of the returned results.
    """
    suppressed = Suppress(self)
    return suppressed
| |
def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this implementation only sets the flag on the element itself
    :returns: ``self``, to allow chaining
    """
    setattr(self, "skipWhitespace", True)
    return self
| |
def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. Normally this is only used
    internally by the pyparsing module, but some whitespace-sensitive grammars
    may need it.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this implementation only clears the flag on the element itself
    :returns: ``self``, to allow chaining
    """
    setattr(self, "skipWhitespace", False)
    return self
| |
def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Replace the default whitespace characters for this element.

    Enables whitespace skipping, stores ``chars`` as a set in ``whiteChars``
    and records ``copy_defaults`` in ``copyDefaultWhiteChars``.

    :returns: ``self``, to allow chaining
    """
    self.skipWhitespace = True
    new_chars = set(chars)
    self.whiteChars = new_chars
    self.copyDefaultWhiteChars = copy_defaults
    return self
| |
def parse_with_tabs(self) -> "ParserElement":
    """
    Override the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs`` on this element.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    :returns: ``self``, to allow chaining
    """
    setattr(self, "keepTabs", True)
    return self
| |
def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    expression is appended to ``ignoreExprs`` only if it is not already
    present; any other expression is copied, wrapped in :class:`Suppress`
    and appended.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']

    :returns: ``self``, to allow chaining
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        self.ignoreExprs.append(Suppress(other.copy()))
    return self
| |
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    :returns: ``self``, to allow chaining
    """
    on_try = start_action or _default_start_debug_action
    on_match = success_action or _default_success_debug_action
    on_fail = exception_action or _default_exception_debug_action
    self.debugActions = self.DebugActions(on_try, on_match, on_fail)
    self.debug = True
    return self
| |
def set_debug(self, flag: bool = True) -> "ParserElement":
    """
    Switch display of debugging messages during pattern matching on or off.
    Pass ``flag=True`` to enable (installing the default debug actions),
    ``False`` to disable.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; custom ones can be
    installed with :class:`set_debug_actions`. Before each attempt to match the
    ``wd`` expression the message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown, followed by a ``"Matched"`` message if the parse succeeds or an
    ``"Exception raised"`` message if it fails. Note also the use of
    :class:`set_name` to give the expression a human-readable name, which makes
    debugging and exception messages easier to understand - for instance, the
    default name created for the :class:`Word` expression without calling
    ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if not flag:
        self.debug = False
        return self

    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
| |
@property
def default_name(self) -> str:
    """
    The auto-generated name of this expression; generated on first access via
    ``_generateDefaultName`` and stored in ``_defaultName`` for later calls.
    """
    if self._defaultName is not None:
        return self._defaultName
    self._defaultName = self._generateDefaultName()
    return self._defaultName
| |
@abstractmethod
def _generateDefaultName(self):
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls it when no default name has been
    generated yet and stores the returned value.
    """
| |
def set_name(self, name: str) -> "ParserElement":
    """
    Give this expression a name, which makes debugging and exception messages clearer.
    The error message is updated to ``"Expected <name>"``, and debugging is
    switched on when ``__diag__.enable_debug_on_named_expressions`` is set.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    :returns: ``self``, to allow chaining
    """
    self.customName = name
    # self.name now resolves to the custom name (or the default name if None)
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self
| |
@property
def name(self) -> str:
    """
    The display name of this expression: the user-defined ``customName`` when
    one has been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is None:
        return self.default_name
    return self.customName
| |
def __str__(self) -> str:
    """Return this expression's ``name``."""
    label = self.name
    return label
| |
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
| |
def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the stored default name so
    that ``default_name`` regenerates it on next access.

    :returns: ``self``, to allow chaining
    """
    self._defaultName = None
    self.streamlined = True
    return self
| |
def recurse(self) -> Sequence["ParserElement"]:
    """
    Return the expressions to descend into from this one.

    This base implementation returns an empty list; ``_checkRecursion`` iterates over
    the returned elements.
    """
    return []
| |
def _checkRecursion(self, parseElementList):
    """
    Propagate the recursion check to every element returned by ``recurse()``.

    ``parseElementList`` is the list of elements visited so far; each child receives a
    copy of it extended with ``self``, so the caller's list is not modified.
    """
    # copy-and-extend so that sibling branches do not see each other's trail
    subRecCheckList = parseElementList[:] + [self]
    for e in self.recurse():
        e._checkRecursion(subRecCheckList)
| |
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    The check is delegated to ``_checkRecursion`` starting from an empty trail.
    ``validateTrace`` is accepted but not used by this implementation.
    """
    self._checkRecursion([])
| |
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a file name /
    path. When a name is given, the file is opened with ``encoding``, read in full,
    and closed before parsing starts. ``parse_all`` (or its legacy spelling
    ``parseAll``) is forwarded to :class:`parse_string`.
    """
    parseAll = parseAll or parse_all
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no read() available: treat the argument as a file name or path
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()
    try:
        return self.parse_string(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
| |
def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal;
    - a string (``str_type``) compares equal when this expression matches it in full;
    - another :class:`ParserElement` compares equal when both have equal instance
      attributes (``vars()``);
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
| |
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)
| |
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string; handy for simple inline
    microtests of sub expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # both spellings default to True, so either one being False turns full parsing off
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
| |
def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of
    (test string, parse results or raised exception) tuples, one per test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the snake_case arguments with their legacy camelCase spellings; flags that
    # default to True are and-ed so that either spelling can switch them off
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one test per line, each stripped of surrounding whitespace
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, str_type):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults = []
    comments = []
    success = True
    # expression that turns a literal backslash-n in a test string into a real newline,
    # leaving quoted strings untouched
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # collect comment lines (and blank lines inside a comment run) to print
        # ahead of the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            # flag fatal parse errors in the FAIL line so they stand out from ordinary failures
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append("FAIL: " + fatal + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing: fall back to the normal dump,
                        # honoring full_dump like the no-callback path does
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        "{} failed: {}: {}".format(
                            postParse.__name__, type(e).__name__, e
                        )
                    )
            else:
                out.append(result.dump(full=fullDump))
        # trailing empty entry makes the joined output end with a newline
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
| |
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str, Path or file-like object) - output target for the
      generated diagram HTML; a str or Path is opened for writing as UTF-8,
      anything else is written to through its ``write()`` method
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an
      unlabeled surrounding box

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` (chained from the ``ImportError``) if the diagram
    support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad))
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(railroad))
| |
# camelCase aliases for the snake_case methods and properties; each name is bound to
# the very same object as its snake_case counterpart
setDefaultWhitespaceChars = set_default_whitespace_chars
inlineLiteralsUsing = inline_literals_using
setResultsName = set_results_name
setBreak = set_break
setParseAction = set_parse_action
addParseAction = add_parse_action
addCondition = add_condition
setFailAction = set_fail_action
tryParse = try_parse
canParseNext = can_parse_next
resetCache = reset_cache
enableLeftRecursion = enable_left_recursion
enablePackrat = enable_packrat
parseString = parse_string
scanString = scan_string
searchString = search_string
transformString = transform_string
setWhitespaceChars = set_whitespace_chars
parseWithTabs = parse_with_tabs
setDebugActions = set_debug_actions
setDebug = set_debug
defaultName = default_name
setName = set_name
parseFile = parse_file
runTests = run_tests
ignoreWhitespace = ignore_whitespace
leaveWhitespace = leave_whitespace
| |
| |
class _PendingSkip(ParserElement):
    # Internal placeholder that holds the place where '...' was added to a parser element;
    # once another ParserElement is added to it, the placeholder is replaced with a SkipTo
    # targeting that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self):
        # render "<anchor> + Empty" and show the Empty placeholder as an ellipsis
        combined = str(self.anchor + Empty())
        return combined.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # build the real expression now that the skip target is known
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the empty token
            # and its "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was absent and nothing was skipped: report the missing anchor instead
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder that was never completed with a target cannot parse anything
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
| |
| |
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        # tokens are initialized with savelist turned off
        super().__init__(savelist=False)

    def _generateDefaultName(self):
        """Default name of a token is its class name; subclasses may override."""
        return type(self).__name__
| |
| |
class Empty(Token):
    """
    An empty token, will always match.
    """

    def __init__(self):
        super().__init__()
        # flags read by the parsing machinery: cannot raise IndexError, may match nothing
        self.mayIndexError = False
        self.mayReturnEmpty = True
| |
| |
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)
| |
| |
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # legacy camelCase keyword wins when it is non-empty
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        try:
            self.firstMatchChar = text[0]
        except IndexError:
            raise ValueError("null string passed to Literal; use Empty() instead")
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.errmsg = "Expected " + self.name

        # Performance tuning: a plain Literal of exactly one character switches its
        # __class__ to a variant whose parseImpl only compares that single character
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character comparison before the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
| |
| |
class _SingleCharLiteral(Literal):
    # Literal variant for one-character match strings: a single character comparison
    # is the whole match test
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
| |
| |
# install Literal as ParserElement's _literalStringClass
# NOTE(review): presumably the class used to wrap plain strings combined with
# expressions - confirm against ParserElement
ParserElement._literalStringClass = Literal
| |
| |
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` (legacy spelling ``identChars``) is a string of characters
      that would be valid identifier characters, defaulting to all alphanumerics
      + "_" and "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # resolve legacy camelCase arguments, then fall back to the class-wide default
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string

        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.errmsg = "Expected {} {}".format(type(self).__name__, self.name)

        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case, for the keyword and for
            # the identifier characters alike
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        errmsg = self.errmsg
        errloc = loc
        match_len = self.matchLen
        ident_chars = self.identChars

        if self.caseless:
            if instring[loc : loc + match_len].upper() == self.caselessmatch:
                if loc != 0 and instring[loc - 1].upper() in ident_chars:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    loc < len(instring) - match_len
                    and instring[loc + match_len].upper() in ident_chars
                ):
                    # followed by keyword char
                    errmsg += ", was immediately followed by keyword character"
                    errloc = loc + match_len
                else:
                    return loc + match_len, self.match
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and match_len == 1
                or instring.startswith(self.match, loc)
            ):
                if loc != 0 and instring[loc - 1] in ident_chars:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    loc < len(instring) - match_len
                    and instring[loc + match_len] in ident_chars
                ):
                    # followed by keyword char
                    errmsg += (
                        ", keyword was immediately followed by keyword character"
                    )
                    errloc = loc + match_len
                else:
                    return loc + match_len, self.match
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars
| |
| |
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        original = matchString or match_string
        # the base class stores the upper-cased text, which parseImpl compares against
        super().__init__(original.upper())
        # Preserve the defining literal.
        self.returnString = original
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
| |
| |
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve legacy camelCase arguments, then defer to Keyword with caseless on
        match_string = matchString or match_string
        identChars = identChars or ident_chars
        super().__init__(match_string, identChars, caseless=True)
| |
| |
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched; must not be empty
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Raises ``ValueError`` at construction if ``match_string`` is empty.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        if not match_string:
            # same policy as Literal and Keyword: an empty pattern has nothing to
            # compare and previously "matched" by consuming one arbitrary character
            raise ValueError("null string passed to CloseMatch; use Empty() instead")
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected {!r} (with up to {} mismatches)".format(
            self.match_string, self.maxMismatches
        )
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self):
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a comparison when enough input remains for the whole pattern
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for pos, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    # lower-case one character at a time so both sides stay aligned
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # loop finished without exceeding the mismatch budget: it is a match
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)
| |
| |
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0, meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0, meaning not used)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching ;useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # legacy camelCase keywords take precedence when given
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                "invalid {}, initChars cannot be empty string".format(
                    type(self).__name__
                )
            )

        initChars = set(initChars)
        self.initChars = initChars
        if excludeChars:
            excludeChars = set(excludeChars)
            # in-place difference: self.initChars is the same set object and is updated too
            initChars -= excludeChars
            if bodyChars:
                bodyChars = set(bodyChars) - excludeChars
        self.initCharsOrig = "".join(sorted(initChars))

        if bodyChars:
            self.bodyCharsOrig = "".join(sorted(bodyChars))
            self.bodyChars = set(bodyChars)
        else:
            # no (remaining) body characters: body set defaults to the initial set
            self.bodyCharsOrig = "".join(sorted(initChars))
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both min and max
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # see if we can make a regex for this Word
        if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
            if self.bodyChars == self.initChars:
                if max == 0:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    repeat = "{{{},{}}}".format(
                        self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen
                    )
                self.reString = "[{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    repeat,
                )
            elif len(self.initChars) == 1:
                if max == 0:
                    repeat = "*"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "{}[{}]{}".format(
                    re.escape(self.initCharsOrig),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            else:
                if max == 0:
                    repeat = "*"
                elif max == 2:
                    # one initial character plus at most one body character; the body
                    # character must stay optional so that 1-character words still match
                    repeat = "?"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "[{}][{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # regex could not be built; keep the character-scanning parseImpl
                self.re = None
            else:
                # switch to the regex-backed parseImpl
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def _generateDefaultName(self):
        def charsAsStr(s):
            # collapse the character set to range notation, truncated for readability
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = "W:({}, {})".format(
                charsAsStr(self.initChars), charsAsStr(self.bodyChars)
            )
        else:
            base = "W:({})".format(charsAsStr(self.initChars))

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the leading "W:" marker
                    return base[2:]
                else:
                    return base + "{{{}}}".format(self.minLen)
            elif self.maxLen == _MAX_INT:
                return base + "{{{},...}}".format(self.minLen)
            else:
                return base + "{{{},{}}}".format(self.minLen, self.maxLen)
        return base

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        # consume body characters up to the maximum length or end of input
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too short
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit maximum was given and the word continues past it
            throwException = True
        elif self.asKeyword:
            # keyword mode: must not be adjacent to further body characters
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
| |
| |
class _WordRegex(Word):
    # Word variant that matches through the compiled regex bound to ``re_match``
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)
| |
| |
class Char(_WordRegex):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # legacy camelCase keywords take precedence when given
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars
        )
        # build the single-character regex here and bind it for the inherited parseImpl
        char_class = "[{}]".format(_collapse_string_to_ranges(self.initChars))
        self.reString = r"\b{}\b".format(char_class) if asKeyword else char_class
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
| |
| |
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled object exposing
        ``pattern`` and ``match`` attributes; it is then used directly.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        when ``pattern`` is neither a string nor a compiled-RE-like object.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result shape by rebinding parseImpl on the instance;
        # asMatch is applied last, so it wins if both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    @cached_property
    def re(self):
        """Compiled pattern: the object supplied at construction, or the
        result of compiling ``self.pattern`` with ``self.flags``.

        Raises ``ValueError`` (chained to the underlying ``re.error``) when
        the pattern string does not compile.
        """
        if self._re:
            return self._re
        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the original regex error attached as the explicit cause
            raise ValueError(
                "invalid pattern ({!r}) passed to Regex".format(self.pattern)
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True when the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self):
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Default form: return the matched text as :class:`ParseResults`,
        with every named group also stored under its group name.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """``asGroupList`` form: return the tuple from ``match.groups()``."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """``asMatch`` form: return the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when this expression was built with
        ``asGroupList=True``, or with ``asMatch=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(asGroupList=True)")

        if self.asMatch and callable(repl):
            raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)")

        if self.asMatch:
            # tokens[0] is a match object here, so expand the template on it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # tokens[0] is the matched text; re-apply the pattern to it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
| |
| |
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Each parameter except ``multiline`` also has a camelCase keyword-only
    spelling (``quoteChar``, ``escChar``, ...).

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # (escaped literal, replacement character) pairs applied when
    # convertWhitespaceEscapes is enabled
    ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Build the regular expression that recognizes the quoted string.

        Raises ``ValueError`` if the opening or closing quote is empty after
        stripping whitespace, or if the assembled pattern fails to compile.
        """
        super().__init__()
        # merge the two spellings: value options take whichever is truthy,
        # boolean options (default True) are disabled if either is False
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars - wont work anyway
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quote_char
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                raise ValueError("endQuoteChar cannot be the empty string")

        self.quoteChar = quote_char
        self.quoteCharLen = len(quote_char)
        self.firstQuoteChar = quote_char[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # inner_pattern is an alternation of everything allowed between the
        # quotes; sep becomes "|" once the first alternative has been added
        sep = ""
        inner_pattern = ""

        if escQuote:
            inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote))
            sep = "|"

        if escChar:
            # the escape character followed by any single character
            inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar))
            sep = "|"
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote, as long
            # as they are not followed by the remainder of the end quote
            inner_pattern += (
                "{}(?:".format(sep)
                + "|".join(
                    "(?:{}(?!{}))".format(
                        re.escape(self.endQuoteChar[:i]),
                        re.escape(self.endQuoteChar[i:]),
                    )
                    for i in range(len(self.endQuoteChar) - 1, 0, -1)
                )
                + ")"
            )
            sep = "|"

        # final alternative: any character other than the first end-quote
        # character and the escape character (and, unless multiline, CR/LF)
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            inner_pattern += r"{}(?:[^{}{}])".format(
                sep,
                _escape_regex_range_chars(self.endQuoteChar[0]),
                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
            )
        else:
            self.flags = 0
            inner_pattern += r"{}(?:[^{}\n\r{}])".format(
                sep,
                _escape_regex_range_chars(self.endQuoteChar[0]),
                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
            )

        # full pattern: open quote, zero or more inner items, close quote
        self.pattern = "".join(
            [
                re.escape(self.quoteChar),
                "(?:",
                inner_pattern,
                ")*",
                re.escape(self.endQuoteChar),
            ]
        )

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(
                "invalid pattern {!r} passed to Regex".format(self.pattern)
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self):
        """Describe the expression by its quote delimiters."""
        if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
            return "string enclosed in {!r}".format(self.quoteChar)

        return "quoted string, starting with {} ending with {}".format(
            self.quoteChar, self.endQuoteChar
        )

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)``; when ``unquoteResults`` is set the
        delimiters are stripped and escapes are converted.  Raises
        ``ParseException`` when there is no match.
        """
        # compare the first character before running the full regex
        result = (
            instring[loc] == self.firstQuoteChar
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen : -self.endQuoteCharLen]

            if isinstance(ret, str_type):
                # replace escaped whitespace
                # NOTE(review): this runs before escChar sequences are
                # collapsed below, so an escaped backslash followed by
                # 'n', 't', 'f' or 'r' is also rewritten here - confirm
                # that this ordering is intended
                if "\\" in ret and self.convertWhitespaceEscapes:
                    for wslit, wschar in self.ws_map:
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret
| |
| |
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.
    Whitespace is part of the match unless it is listed among the
    disallowed characters (see example).  Built from a string holding all
    disallowed characters plus an optional minimum, maximum and/or exact
    length.  ``min`` defaults to 1 (values < 1 are rejected); ``max`` and
    ``exact`` default to 0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self):
        # the collapsed form only decides whether to truncate; the raw
        # character string is what gets displayed
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            return "!W:({}...)".format(shown[: 16 - 3])
        return "!W:({})".format(shown)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end_loc, text)`` for the run of allowed characters at
        ``loc``; raise ``ParseException`` if it is absent or too short.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
| |
| |
class White(Token):
    """Matching class for significant whitespace.  pyparsing grammars
    normally skip whitespace; use this class where some whitespace
    structure matters.  Construct with a string of the whitespace
    characters to match (default ``" \\t\\r\\n"``).  The optional ``min``,
    ``max`` and ``exact`` arguments work as for the :class:`Word` class.
    """

    # display names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # only the known whitespace characters that are NOT being matched
        # remain skippable
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self):
        return "".join(map(White.whiteStrs.__getitem__, self.matchWhite))

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end_loc, text)`` for the whitespace run at ``loc``;
        raise ``ParseException`` if it is absent or shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
| |
| |
class PositionToken(Token):
    """Base class for the position-matching tokens defined below; it marks
    the token as able to return empty and as not raising ``IndexError``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
| |
| |
class GoToColumn(PositionToken):
    """Token that advances to a specific column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column, a
        non-space character or the end of input is reached.
        """
        if col(loc, instring) == self.col:
            return loc

        text_len = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < text_len
            and instring[loc].isspace()
            and col(loc, instring) != self.col
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; raise
        ``ParseException`` if ``loc`` is already beyond that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
| |
| |
class LineStart(PositionToken):
    r"""Matches when the current position is at the beginning of a line
    within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Skip non-newline whitespace; newlines are stepped over only when
        "\n" was part of the original whitespace set.
        """
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| |
| |
class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string.
    """

    def __init__(self):
        super().__init__()
        # a newline must not be skipped as whitespace, or it could never match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at ``loc``, or match with no tokens exactly at
        the end of the input; otherwise raise ``ParseException``.
        """
        end = len(instring)
        if loc < end:
            if instring[loc] == "\n":
                return loc + 1, "\n"
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
| |
| |
class StringStart(PositionToken):
    """Matches when the current position is at the beginning of the parse
    string.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at position 0, or at the position that pre-parsing from
        position 0 arrives at (only whitespace and ignorables precede it).
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| |
| |
class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Raise ``ParseException`` while input remains; otherwise match
        with no tokens, stepping one past the end on the first match.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # already beyond the end of the input
        return loc, []
| |
| |
class WordStart(PositionToken):
    r"""Matches when the current position is at the beginning of a
    :class:`Word` and is not preceded by any character from a given set
    of ``word_chars`` (default= ``printables``).  Use
    ``WordStart(alphanums)`` to emulate the ``\b`` behavior of regular
    expressions.  ``WordStart`` also matches at the beginning of the
    string being parsed, or at the beginning of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase argument wins only when it was changed from its default
        chosen = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(chosen)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or where a word character
        follows a non-word character.
        """
        if loc == 0:
            return loc, []

        members = self.wordChars
        if instring[loc - 1] in members or instring[loc] not in members:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| |
| |
class WordEnd(PositionToken):
    r"""Matches when the current position is at the end of a
    :class:`Word` and is not followed by any character from a given set
    of ``word_chars`` (default= ``printables``).  Use
    ``WordEnd(alphanums)`` to emulate the ``\b`` behavior of regular
    expressions.  ``WordEnd`` also matches at the end of the string being
    parsed, or at the end of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase argument wins only when it was changed from its default
        chosen = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of input, or where a
        non-word character follows a word character.
        """
        size = len(instring)
        if size <= 0 or loc >= size:
            return loc, []

        members = self.wordChars
        if instring[loc] in members or instring[loc - 1] not in members:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| |
| |
class ParseExpression(ParserElement):
    """Abstract :class:`ParserElement` subclass that combines several
    contained expressions and post-processes their parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]

        # a generator can only be walked once, so materialize it first
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            items = list(exprs)
            # plain strings in the sequence are wrapped as literals
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in items
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> Sequence[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the cached
        default name.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes
        ``leave_whitespace`` on (copies of) all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes
        ``ignore_whitespace`` on (copies of) all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression; a ``Suppress`` already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten a directly
        nested expression of the same class.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def absorbable(candidate):
            # same class, and carrying nothing that flattening would lose
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if absorbable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if absorbable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Validate every contained expression, then check this expression
        for recursion.
        """
        seen = [] if validateTrace is None else validateTrace
        tmp = seen[:] + [self]
        for sub in self.exprs:
            sub.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = super().copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        warn_key = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warn_key not in self.suppress_warnings_
        ):
            # warn once for every contained expression that already has a name
            for sub in self.exprs:
                if not isinstance(sub, ParserElement):
                    continue
                if not sub.resultsName:
                    continue
                if warn_key in sub.suppress_warnings_:
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        sub.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    # camelCase aliases for the snake_case methods
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
| |
| |
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (displayed as ``-``); once ``parseImpl`` passes
        it, failures of later expressions are raised as
        ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self):
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` (Ellipsis) entry with
        a ``SkipTo`` of the expression that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # presumably "Empty() + x" is used to coerce a plain
                        # string into a parser element - TODO confirm
                        skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception(
                            "cannot construct And with sequence ending in ..."
                        )
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling follows the first expression, except that
            # a leading White turns whitespace skipping off
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base-class streamlining, then link
        each ``IndentedBlock`` to the expression that precedes it.
        """
        # collapse any _PendingSkip's
        if self.exprs:
            if any(
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
                for e in self.exprs[:-1]
            ):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (
                        isinstance(e, ParseExpression)
                        and e.exprs
                        and isinstance(e.exprs[-1], _PendingSkip)
                    ):
                        # merge the following expression into the pending
                        # skip and mark its old slot for removal
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while cur:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # cur_=cur binds the block now; the loop variable is
                    # rebound on later iterations
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                cur = next(iter(subs), None)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and return
        ``(end_loc, accumulated_results)``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type comparison, not isinstance
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                # past the error stop: re-raise failures as ParseSyntaxException
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that hold tokens or named keys
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` in place, wrapping a plain string as a literal."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self):
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (inner[0 :: len(inner) - 1] is the first plus the last character)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return "{" + inner + "}"
| |
| |
| class Or(ParseExpression): |
| """Requires that at least one :class:`ParseExpression` is found. If |
| two expressions match, the expression that matches the longest |
| string will be used. May be constructed using the ``'^'`` |
| operator. |
| |
| Example:: |
| |
| # construct Or using '^' operator |
| |
| number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) |
| print(number.search_string("123 3.1416 789")) |
| |
| prints:: |
| |
| [['123'], ['3.1416'], ['789']] |
| """ |
| |
def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
    """Collect the alternatives and derive the empty-match and
    whitespace-skipping flags from them.
    """
    super().__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
        return

    # can be empty if any alternative can; skips whitespace only if all do
    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
    self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
| |
def streamline(self) -> ParserElement:
    """Run the base-class streamlining, then recompute the flags derived
    from the alternatives.
    """
    super().streamline()
    if not self.exprs:
        self.saveAsList = False
        return self

    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
    self.saveAsList = any(e.saveAsList for e in self.exprs)
    # one alternative that keeps whitespace, or is a White, disables skipping
    self.skipWhitespace = not any(
        not e.skipWhitespace or isinstance(e, White) for e in self.exprs
    )
    return self
| |
| def parseImpl(self, instring, loc, doActions=True): |
| maxExcLoc = -1 |
| maxException = None |
| matches = [] |
| fatals = [] |
| if all(e.callPreparse for e in self.exprs): |
| loc = self.preParse(instring, loc) |
| for e in self.exprs: |
| try: |
| loc2 = e.try_parse(instring, loc, raise_fatal=True) |
| except ParseFatalException as pfe: |
| pfe.__traceback__ = None |
| pfe.parserElement = e |
| fatals.append(pfe) |
| maxException = None |
| maxExcLoc = -1 |
| except ParseException as err: |
| if not fatals: |
| err.__traceback__ = None |
| if err.loc > maxExcLoc: |
| maxException = err |
| maxExcLoc = err.loc |
| except IndexError: |
| if len(instring) > maxExcLoc: |
| maxException = ParseException( |
| instring, len(instring), e.errmsg, self |
| ) |
| maxExcLoc = len(instring) |
| else: |
| # save match among all matches, to retry longest to shortest |
| matches.append((loc2, e)) |
| |
| if matches: |
| # re-evaluate all matches in descending order of length of match, in case attached actions |
| # might change whether or how much they match of the input. |
| matches.sort(key=itemgetter(0), reverse=True) |
| |
| if not doActions: |
| # no further conditions or parse actions to change the selection of |
| # alternative, so the first match will be the best match |
| best_expr = matches[0][1] |
| return best_expr._parse(instring, loc, doActions) |
| |
| longest = -1, None |
| for loc1, expr1 in matches: |
| if loc1 <= longest[0]: |
| # already have a longer match than this one will deliver, we are done |
| return longest |
| |
| try: |
| loc2, toks = expr1._parse(instring, loc, doActions) |
| except ParseException as err: |
| err.__traceback__ = None |
| if err.loc > maxExcLoc: |
| maxException = err |
| maxExcLoc = err.loc |
| else: |
| if loc2 >= loc1: |
| return loc2, toks |
| # didn't match as much as before |
| elif loc2 > longest[0]: |
| longest = loc2, toks |
| |
| if longest != (-1, None): |
| return longest |
| |
| if fatals: |
| if len(fatals) > 1: |
| fatals.sort(key=lambda e: -e.loc) |
| if fatals[0].loc == fatals[1].loc: |
| fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) |
| max_fatal = fatals[0] |
| raise max_fatal |
| |
| if maxException is not None: |
| maxException.msg = self.errmsg |
| raise maxException |
| else: |
| raise ParseException( |
| instring, loc, "no defined alternatives to match", self |
| ) |
| |
| def __ixor__(self, other): |
| if isinstance(other, str_type): |
| other = self._literalStringClass(other) |
| return self.append(other) # Or([self, other]) |
| |
| def _generateDefaultName(self): |
| return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" |
| |
| def _setResultsName(self, name, listAllMatches=False): |
| if ( |
| __diag__.warn_multiple_tokens_in_named_alternation |
| and Diagnostics.warn_multiple_tokens_in_named_alternation |
| not in self.suppress_warnings_ |
| ): |
| if any( |
| isinstance(e, And) |
| and Diagnostics.warn_multiple_tokens_in_named_alternation |
| not in e.suppress_warnings_ |
| for e in self.exprs |
| ): |
| warnings.warn( |
| "{}: setting results name {!r} on {} expression " |
| "will return a list of all parsed tokens in an And alternative, " |
| "in prior versions only the first token was returned; enclose " |
| "contained argument in Group".format( |
| "warn_multiple_tokens_in_named_alternation", |
| name, |
| type(self).__name__, |
| ), |
| stacklevel=3, |
| ) |
| |
| return super()._setResultsName(name, listAllMatches) |
| |
| |
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Create a first-match alternation over ``exprs``.

        ``exprs`` and ``savelist`` are forwarded unchanged to the base
        class initializer. With at least one alternative, the expression
        may return empty if any alternative may, and skips whitespace
        only if every alternative does. With no alternatives it is
        marked as possibly returning empty.
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Streamline once via the base class, then recompute the derived flags.

        Does nothing if ``self.streamlined`` is already set. Otherwise
        ``saveAsList`` and ``mayReturnEmpty`` are true if any alternative
        sets them; ``skipWhitespace`` is true only if every alternative
        skips whitespace and none of them is a :class:`White`.

        Returns:
            ``self``
        """
        if self.streamlined:
            return self

        super().streamline()
        if self.exprs:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at ``loc``.

        Alternatives are tried in listed order; the first one whose
        ``_parse`` succeeds determines the result.

        Returns:
            whatever the successful alternative's ``_parse`` returns.

        Raises:
            ParseFatalException: re-raised immediately from the
                alternative that produced it, with ``parserElement`` set
                to that alternative.
            ParseException: if no alternative matches (the furthest
                failure, with its message replaced by ``self.errmsg``),
                or if there are no alternatives at all.
        """
        maxExcLoc = -1
        maxException = None

        for e in self.exprs:
            try:
                return e._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                # fatal errors abort the alternation; later alternatives
                # are not tried
                pfe.__traceback__ = None
                pfe.parserElement = e
                raise
            except ParseException as err:
                # remember only the failure that got furthest into the input
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # treated as a parse failure at the end of the input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)

        if maxException is not None:
            maxException.msg = self.errmsg
            raise maxException
        else:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

    def __ior__(self, other):
        """Append ``other`` as a further alternative (``self |= other``).

        A string operand is first converted with
        ``self._literalStringClass``. Any operand that is still not a
        :class:`ParserElement` is rejected by returning
        ``NotImplemented`` instead of being appended, so an invalid
        operand is reported at the ``|=`` statement rather than failing
        later while parsing.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            # Per the Python data model, the interpreter then falls back
            # to the binary ``|`` operator methods and raises TypeError
            # if none of them accepts the operand.
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self):
        """Return the alternatives joined by ``" | "`` inside braces."""
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning about ``And`` alternatives.

        A warning is issued when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is
        enabled, is not suppressed on this expression, and at least one
        alternative is an :class:`And` that has not suppressed it
        either. The call is then delegated to the base class.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
| |
| |
| class Each(ParseExpression): |
| """Requires all given :class:`ParseExpression` s to be found, but in |
| any order. Expressions may be separated by whitespace. |
| |
| May be constructed using the ``'&'`` operator. |
| |
| Example:: |
| |
| color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") |
| shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") |
| integer = Word(nums) |
| shape_attr = "shape:" + shape_type("shape") |
| posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") |
| color_attr = "color:" + color("color") |
| size_attr = "size:" + integer("size") |
| |
| # use Each (using operator '&') to accept attributes in any order |
| # (shape and posn are required, color and size are optional) |
| shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) |
| |
| shape_spec.run_tests(''' |
| shape: SQUARE color: BLACK posn: 100, 120 |
| shape: CIRCLE size: 50 color: BLUE posn: 50,80 |
| color:GREEN size:20 shape:TRIANGLE posn:20,40 |
| ''' |
| ) |
| |
| prints:: |
| |
| shape: SQUARE color: BLACK posn: 100, 120 |
| ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] |
| - color: BLACK |
| - posn: ['100', ',', '120'] |
| - x: 100 |
| - y: 120 |
| - shape: SQUARE |
| |
| |
| shape: CIRCLE size: 50 color: BLUE posn: 50,80 |
|