openlane/dependencies/includedyaml/scanner.py - third_party/shuttle/sky130/mpw-006/slot-033 - Git at Google

 # Scanner produces tokens of the following types:
 # STREAM-START
 # STREAM-END
 # DIRECTIVE(name, value)
 # DOCUMENT-START
 # DOCUMENT-END
 # BLOCK-SEQUENCE-START
 # BLOCK-MAPPING-START
 # BLOCK-END
 # FLOW-SEQUENCE-START
 # FLOW-MAPPING-START
 # FLOW-SEQUENCE-END
 # FLOW-MAPPING-END
 # BLOCK-ENTRY
 # FLOW-ENTRY
 # KEY
 # VALUE
 # ALIAS(value)
 # ANCHOR(value)
 # TAG(value)
 # SCALAR(value, plain, style)
 #
 # Read comments in the Scanner code for more details.
 #

 __all__ = ["Scanner", "ScannerError"]

 from .error import MarkedYAMLError
 from .tokens import *


 class ScannerError(MarkedYAMLError):
     """Error raised by the Scanner when the input cannot be tokenized."""


 class SimpleKey:
     """Record of a position where a simple key may start.

     See the simple keys treatment in Scanner for how these are used.
     """

     def __init__(self, token_number, required, index, line, column, mark):
         # Number of the candidate token in the token stream, and whether a
         # ':' has to follow it (a stale required key is a scanner error).
         self.token_number, self.required = token_number, required
         # Position of the candidate in the input.
         self.index, self.line, self.column = index, line, column
         self.mark = mark


 class Scanner:
     def __init__(self):
         """Set up the scanner state and queue the STREAM-START token.

         It is assumed that Scanner and Reader will have a common descendant.
         Reader does the dirty work of checking for a BOM and converting the
         input data to Unicode; it also adds NUL to the end. Reader supports
         the following methods:
           self.peek(i=0)      -- peek the next i-th character
           self.prefix(l=1)    -- peek the next l characters
           self.forward(l=1)   -- read the next l characters and move the
                                  pointer
         """
         # Set once the end of the stream has been reached.
         self.done = False

         # Count of '{' and '[' that are still open; 0 means block context.
         self.flow_level = 0

         # Queue of tokens that were processed but not emitted yet. It is
         # seeded with the STREAM-START token straight away.
         self.tokens = []
         self.fetch_stream_start()

         # How many tokens have been emitted through `get_token`.
         self.tokens_taken = 0

         # The current indentation level, and the stack of past levels.
         self.indent, self.indents = -1, []

         # ---- Simple keys treatment ----
         #
         # A simple key is a key that is not denoted by the '?' indicator,
         # for example:
         #   ---
         #   block simple key: value
         #   ? not a simple key:
         #   : { flow simple key: value }
         # The KEY token is emitted before every key, so whenever a potential
         # simple key is found we try to locate the matching ':' indicator.
         # Simple keys should be limited to a single line and 1024 characters.

         # Whether a simple key can start at the current position. A simple
         # key may start:
         # - at the beginning of the line, not counting indentation spaces
         #       (in block context),
         # - after '{', '[', ',' (in the flow context),
         # - after '?', ':', '-' (in the block context).
         # In the block context this flag also tells whether a block
         # collection may start at the current position.
         self.allow_simple_key = True

         # Possible simple keys, as a dictionary keyed by `flow_level`; there
         # can be no more than one possible simple key for each level. Each
         # value is a SimpleKey record:
         #   (token_number, required, index, line, column, mark)
         # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
         # '[', or '{' tokens.
         self.possible_simple_keys = {}

     # Public methods.

     def check_token(self, *choices):
         # Check if the next token is one of the given types.
         while self.need_more_tokens():
             self.fetch_more_tokens()
         if self.tokens:
             if not choices:
                 return True
             for choice in choices:
                 if isinstance(self.tokens[0], choice):
                     return True
         return False

     def peek_token(self):
         # Return the next token, but do not delete if from the queue.
         # Return None if no more tokens.
         while self.need_more_tokens():
             self.fetch_more_tokens()
         if self.tokens:
             return self.tokens[0]
         else:
             return None

     def get_token(self):
         # Return the next token.
         while self.need_more_tokens():
             self.fetch_more_tokens()
         if self.tokens:
             self.tokens_taken += 1
             return self.tokens.pop(0)

     # Private methods.

     def need_more_tokens(self):
         if self.done:
             return False
         if not self.tokens:
             return True
         # The current token may be a potential simple key, so we
         # need to look further.
         self.stale_possible_simple_keys()
         if self.next_possible_simple_key() == self.tokens_taken:
             return True

     def fetch_more_tokens(self):
         """Scan up to the next token and run the fetcher that handles it.

         Raises:
             ScannerError: if the next character cannot start any token.
         """
         # Eat whitespaces and comments until we reach the next token.
         self.scan_to_next_token()

         # Remove obsolete possible simple keys.
         self.stale_possible_simple_keys()

         # Compare the current indentation and column. It may add some tokens
         # and decrease the current indentation level.
         self.unwind_indent(self.column)

         # Peek the next character and pick the fetcher for it.
         ch = self.peek()

         # TODO: support for BOM within a stream (issue a BOM token when the
         # character is '\uFEFF').
         if ch == "\0":
             # End of stream.
             fetch = self.fetch_stream_end
         elif ch == "%" and self.check_directive():
             fetch = self.fetch_directive
         elif ch == "-" and self.check_document_start():
             fetch = self.fetch_document_start
         elif ch == "." and self.check_document_end():
             fetch = self.fetch_document_end
         # Note: the order of the following checks is NOT significant.
         elif ch == "[":
             fetch = self.fetch_flow_sequence_start
         elif ch == "{":
             fetch = self.fetch_flow_mapping_start
         elif ch == "]":
             fetch = self.fetch_flow_sequence_end
         elif ch == "}":
             fetch = self.fetch_flow_mapping_end
         elif ch == ",":
             fetch = self.fetch_flow_entry
         elif ch == "-" and self.check_block_entry():
             fetch = self.fetch_block_entry
         elif ch == "?" and self.check_key():
             fetch = self.fetch_key
         elif ch == ":" and self.check_value():
             fetch = self.fetch_value
         elif ch == "*":
             fetch = self.fetch_alias
         elif ch == "&":
             fetch = self.fetch_anchor
         elif ch == "!":
             fetch = self.fetch_tag
         elif ch == "|" and not self.flow_level:
             # Literal scalars exist in the block context only.
             fetch = self.fetch_literal
         elif ch == ">" and not self.flow_level:
             # Folded scalars exist in the block context only.
             fetch = self.fetch_folded
         elif ch == "'":
             fetch = self.fetch_single
         elif ch == '"':
             fetch = self.fetch_double
         elif self.check_plain():
             # It must be a plain scalar then.
             fetch = self.fetch_plain
         else:
             # Nothing matched: it's an error. Produce a nice error message.
             raise ScannerError(
                 "while scanning for the next token",
                 None,
                 "found character %r that cannot start any token" % ch,
                 self.get_mark(),
             )
         return fetch()

     # Simple keys treatment.

     def next_possible_simple_key(self):
         # Return the number of the nearest possible simple key. Actually we
         # don't need to loop through the whole dictionary. We may replace it
         # with the following code:
         #   if not self.possible_simple_keys:
         #       return None
         #   return self.possible_simple_keys[
         #           min(self.possible_simple_keys.keys())].token_number
         min_token_number = None
         for level in self.possible_simple_keys:
             key = self.possible_simple_keys[level]
             if min_token_number is None or key.token_number < min_token_number:
                 min_token_number = key.token_number
         return min_token_number

     def stale_possible_simple_keys(self):
         # Remove entries that are no longer possible simple keys. According to
         # the YAML specification, simple keys
         # - should be limited to a single line,
         # - should be no longer than 1024 characters.
         # Disabling this procedure will allow simple keys of any length and
         # height (may cause problems if indentation is broken though).
         for level in list(self.possible_simple_keys):
             key = self.possible_simple_keys[level]
             if key.line != self.line or self.index - key.index > 1024:
                 if key.required:
                     raise ScannerError(
                         "while scanning a simple key",
                         key.mark,
                         "could not find expected ':'",
                         self.get_mark(),
                     )
                 del self.possible_simple_keys[level]

     def save_possible_simple_key(self):
         """Remember the upcoming token as a possible simple key.

         The next token may start a simple key, so its number and position
         are saved if simple keys are allowed here. This function is called
         for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
         """
         # A simple key is required at the current position when we are in
         # the block context and exactly at the current indentation level.
         in_block_context = not self.flow_level
         required = in_block_context and self.indent == self.column

         if not self.allow_simple_key:
             return

         # Replace whatever was saved for this flow level.
         self.remove_possible_simple_key()
         position = self.tokens_taken + len(self.tokens)
         self.possible_simple_keys[self.flow_level] = SimpleKey(
             position,
             required,
             self.index,
             self.line,
             self.column,
             self.get_mark(),
         )

     def remove_possible_simple_key(self):
         # Remove the saved possible key position at the current flow level.
         if self.flow_level in self.possible_simple_keys:
             key = self.possible_simple_keys[self.flow_level]

             if key.required:
                 raise ScannerError(
                     "while scanning a simple key",
                     key.mark,
                     "could not find expected ':'",
                     self.get_mark(),
                 )

             del self.possible_simple_keys[self.flow_level]

     # Indentation functions.

     def unwind_indent(self, column):

         ## In flow context, tokens should respect indentation.
         ## Actually the condition should be `self.indent >= column` according to
         ## the spec. But this condition will prohibit intuitively correct
         ## constructions such as
         ## key : {
         ## }
         # if self.flow_level and self.indent > column:
         #    raise ScannerError(None, None,
         #            "invalid indentation or unclosed '[' or '{'",
         #            self.get_mark())

         # In the flow context, indentation is ignored. We make the scanner less
         # restrictive then specification requires.
         if self.flow_level:
             return

         # In block context, we may need to issue the BLOCK-END tokens.
         while self.indent > column:
             mark = self.get_mark()
             self.indent = self.indents.pop()
             self.tokens.append(BlockEndToken(mark, mark))

     def add_indent(self, column):
         # Check if we need to increase indentation.
         if self.indent < column:
             self.indents.append(self.indent)
             self.indent = column
             return True
         return False

     # Fetchers.

     def fetch_stream_start(self):
         """Queue the STREAM-START token.

         STREAM-START is always the first token and STREAM-END the last one.
         """
         here = self.get_mark()
         token = StreamStartToken(here, here, encoding=self.encoding)
         self.tokens.append(token)

     def fetch_stream_end(self):
         """Unwind the indentation, queue STREAM-END and mark the scanner done."""
         # Set the current indentation to -1.
         self.unwind_indent(-1)

         # Reset simple keys: drop the current one and forbid new ones.
         self.remove_possible_simple_key()
         self.allow_simple_key = False
         self.possible_simple_keys = {}

         # Add STREAM-END at the current position.
         here = self.get_mark()
         self.tokens.append(StreamEndToken(here, here))

         # The stream is finished.
         self.done = True

     def fetch_directive(self):
         """Unwind the indentation, reset simple keys and queue a DIRECTIVE token."""
         # Set the current indentation to -1.
         self.unwind_indent(-1)

         # No simple key may be pending or start here.
         self.remove_possible_simple_key()
         self.allow_simple_key = False

         directive = self.scan_directive()
         self.tokens.append(directive)

     def fetch_document_start(self):
         """Queue a DOCUMENT-START token."""
         self.fetch_document_indicator(TokenClass=DocumentStartToken)

     def fetch_document_end(self):
         """Queue a DOCUMENT-END token."""
         self.fetch_document_indicator(TokenClass=DocumentEndToken)

     def fetch_document_indicator(self, TokenClass):
         """Queue a DOCUMENT-START or DOCUMENT-END token built from `TokenClass`.

         The indicator is three characters long; the token spans them.
         """
         # Set the current indentation to -1.
         self.unwind_indent(-1)

         # Reset simple keys. Note that there could not be a block collection
         # after '---'.
         self.remove_possible_simple_key()
         self.allow_simple_key = False

         begin = self.get_mark()
         self.forward(3)
         finish = self.get_mark()
         token = TokenClass(begin, finish)
         self.tokens.append(token)

     def fetch_flow_sequence_start(self):
         """Queue a FLOW-SEQUENCE-START token for '['."""
         self.fetch_flow_collection_start(TokenClass=FlowSequenceStartToken)

     def fetch_flow_mapping_start(self):
         """Queue a FLOW-MAPPING-START token for '{'."""
         self.fetch_flow_collection_start(TokenClass=FlowMappingStartToken)

     def fetch_flow_collection_start(self, TokenClass):
         """Enter a flow collection and queue its start token.

         `TokenClass` builds the FLOW-SEQUENCE-START or FLOW-MAPPING-START
         token.
         """
         # '[' and '{' may start a simple key; it is saved at the flow level
         # that is current before the level is increased.
         self.save_possible_simple_key()
         self.flow_level += 1

         # Simple keys are allowed after '[' and '{'.
         self.allow_simple_key = True

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         token = TokenClass(begin, finish)
         self.tokens.append(token)

     def fetch_flow_sequence_end(self):
         """Queue a FLOW-SEQUENCE-END token for ']'."""
         self.fetch_flow_collection_end(TokenClass=FlowSequenceEndToken)

     def fetch_flow_mapping_end(self):
         """Queue a FLOW-MAPPING-END token for '}'."""
         self.fetch_flow_collection_end(TokenClass=FlowMappingEndToken)

     def fetch_flow_collection_end(self, TokenClass):
         """Leave a flow collection and queue its end token.

         `TokenClass` builds the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
         """
         # Reset the possible simple key on the level that is being closed,
         # then step out of it.
         self.remove_possible_simple_key()
         self.flow_level -= 1

         # No simple keys after ']' or '}'.
         self.allow_simple_key = False

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         token = TokenClass(begin, finish)
         self.tokens.append(token)

     def fetch_flow_entry(self):
         """Queue a FLOW-ENTRY token for ','."""
         # Simple keys are allowed after ','.
         self.allow_simple_key = True

         # Whatever was pending on the current level cannot be a key any more.
         self.remove_possible_simple_key()

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         token = FlowEntryToken(begin, finish)
         self.tokens.append(token)

     def fetch_block_entry(self):
         """Queue a BLOCK-ENTRY token for '-', opening a block sequence if needed.

         Raises:
             ScannerError: in the block context, if a new entry is not
                 allowed at the current position.
         """
         # Only the block context needs additional checks. A block entry in
         # the flow context is an error, but we let the parser detect this.
         if not self.flow_level:

             # Are we allowed to start a new entry?
             if not self.allow_simple_key:
                 raise ScannerError(
                     None, None, "sequence entries are not allowed here", self.get_mark()
                 )

             # We may need to add BLOCK-SEQUENCE-START.
             if self.add_indent(self.column):
                 here = self.get_mark()
                 self.tokens.append(BlockSequenceStartToken(here, here))

         # Simple keys are allowed after '-'.
         self.allow_simple_key = True

         # Reset possible simple key on the current level.
         self.remove_possible_simple_key()

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         token = BlockEntryToken(begin, finish)
         self.tokens.append(token)

     def fetch_key(self):
         """Queue a KEY token for '?', opening a block mapping if needed.

         Raises:
             ScannerError: in the block context, if a key is not allowed at
                 the current position.
         """
         in_block_context = not self.flow_level

         # Block context needs additional checks.
         if in_block_context:

             # Are we allowed to start a key (not necessarily a simple one)?
             if not self.allow_simple_key:
                 raise ScannerError(
                     None, None, "mapping keys are not allowed here", self.get_mark()
                 )

             # We may need to add BLOCK-MAPPING-START.
             if self.add_indent(self.column):
                 here = self.get_mark()
                 self.tokens.append(BlockMappingStartToken(here, here))

         # Simple keys are allowed after '?' in the block context only.
         self.allow_simple_key = in_block_context

         # Reset possible simple key on the current level.
         self.remove_possible_simple_key()

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         token = KeyToken(begin, finish)
         self.tokens.append(token)

     def fetch_value(self):
         """Queue a VALUE token for ':', resolving a pending simple key if any.

         When a possible simple key is saved for the current flow level, a KEY
         token (and, in the block context, possibly BLOCK-MAPPING-START) is
         inserted in front of the token that started the key.

         Raises:
             ScannerError: in the block context, if there is no pending simple
                 key and a value is not allowed at the current position.
         """
         level = self.flow_level

         if level in self.possible_simple_keys:
             # We have determined a simple key: put KEY before its first token.
             key = self.possible_simple_keys.pop(level)
             position = key.token_number - self.tokens_taken
             self.tokens.insert(position, KeyToken(key.mark, key.mark))

             # If this key starts a new block mapping, BLOCK-MAPPING-START
             # goes in front of the KEY token that was just inserted.
             if not level and self.add_indent(key.column):
                 self.tokens.insert(
                     position, BlockMappingStartToken(key.mark, key.mark)
                 )

             # There cannot be two simple keys one after another.
             self.allow_simple_key = False
         else:
             # It must be a part of a complex key.
             if not level:
                 # Block context needs additional checks. (Do we really need
                 # them? They will be caught by the parser anyway.)
                 # We are allowed to start a complex value if and only if
                 # we can start a simple key.
                 if not self.allow_simple_key:
                     raise ScannerError(
                         None,
                         None,
                         "mapping values are not allowed here",
                         self.get_mark(),
                     )

                 # If this value starts a new block mapping, we need to add
                 # BLOCK-MAPPING-START.  It will be detected as an error
                 # later by the parser.
                 if self.add_indent(self.column):
                     here = self.get_mark()
                     self.tokens.append(BlockMappingStartToken(here, here))

             # Simple keys are allowed after ':' in the block context.
             self.allow_simple_key = not level

             # Reset possible simple key on the current level.
             self.remove_possible_simple_key()

         begin = self.get_mark()
         self.forward()
         finish = self.get_mark()
         self.tokens.append(ValueToken(begin, finish))

     def fetch_alias(self):
         """Queue an ALIAS token; the alias may itself be a simple key."""
         # Save the position first: this reads `allow_simple_key` before it
         # is switched off below.
         self.save_possible_simple_key()

         # No simple keys after ALIAS.
         self.allow_simple_key = False

         alias = self.scan_anchor(AliasToken)
         self.tokens.append(alias)

     def fetch_anchor(self):
         """Queue an ANCHOR token; the anchor may start a simple key."""
         # Save the position first: this reads `allow_simple_key` before it
         # is switched off below.
         self.save_possible_simple_key()

         # No simple keys after ANCHOR.
         self.allow_simple_key = False

         anchor = self.scan_anchor(AnchorToken)
         self.tokens.append(anchor)

     def fetch_tag(self):
         """Queue a TAG token; the tag may start a simple key."""
         # Save the position first: this reads `allow_simple_key` before it
         # is switched off below.
         self.save_possible_simple_key()

         # No simple keys after TAG.
         self.allow_simple_key = False

         tag = self.scan_tag()
         self.tokens.append(tag)

     def fetch_literal(self):
         """Queue a SCALAR token for a literal ('|') block scalar."""
         self.fetch_block_scalar("|")

     def fetch_folded(self):
         """Queue a SCALAR token for a folded ('>') block scalar."""
         self.fetch_block_scalar(">")

     def fetch_block_scalar(self, style):
         """Queue the SCALAR token for a block scalar of the given `style`."""
         # A simple key may follow a block scalar.
         self.allow_simple_key = True

         # Reset possible simple key on the current level.
         self.remove_possible_simple_key()

         scalar = self.scan_block_scalar(style)
         self.tokens.append(scalar)

     def fetch_single(self):
         """Queue a SCALAR token for a single-quoted scalar."""
         self.fetch_flow_scalar("'")

     def fetch_double(self):
         """Queue a SCALAR token for a double-quoted scalar."""
         self.fetch_flow_scalar('"')

     def fetch_flow_scalar(self, style):
         """Queue the SCALAR token for a quoted scalar of the given `style`."""
         # A flow scalar could be a simple key. Save the position first: this
         # reads `allow_simple_key` before it is switched off below.
         self.save_possible_simple_key()

         # No simple keys after flow scalars.
         self.allow_simple_key = False

         scalar = self.scan_flow_scalar(style)
         self.tokens.append(scalar)

     def fetch_plain(self):
         """Queue the SCALAR token for a plain scalar."""
         # A plain scalar could be a simple key.
         self.save_possible_simple_key()

         # No simple keys after plain scalars. But note that `scan_plain` will
         # change this flag if the scan is finished at the beginning of the
         # line.
         self.allow_simple_key = False

         scalar = self.scan_plain()
         self.tokens.append(scalar)

     # Checkers.

     def check_directive(self):

         # DIRECTIVE:        ^ '%' ...
         # The '%' indicator is already checked.
         if self.column == 0:
             return True

     def check_document_start(self):

         # DOCUMENT-START:   ^ '---' (' '|'\n')
         if self.column == 0:
             if self.prefix(3) == "---" and self.peek(3) in "\0 \t\r\n\x85\u2028\u2029":
                 return True

     def check_document_end(self):

         # DOCUMENT-END:     ^ '...' (' '|'\n')
         if self.column == 0:
             if self.prefix(3) == "..." and self.peek(3) in "\0 \t\r\n\x85\u2028\u2029":
                 return True

     def check_block_entry(self):

         # BLOCK-ENTRY:      '-' (' '|'\n')
         return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"

     def check_key(self):

         # KEY(flow context):    '?'
         if self.flow_level:
             return True

         # KEY(block context):   '?' (' '|'\n')
         else:
             return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"

     def check_value(self):

         # VALUE(flow context):  ':'
         if self.flow_level:
             return True

         # VALUE(block context): ':' (' '|'\n')
         else:
             return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"

     def check_plain(self):

         # A plain scalar may start with any non-space character except:
         #   '-', '?', ':', ',', '[', ']', '{', '}',
         #   '#', '&', '*', '!', '|', '>', '\'', '\"',
         #   '%', '@', '`'.
         #
         # It may also start with
         #   '-', '?', ':'
         # if it is followed by a non-space character.
         #
         # Note that we limit the last rule to the block context (except the
         # '-' character) because we want the flow context to be space
         # independent.
         ch = self.peek()
         return ch not in "\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>'\"%@`" or (
             self.peek(1) not in "\0 \t\r\n\x85\u2028\u2029"
             and (ch == "-" or (not self.flow_level and ch in "?:"))
         )

     # Scanners.

     def scan_to_next_token(self):
         """Skip spaces, line breaks and comments up to the next token.

         If a line break is found in the block context, the flag
         `allow_simple_key` is set on.

         The byte order mark is stripped if it's the first character in the
         stream. BOM inside the stream is not yet supported as the
         specification requires; any such mark will be considered as a part
         of the document.

         TODO: We need to make tab handling rules more sane. A good rule is
           Tabs cannot precede tokens
           BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
           KEY(block), VALUE(block), BLOCK-ENTRY
         So the checking code is
           if <TAB>:
               self.allow_simple_keys = False
         We also need to add the check for `allow_simple_keys == True` to
         `unwind_indent` before issuing BLOCK-END.
         Scanners for block, flow, and plain scalars need to be modified.
         """
         if self.index == 0 and self.peek() == "\uFEFF":
             self.forward()

         while True:
             # Leading spaces of the line.
             while self.peek() == " ":
                 self.forward()

             # A comment runs up to the end of the line.
             if self.peek() == "#":
                 while self.peek() not in "\0\r\n\x85\u2028\u2029":
                     self.forward()

             # Without a line break here, the next token has been reached.
             if not self.scan_line_break():
                 break
             if not self.flow_level:
                 self.allow_simple_key = True

     def scan_directive(self):
         """Scan a directive line and return its DirectiveToken.

         The value is scanned for the YAML and TAG directives only; for any
         other directive name the value is None and the rest of the line is
         skipped. See the specification for details.
         """
         start_mark = self.get_mark()
         self.forward()
         name = self.scan_directive_name(start_mark)

         known = True
         if name == "YAML":
             value = self.scan_yaml_directive_value(start_mark)
         elif name == "TAG":
             value = self.scan_tag_directive_value(start_mark)
         else:
             value = None
             known = False

         # The token ends right after the value, or right after the name of
         # a directive that is not recognized.
         end_mark = self.get_mark()
         if not known:
             while self.peek() not in "\0\r\n\x85\u2028\u2029":
                 self.forward()

         self.scan_directive_ignored_line(start_mark)
         return DirectiveToken(name, value, start_mark, end_mark)

     def scan_directive_name(self, start_mark):
         # See the specification for details.
         length = 0
         ch = self.peek(length)
         while "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_":
             length += 1
             ch = self.peek(length)
         if not length:
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected alphabetic or numeric character, but found %r" % ch,
                 self.get_mark(),
             )
         value = self.prefix(length)
         self.forward(length)
         ch = self.peek()
         if ch not in "\0 \r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected alphabetic or numeric character, but found %r" % ch,
                 self.get_mark(),
             )
         return value

     def scan_yaml_directive_value(self, start_mark):
         # See the specification for details.
         while self.peek() == " ":
             self.forward()
         major = self.scan_yaml_directive_number(start_mark)
         if self.peek() != ".":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected a digit or '.', but found %r" % self.peek(),
                 self.get_mark(),
             )
         self.forward()
         minor = self.scan_yaml_directive_number(start_mark)
         if self.peek() not in "\0 \r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected a digit or ' ', but found %r" % self.peek(),
                 self.get_mark(),
             )
         return (major, minor)

     def scan_yaml_directive_number(self, start_mark):
         # See the specification for details.
         ch = self.peek()
         if not ("0" <= ch <= "9"):
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected a digit, but found %r" % ch,
                 self.get_mark(),
             )
         length = 0
         while "0" <= self.peek(length) <= "9":
             length += 1
         value = int(self.prefix(length))
         self.forward(length)
         return value

     def scan_tag_directive_value(self, start_mark):
         # See the specification for details.
         while self.peek() == " ":
             self.forward()
         handle = self.scan_tag_directive_handle(start_mark)
         while self.peek() == " ":
             self.forward()
         prefix = self.scan_tag_directive_prefix(start_mark)
         return (handle, prefix)

     def scan_tag_directive_handle(self, start_mark):
         # See the specification for details.
         value = self.scan_tag_handle("directive", start_mark)
         ch = self.peek()
         if ch != " ":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected ' ', but found %r" % ch,
                 self.get_mark(),
             )
         return value

     def scan_tag_directive_prefix(self, start_mark):
         # See the specification for details.
         value = self.scan_tag_uri("directive", start_mark)
         ch = self.peek()
         if ch not in "\0 \r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected ' ', but found %r" % ch,
                 self.get_mark(),
             )
         return value

     def scan_directive_ignored_line(self, start_mark):
         # See the specification for details.
         while self.peek() == " ":
             self.forward()
         if self.peek() == "#":
             while self.peek() not in "\0\r\n\x85\u2028\u2029":
                 self.forward()
         ch = self.peek()
         if ch not in "\0\r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a directive",
                 start_mark,
                 "expected a comment or a line break, but found %r" % ch,
                 self.get_mark(),
             )
         self.scan_line_break()

     def scan_anchor(self, TokenClass):
         # The specification does not restrict characters for anchors and
         # aliases. This may lead to problems, for instance, the document:
         #   [ *alias, value ]
         # can be interpreted in two ways, as
         #   [ "value" ]
         # and
         #   [ *alias , "value" ]
         # Therefore we restrict aliases to numbers and ASCII letters.
         start_mark = self.get_mark()
         indicator = self.peek()
         if indicator == "*":
             name = "alias"
         else:
             name = "anchor"
         self.forward()
         length = 0
         ch = self.peek(length)
         while "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_":
             length += 1
             ch = self.peek(length)
         if not length:
             raise ScannerError(
                 "while scanning an %s" % name,
                 start_mark,
                 "expected alphabetic or numeric character, but found %r" % ch,
                 self.get_mark(),
             )
         value = self.prefix(length)
         self.forward(length)
         ch = self.peek()
         if ch not in "\0 \t\r\n\x85\u2028\u2029?:,]}%@`":
             raise ScannerError(
                 "while scanning an %s" % name,
                 start_mark,
                 "expected alphabetic or numeric character, but found %r" % ch,
                 self.get_mark(),
             )
         end_mark = self.get_mark()
         return TokenClass(value, start_mark, end_mark)

     def scan_tag(self):
         """Scan a tag and return it as a TagToken.

         Three spellings are handled, chosen by the character after the
         leading indicator:

         * ``<`` starts a verbatim tag: the URI up to the closing ``>`` is
           the suffix and the handle is None;
         * a blank, a line break or the NUL end marker means the tag is
           the bare indicator: suffix ``!`` with handle None;
         * otherwise the tag is a handle followed by a URI suffix.

         The token value is the ``(handle, suffix)`` tuple. Raises
         ScannerError when a verbatim tag is not closed or when the tag is
         not followed by a space, a line break or the NUL end marker.
         """
         # See the specification for details.
         start_mark = self.get_mark()
         ch = self.peek(1)
         if ch == "<":
             handle = None
             self.forward(2)
             suffix = self.scan_tag_uri("tag", start_mark)
             if self.peek() != ">":
                 raise ScannerError(
                     "while parsing a tag",
                     start_mark,
                     "expected '>', but found %r" % self.peek(),
                     self.get_mark(),
                 )
             self.forward()
         elif ch in "\0 \t\r\n\x85\u2028\u2029":
             handle = None
             suffix = "!"
             self.forward()
         else:
             # Look ahead for a second '!' before the next space or line
             # break: finding one means the tag carries a named handle.
             offset = 1
             while ch != "!" and ch not in "\0 \r\n\x85\u2028\u2029":
                 offset += 1
                 ch = self.peek(offset)
             if ch == "!":
                 handle = self.scan_tag_handle("tag", start_mark)
             else:
                 handle = "!"
                 self.forward()
             suffix = self.scan_tag_uri("tag", start_mark)
         ch = self.peek()
         if ch not in "\0 \r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a tag",
                 start_mark,
                 "expected ' ', but found %r" % ch,
                 self.get_mark(),
             )
         return TagToken((handle, suffix), start_mark, self.get_mark())

     def scan_block_scalar(self, style):
         """Scan a block scalar and return it as a ScalarToken.

         ``style`` is the indicator character; ``>`` selects folding and
         any other value keeps line breaks as they are. The header
         (optional chomping and indentation indicators, then an ignored
         rest of line) is read first, the content indentation is worked
         out, the content lines are collected and finally the trailing
         line breaks are chomped according to the chomping indicator.
         """
         # See the specification for details.
         folded = style == ">"
         pieces = []
         start_mark = self.get_mark()

         # Header: indicator character, indicators, rest of the line.
         self.forward()
         chomping, increment = self.scan_block_scalar_indicators(start_mark)
         self.scan_block_scalar_ignored_line(start_mark)

         # Work out the content indentation and move to the first
         # non-empty line.
         min_indent = max(self.indent + 1, 1)
         if increment is None:
             breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
             indent = max(min_indent, max_indent)
         else:
             indent = min_indent + increment - 1
             breaks, end_mark = self.scan_block_scalar_breaks(indent)
         line_break = ""

         # Collect the content, one line per iteration.
         while self.column == indent and self.peek() != "\0":
             pieces.extend(breaks)
             starts_unindented = self.peek() not in " \t"
             length = 0
             while self.peek(length) not in "\0\r\n\x85\u2028\u2029":
                 length += 1
             pieces.append(self.prefix(length))
             self.forward(length)
             line_break = self.scan_line_break()
             breaks, end_mark = self.scan_block_scalar_breaks(indent)
             if self.column != indent or self.peek() == "\0":
                 break

             # The folding rules are ambiguous. This follows the
             # specification: a plain "\n" between two lines that both
             # start with a non-blank character is folded. An alternative
             # reading attributed to Clark Evans (also used in the spec
             # examples) is deliberately not implemented here.
             foldable = (
                 folded
                 and line_break == "\n"
                 and starts_unindented
                 and self.peek() not in " \t"
             )
             if not foldable:
                 pieces.append(line_break)
             elif not breaks:
                 pieces.append(" ")

         # Chomp the tail: keep the final break unless stripping was
         # requested, and keep the trailing empty lines only for "keep".
         if chomping is not False:
             pieces.append(line_break)
         if chomping is True:
             pieces.extend(breaks)

         return ScalarToken("".join(pieces), False, start_mark, end_mark, style)

     def scan_block_scalar_indicators(self, start_mark):
         # See the specification for details.
         chomping = None
         increment = None
         ch = self.peek()
         if ch in "+-":
             if ch == "+":
                 chomping = True
             else:
                 chomping = False
             self.forward()
             ch = self.peek()
             if ch in "0123456789":
                 increment = int(ch)
                 if increment == 0:
                     raise ScannerError(
                         "while scanning a block scalar",
                         start_mark,
                         "expected indentation indicator in the range 1-9, but found 0",
                         self.get_mark(),
                     )
                 self.forward()
         elif ch in "0123456789":
             increment = int(ch)
             if increment == 0:
                 raise ScannerError(
                     "while scanning a block scalar",
                     start_mark,
                     "expected indentation indicator in the range 1-9, but found 0",
                     self.get_mark(),
                 )
             self.forward()
             ch = self.peek()
             if ch in "+-":
                 if ch == "+":
                     chomping = True
                 else:
                     chomping = False
                 self.forward()
         ch = self.peek()
         if ch not in "\0 \r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a block scalar",
                 start_mark,
                 "expected chomping or indentation indicators, but found %r" % ch,
                 self.get_mark(),
             )
         return chomping, increment

     def scan_block_scalar_ignored_line(self, start_mark):
         # See the specification for details.
         while self.peek() == " ":
             self.forward()
         if self.peek() == "#":
             while self.peek() not in "\0\r\n\x85\u2028\u2029":
                 self.forward()
         ch = self.peek()
         if ch not in "\0\r\n\x85\u2028\u2029":
             raise ScannerError(
                 "while scanning a block scalar",
                 start_mark,
                 "expected a comment or a line break, but found %r" % ch,
                 self.get_mark(),
             )
         self.scan_line_break()

     def scan_block_scalar_indentation(self):
         # See the specification for details.
         chunks = []
         max_indent = 0
         end_mark = self.get_mark()
         while self.peek() in " \r\n\x85\u2028\u2029":
             if self.peek() != " ":
                 chunks.append(self.scan_line_break())
                 end_mark = self.get_mark()
             else:
                 self.forward()
                 if self.column > max_indent:
                     max_indent = self.column
         return chunks, max_indent, end_mark

     def scan_block_scalar_breaks(self, indent):
         # See the specification for details.
         chunks = []
         end_mark = self.get_mark()
         while self.column < indent and self.peek() == " ":
             self.forward()
         while self.peek() in "\r\n\x85\u2028\u2029":
             chunks.append(self.scan_line_break())
             end_mark = self.get_mark()
             while self.column < indent and self.peek() == " ":
                 self.forward()
         return chunks, end_mark

     def scan_flow_scalar(self, style):
         """Scan a quoted scalar and return it as a ScalarToken.

         ``style`` equal to the double-quote character selects
         double-quoted rules; any other value is treated as single-quoted.
         The character at the current position is taken as the quote and
         scanning alternates between non-blank and blank stretches until
         that same quote is met again.
         """
         # See the specification for details.
         # Indentation rules are relaxed for quoted scalars: the quotes
         # already delimit them unambiguously, so this is less strict than
         # the specification requires. The only check kept is that
         # document separators are not swallowed into a scalar.
         double = style == '"'
         start_mark = self.get_mark()
         quote = self.peek()
         self.forward()
         pieces = list(self.scan_flow_scalar_non_spaces(double, start_mark))
         while self.peek() != quote:
             pieces += self.scan_flow_scalar_spaces(double, start_mark)
             pieces += self.scan_flow_scalar_non_spaces(double, start_mark)
         self.forward()
         end_mark = self.get_mark()
         return ScalarToken("".join(pieces), False, start_mark, end_mark, style)

     # Single-character escapes of double-quoted scalars: maps the
     # character found right after the backslash to the text that
     # scan_flow_scalar_non_spaces appends in its place.
     ESCAPE_REPLACEMENTS = {
         "0": "\0",
         "a": "\x07",
         "b": "\x08",
         "t": "\x09",
         "\t": "\x09",
         "n": "\x0A",
         "v": "\x0B",
         "f": "\x0C",
         "r": "\x0D",
         "e": "\x1B",
         " ": "\x20",
         '"': '"',
         "\\": "\\",
         "/": "/",
         "N": "\x85",
         "_": "\xA0",
         "L": "\u2028",
         "P": "\u2029",
     }

     # Numeric escapes of double-quoted scalars: maps the character found
     # right after the backslash to the number of hexadecimal digits that
     # scan_flow_scalar_non_spaces reads after it.
     ESCAPE_CODES = {
         "x": 2,
         "u": 4,
         "U": 8,
     }

     def scan_flow_scalar_non_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
             length = 0
             while self.peek(length) not in "'\"\\\0 \t\r\n\x85\u2028\u2029":
                 length += 1
             if length:
                 chunks.append(self.prefix(length))
                 self.forward(length)
             ch = self.peek()
             if not double and ch == "'" and self.peek(1) == "'":
                 chunks.append("'")
                 self.forward(2)
             elif (double and ch == "'") or (not double and ch in '"\\'):
                 chunks.append(ch)
                 self.forward()
             elif double and ch == "\\":
                 self.forward()
                 ch = self.peek()
                 if ch in self.ESCAPE_REPLACEMENTS:
                     chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                     self.forward()
                 elif ch in self.ESCAPE_CODES:
                     length = self.ESCAPE_CODES[ch]
                     self.forward()
                     for k in range(length):
                         if self.peek(k) not in "0123456789ABCDEFabcdef":
                             raise ScannerError(
                                 "while scanning a double-quoted scalar",
                                 start_mark,
                                 "expected escape sequence of %d hexadecimal numbers, but found %r"
                                 % (length, self.peek(k)),
                                 self.get_mark(),
                             )
                     code = int(self.prefix(length), 16)
                     chunks.append(chr(code))
                     self.forward(length)
                 elif ch in "\r\n\x85\u2028\u2029":
                     self.scan_line_break()
                     chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                 else:
                     raise ScannerError(
                         "while scanning a double-quoted scalar",
                         start_mark,
                         "found unknown escape character %r" % ch,
                         self.get_mark(),
                     )
             else:
                 return chunks

     def scan_flow_scalar_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         length = 0
         while self.peek(length) in " \t":
             length += 1
         whitespaces = self.prefix(length)
         self.forward(length)
         ch = self.peek()
         if ch == "\0":
             raise ScannerError(
                 "while scanning a quoted scalar",
                 start_mark,
                 "found unexpected end of stream",
                 self.get_mark(),
             )
         elif ch in "\r\n\x85\u2028\u2029":
             line_break = self.scan_line_break()
             breaks = self.scan_flow_scalar_breaks(double, start_mark)
             if line_break != "\n":
                 chunks.append(line_break)
             elif not breaks:
                 chunks.append(" ")
             chunks.extend(breaks)
         else:
             chunks.append(whitespaces)
         return chunks

     def scan_flow_scalar_breaks(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
             # Instead of checking indentation, we check for document
             # separators.
             prefix = self.prefix(3)
             if (prefix == "---" or prefix == "...") and self.peek(
                 3
             ) in "\0 \t\r\n\x85\u2028\u2029":
                 raise ScannerError(
                     "while scanning a quoted scalar",
                     start_mark,
                     "found unexpected document separator",
                     self.get_mark(),
                 )
             while self.peek() in " \t":
                 self.forward()
             if self.peek() in "\r\n\x85\u2028\u2029":
                 chunks.append(self.scan_line_break())
             else:
                 return chunks

     def scan_plain(self):
         """Scan a plain (unquoted) scalar and return it as a ScalarToken.

         Content is read in alternating stretches of non-blank text and
         the blanks or line breaks between them. A stretch of text ends at
         a blank, a line break, the NUL end marker, a ``:`` that is
         followed by one of those (or, in the flow context, by one of
         ``,[]{}``) and, in the flow context only, at any of ``,?[]{}``.
         The scalar ends at a ``#``, at an empty stretch of text, when no
         separating blanks follow, or, in the block context, when the
         column drops below the required indentation.

         ``allow_simple_key`` is cleared once any text has been read.
         """
         # See the specification for details.
         # Indentation rules are loosened for the flow context.
         blanks = "\0 \t\r\n\x85\u2028\u2029"
         after_colon = blanks + (",[]{}" if self.flow_level else "")
         pieces = []
         start_mark = self.get_mark()
         end_mark = start_mark
         # Zero indentation is allowed for scalars; document separators at
         # the beginning of a line are then checked by scan_plain_spaces.
         indent = self.indent + 1
         pending_spaces = []
         while self.peek() != "#":
             length = 0
             while True:
                 ch = self.peek(length)
                 if ch in blanks:
                     break
                 if ch == ":" and self.peek(length + 1) in after_colon:
                     break
                 if self.flow_level and ch in ",?[]{}":
                     break
                 length += 1
             if length == 0:
                 break
             self.allow_simple_key = False
             pieces.extend(pending_spaces)
             pieces.append(self.prefix(length))
             self.forward(length)
             end_mark = self.get_mark()
             pending_spaces = self.scan_plain_spaces(indent, start_mark)
             if not pending_spaces:
                 break
             if self.peek() == "#":
                 break
             if not self.flow_level and self.column < indent:
                 break
         return ScalarToken("".join(pieces), True, start_mark, end_mark)

     def scan_plain_spaces(self, indent, start_mark):
         # See the specification for details.
         # The specification is really confusing about tabs in plain scalars.
         # We just forbid them completely. Do not use tabs in YAML!
         chunks = []
         length = 0
         while self.peek(length) in " ":
             length += 1
         whitespaces = self.prefix(length)
         self.forward(length)
         ch = self.peek()
         if ch in "\r\n\x85\u2028\u2029":
             line_break = self.scan_line_break()
             self.allow_simple_key = True
             prefix = self.prefix(3)
             if (prefix == "---" or prefix == "...") and self.peek(
                 3
             ) in "\0 \t\r\n\x85\u2028\u2029":
                 return
             breaks = []
             while self.peek() in " \r\n\x85\u2028\u2029":
                 if self.peek() == " ":
                     self.forward()
                 else:
                     breaks.append(self.scan_line_break())
                     prefix = self.prefix(3)
                     if (prefix == "---" or prefix == "...") and self.peek(
                         3
                     ) in "\0 \t\r\n\x85\u2028\u2029":
                         return
             if line_break != "\n":
                 chunks.append(line_break)
             elif not breaks:
                 chunks.append(" ")
             chunks.extend(breaks)
         elif whitespaces:
             chunks.append(whitespaces)
         return chunks

     def scan_tag_handle(self, name, start_mark):
         # See the specification for details.
         # For some strange reasons, the specification does not allow '_' in
         # tag handles. I have allowed it anyway.
         ch = self.peek()
         if ch != "!":
             raise ScannerError(
                 "while scanning a %s" % name,
                 start_mark,
                 "expected '!', but found %r" % ch,
                 self.get_mark(),
             )
         length = 1
         ch = self.peek(length)
         if ch != " ":
             while (
                 "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_"
             ):
                 length += 1
                 ch = self.peek(length)
             if ch != "!":
                 self.forward(length)
                 raise ScannerError(
                     "while scanning a %s" % name,
                     start_mark,
                     "expected '!', but found %r" % ch,
                     self.get_mark(),
                 )
             length += 1
         value = self.prefix(length)
         self.forward(length)
         return value

     def scan_tag_uri(self, name, start_mark):
         # See the specification for details.
         # Note: we do not check if URI is well-formed.
         chunks = []
         length = 0
         ch = self.peek(length)
         while (
             "0" <= ch <= "9"
             or "A" <= ch <= "Z"
             or "a" <= ch <= "z"
             or ch in "-;/?:@&=+$,_.!~*'()[]%"
         ):
             if ch == "%":
                 chunks.append(self.prefix(length))
                 self.forward(length)
                 length = 0
                 chunks.append(self.scan_uri_escapes(name, start_mark))
             else:
                 length += 1
             ch = self.peek(length)
         if length:
             chunks.append(self.prefix(length))
             self.forward(length)
             length = 0
         if not chunks:
             raise ScannerError(
                 "while parsing a %s" % name,
                 start_mark,
                 "expected URI, but found %r" % ch,
                 self.get_mark(),
             )
         return "".join(chunks)

     def scan_uri_escapes(self, name, start_mark):
         # See the specification for details.
         codes = []
         mark = self.get_mark()
         while self.peek() == "%":
             self.forward()
             for k in range(2):
                 if self.peek(k) not in "0123456789ABCDEFabcdef":
                     raise ScannerError(
                         "while scanning a %s" % name,
                         start_mark,
                         "expected URI escape sequence of 2 hexadecimal numbers, but found %r"
                         % self.peek(k),
                         self.get_mark(),
                     )
             codes.append(int(self.prefix(2), 16))
             self.forward(2)
         try:
             value = bytes(codes).decode("utf-8")
         except UnicodeDecodeError as exc:
             raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
         return value

     def scan_line_break(self):
         # Transforms:
         #   '\r\n'      :   '\n'
         #   '\r'        :   '\n'
         #   '\n'        :   '\n'
         #   '\x85'      :   '\n'
         #   '\u2028'    :   '\u2028'
         #   '\u2029     :   '\u2029'
         #   default     :   ''
         ch = self.peek()
         if ch in "\r\n\x85":
             if self.prefix(2) == "\r\n":
                 self.forward(2)
             else:
                 self.forward()
             return "\n"
         elif ch in "\u2028\u2029":
             self.forward()
             return ch
         return ""