| 1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
| 2 | # Licensed to PSF under a Contributor Agreement.
|
| 3 |
|
| 4 | # Modifications:
|
| 5 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
| 6 | # Licensed to PSF under a Contributor Agreement.
|
| 7 |
|
| 8 | """Parser driver.
|
| 9 |
|
| 10 | This provides a high-level interface to parse a file into a syntax tree.
|
| 11 |
|
| 12 | """
|
| 13 |
|
| 14 | __author__ = "Guido van Rossum <guido@python.org>"
|
| 15 |
|
| 16 | __all__ = ["Driver", "load_grammar"]
|
| 17 |
|
| 18 | # Python imports
|
| 19 | import codecs
|
| 20 | import io
|
| 21 | import os
|
| 22 | import logging
|
| 23 | import sys
|
| 24 |
|
| 25 | # Pgen imports
|
| 26 | from . import grammar, parse, token, tokenize
|
| 27 |
|
| 28 |
|
class Driver(object):
    """Drive a pgen2 parser over a token stream.

    Wraps a grammar, an optional node-conversion callback, and a logger,
    and offers convenience entry points for parsing token iterables,
    open streams, files, and strings into a syntax tree.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Initialize the driver.

        Args:
            grammar: a pgen2 grammar object (must provide ``opmap``).
            convert: optional callback forwarded to parse.Parser, used to
                convert raw parse nodes into the caller's tree type.
            logger: optional logging.Logger; defaults to the root logger.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, start_symbol=None, debug=False):
        """Parse a series of tokens and return the syntax tree.

        Args:
            tokens: iterable of 5-tuples ``(type, value, start, end,
                line_text)`` as produced by tokenize.generate_tokens().
            start_symbol: optional grammar start symbol; None uses the
                grammar's default start.
            debug: when true, log each significant token at DEBUG level.

        Returns:
            The root node of the parse tree.

        Raises:
            parse.ParseError: if the token stream is exhausted before the
                grammar accepts the input.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup(start=start_symbol)
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # Text skipped by the tokenizer (whitespace, blank lines)
                # is accumulated into the prefix of the next real token.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines also become prefix text
                # rather than tokens fed to the parser.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Refine generic OP tokens to their specific grammar type.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # The parser accepted the input; we're done.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        # Bug fix: pass debug by keyword.  The old positional call
        # parse_tokens(tokens, debug) bound debug to start_symbol, so a
        # truthy flag was misused as a grammar start symbol and debug
        # logging never happened.
        return self.parse_tokens(tokens, debug=debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug=debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        # Context manager guarantees the stream is closed even on error
        # (equivalent to the previous explicit try/finally).
        with codecs.open(filename, "r", encoding) as stream:
            return self.parse_stream(stream, debug=debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        # Bug fix: pass debug by keyword (see parse_stream_raw).
        return self.parse_tokens(tokens, debug=debug)
|