# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.

# Licensed to PSF under a Contributor Agreement.



"""This module defines the data structures used to represent a grammar.



These are a bit arcane because they are derived from the data

structures used by Python's 'pgen' parser generator.



There's also a table here mapping operators to their names in the

token module; the Python tokenize module reports all operators as the

fallback token code OP, but the parser needs the actual token code.



"""



# Python imports

import pickle



# Local imports

from . import token, tokenize





class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, initially empty and not filled in by this
                     class.  (Presumably maps symbol names to arc
                     labels -- confirm against the subclasses that
                     populate it.)

    """

    def __init__(self):
        """Create a grammar with empty tables.

        Only the reserved label 0, (0, "EMPTY"), and the default start
        symbol number 256 are set; everything else is expected to be
        filled in by a subclass or by load().
        """
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The whole instance __dict__ is written using pickle protocol 2.
        """
        # The with-block closes the file even if pickling raises.
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict is merged into this instance's __dict__, so
        attributes absent from the file keep their current values.

        NOTE: unpickling can execute arbitrary code; only load files
        that come from a trusted source (e.g. written by dump()).
        """
        # The with-block closes the file even if unpickling raises.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

        # Single-argument print(...) calls produce identical output whether
        # print is a statement (Python 2) or a function (Python 3).
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %s" % (self.start,))





# Operator-to-token-number table.  The tokenize module reports every
# operator as the generic OP token, so the specific token code for each
# operator string is looked up here instead.

opmap_raw = """

( LPAR

) RPAR

[ LSQB

] RSQB

: COLON

, COMMA

; SEMI

+ PLUS

- MINUS

* STAR

/ SLASH

| VBAR

& AMPER

< LESS

> GREATER

= EQUAL

. DOT

% PERCENT

` BACKQUOTE

{ LBRACE

} RBRACE

@ AT

== EQEQUAL

!= NOTEQUAL

<> NOTEQUAL

<= LESSEQUAL

>= GREATEREQUAL

~ TILDE

^ CIRCUMFLEX

<< LEFTSHIFT

>> RIGHTSHIFT

** DOUBLESTAR

+= PLUSEQUAL

-= MINEQUAL

*= STAREQUAL

/= SLASHEQUAL

%= PERCENTEQUAL

&= AMPEREQUAL

|= VBAREQUAL

^= CIRCUMFLEXEQUAL

<<= LEFTSHIFTEQUAL

>>= RIGHTSHIFTEQUAL

**= DOUBLESTAREQUAL

// DOUBLESLASH

//= DOUBLESLASHEQUAL

-> RARROW

"""

# Every non-empty line of opmap_raw has the form "<operator> <NAME>", where
# NAME is an attribute of the token module holding the numeric token code.
opmap = {}

for line in opmap_raw.splitlines():
    if not line:
        # Skip the blank lines inside the raw table.
        continue
    op, name = line.split()
    opmap[op] = getattr(token, name)

