"""Iterator based sre token scanner



"""



import re

import sre_parse

import sre_compile

import sre_constants



from re import VERBOSE, MULTILINE, DOTALL

from sre_constants import BRANCH, SUBPATTERN



# Names exported by ``from <module> import *``.
__all__ = ['Scanner', 'pattern']



# Default regex flags: used by both Scanner.__init__ and pattern() when the
# caller does not pass ``flags`` explicitly.
FLAGS = (VERBOSE | MULTILINE | DOTALL)



class Scanner(object):
    """Token scanner that dispatches regex matches to action callables.

    Every lexicon entry is a callable carrying a ``pattern`` attribute (a
    regular-expression string).  The patterns are merged into one
    alternation in which each entry is wrapped in its own numbered group,
    so ``match.lastindex`` identifies which entry matched and therefore
    which action to call.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Compile *lexicon* into a single combined pattern.

        lexicon -- iterable of callables, each exposing a ``pattern``
                   attribute; alternatives are combined in iteration order.
        flags   -- regex flags applied to every phrase and to the combined
                   pattern.

        Raises ``sre_constants.error`` if a phrase cannot be parsed.
        """
        import sys

        # Group numbers start at 1, so slot 0 is a placeholder that lets
        # ``match.lastindex`` index this list directly.
        self.actions = [None]

        # The parser-state class is called ``State`` on newer interpreters
        # (Python 3.8+) and ``Pattern`` on older ones.
        state_cls = getattr(sre_parse, 'State', None)
        if state_cls is None:
            state_cls = sre_parse.Pattern
        s = state_cls()
        s.flags = flags

        # Newer parser states track groups through opengroup()/closegroup()
        # and expose ``groups`` as a read-only property; older ones keep a
        # plain, assignable ``groups`` counter.
        managed_groups = hasattr(s, 'groupwidths')
        # Since Python 3.6 a SUBPATTERN argument is
        # (group, add_flags, del_flags, pattern) instead of (group, pattern).
        extended_subpattern = sys.version_info >= (3, 6)

        # Combine phrases into a compound pattern
        p = []
        for idx, token in enumerate(lexicon):
            parsed = sre_parse.parse(token.pattern, flags)
            if managed_groups:
                gid = s.opengroup()
            else:
                gid = idx + 1
            if extended_subpattern:
                args = (gid, 0, 0, parsed)
            else:
                args = (gid, parsed)
            subpattern = sre_parse.SubPattern(s, [(SUBPATTERN, args)])
            if managed_groups:
                s.closegroup(gid, subpattern)
            p.append(subpattern)
            self.actions.append(token)

        if not managed_groups:
            # NOTE(guido): Added to make SRE validation work
            s.groups = len(p) + 1

        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(result, end_idx)`` for each token matched in *string*.

        string  -- text to scan.
        idx     -- index at which scanning starts.
        context -- opaque value handed to every action as its second
                   argument.

        Each action is called as ``action(match, context)`` and must return
        a ``(result, next_pos)`` pair.  When ``next_pos`` is not ``None``
        and differs from the end of the match, scanning resumes at
        ``next_pos`` and that position is reported as ``end_idx``.

        Iteration stops when nothing matches at the current position, or
        when a match ends exactly where the previous one ended (no progress
        was made).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner: restart matching from the
                    # position the action asked for.
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend





def pattern(pattern, flags=FLAGS):
    """Return a decorator that attaches a regex to the decorated function.

    The decorated function receives two attributes and is returned itself:

    ``pattern`` -- the regular-expression source string passed here.
    ``regex``   -- that string compiled with *flags*.
    """
    def decorator(fn):
        # Store the raw source first, then the compiled form; compilation
        # happens each time the decorator is applied.
        setattr(fn, 'pattern', pattern)
        setattr(fn, 'regex', re.compile(pattern, flags))
        return fn

    return decorator

