import xml.sax

import xml.sax.handler

import types



try:

    _StringTypes = [types.StringType, types.UnicodeType]

except AttributeError:

    _StringTypes = [types.StringType]



START_ELEMENT = "START_ELEMENT"

END_ELEMENT = "END_ELEMENT"

COMMENT = "COMMENT"

START_DOCUMENT = "START_DOCUMENT"

END_DOCUMENT = "END_DOCUMENT"

PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"

IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"

CHARACTERS = "CHARACTERS"



class PullDOM(xml.sax.ContentHandler):

    _locator = None

    document = None



    def __init__(self, documentFactory=None):

        from xml.dom import XML_NAMESPACE

        self.documentFactory = documentFactory

        self.firstEvent = [None, None]

        self.lastEvent = self.firstEvent

        self.elementStack = []

        self.push = self.elementStack.append

        try:

            self.pop = self.elementStack.pop

        except AttributeError:

            # use class' pop instead

            pass

        self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts

        self._current_context = self._ns_contexts[-1]

        self.pending_events = []



    def pop(self):

        result = self.elementStack[-1]

        del self.elementStack[-1]

        return result



    def setDocumentLocator(self, locator):

        self._locator = locator



    def startPrefixMapping(self, prefix, uri):

        if not hasattr(self, '_xmlns_attrs'):

            self._xmlns_attrs = []

        self._xmlns_attrs.append((prefix or 'xmlns', uri))

        self._ns_contexts.append(self._current_context.copy())

        self._current_context[uri] = prefix or None



    def endPrefixMapping(self, prefix):

        self._current_context = self._ns_contexts.pop()



    def startElementNS(self, name, tagName , attrs):

        # Retrieve xml namespace declaration attributes.

        xmlns_uri = 'http://www.w3.org/2000/xmlns/'

        xmlns_attrs = getattr(self, '_xmlns_attrs', None)

        if xmlns_attrs is not None:

            for aname, value in xmlns_attrs:

                attrs._attrs[(xmlns_uri, aname)] = value

            self._xmlns_attrs = []

        uri, localname = name

        if uri:

            # When using namespaces, the reader may or may not

            # provide us with the original name. If not, create

            # *a* valid tagName from the current context.

            if tagName is None:

                prefix = self._current_context[uri]

                if prefix:

                    tagName = prefix + ":" + localname

                else:

                    tagName = localname

            if self.document:

                node = self.document.createElementNS(uri, tagName)

            else:

                node = self.buildDocument(uri, tagName)

        else:

            # When the tagname is not prefixed, it just appears as

            # localname

            if self.document:

                node = self.document.createElement(localname)

            else:

                node = self.buildDocument(None, localname)



        for aname,value in attrs.items():

            a_uri, a_localname = aname

            if a_uri == xmlns_uri:

                if a_localname == 'xmlns':

                    qname = a_localname

                else:

                    qname = 'xmlns:' + a_localname

                attr = self.document.createAttributeNS(a_uri, qname)

                node.setAttributeNodeNS(attr)

            elif a_uri:

                prefix = self._current_context[a_uri]

                if prefix:

                    qname = prefix + ":" + a_localname

                else:

                    qname = a_localname

                attr = self.document.createAttributeNS(a_uri, qname)

                node.setAttributeNodeNS(attr)

            else:

                attr = self.document.createAttribute(a_localname)

                node.setAttributeNode(attr)

            attr.value = value



        self.lastEvent[1] = [(START_ELEMENT, node), None]

        self.lastEvent = self.lastEvent[1]

        self.push(node)



    def endElementNS(self, name, tagName):

        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]

        self.lastEvent = self.lastEvent[1]



    def startElement(self, name, attrs):

        if self.document:

            node = self.document.createElement(name)

        else:

            node = self.buildDocument(None, name)



        for aname,value in attrs.items():

            attr = self.document.createAttribute(aname)

            attr.value = value

            node.setAttributeNode(attr)



        self.lastEvent[1] = [(START_ELEMENT, node), None]

        self.lastEvent = self.lastEvent[1]

        self.push(node)



    def endElement(self, name):

        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]

        self.lastEvent = self.lastEvent[1]



    def comment(self, s):

        if self.document:

            node = self.document.createComment(s)

            self.lastEvent[1] = [(COMMENT, node), None]

            self.lastEvent = self.lastEvent[1]

        else:

            event = [(COMMENT, s), None]

            self.pending_events.append(event)



    def processingInstruction(self, target, data):

        if self.document:

            node = self.document.createProcessingInstruction(target, data)

            self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]

            self.lastEvent = self.lastEvent[1]

        else:

            event = [(PROCESSING_INSTRUCTION, target, data), None]

            self.pending_events.append(event)



    def ignorableWhitespace(self, chars):

        node = self.document.createTextNode(chars)

        self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]

        self.lastEvent = self.lastEvent[1]



    def characters(self, chars):

        node = self.document.createTextNode(chars)

        self.lastEvent[1] = [(CHARACTERS, node), None]

        self.lastEvent = self.lastEvent[1]



    def startDocument(self):

        if self.documentFactory is None:

            import xml.dom.minidom

            self.documentFactory = xml.dom.minidom.Document.implementation



    def buildDocument(self, uri, tagname):

        # Can't do that in startDocument, since we need the tagname

        # XXX: obtain DocumentType

        node = self.documentFactory.createDocument(uri, tagname, None)

        self.document = node

        self.lastEvent[1] = [(START_DOCUMENT, node), None]

        self.lastEvent = self.lastEvent[1]

        self.push(node)

        # Put everything we have seen so far into the document

        for e in self.pending_events:

            if e[0][0] == PROCESSING_INSTRUCTION:

                _,target,data = e[0]

                n = self.document.createProcessingInstruction(target, data)

                e[0] = (PROCESSING_INSTRUCTION, n)

            elif e[0][0] == COMMENT:

                n = self.document.createComment(e[0][1])

                e[0] = (COMMENT, n)

            else:

                raise AssertionError("Unknown pending event ",e[0][0])

            self.lastEvent[1] = e

            self.lastEvent = e

        self.pending_events = None

        return node.firstChild



    def endDocument(self):

        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]

        self.pop()



    def clear(self):

        "clear(): Explicitly release parsing structures"

        self.document = None



class ErrorHandler:

    def warning(self, exception):

        print exception

    def error(self, exception):

        raise exception

    def fatalError(self, exception):

        raise exception



class DOMEventStream:

    def __init__(self, stream, parser, bufsize):

        self.stream = stream

        self.parser = parser

        self.bufsize = bufsize

        if not hasattr(self.parser, 'feed'):

            self.getEvent = self._slurp

        self.reset()



    def reset(self):

        self.pulldom = PullDOM()

        # This content handler relies on namespace support

        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)

        self.parser.setContentHandler(self.pulldom)



    def __getitem__(self, pos):

        rc = self.getEvent()

        if rc:

            return rc

        raise IndexError



    def next(self):

        rc = self.getEvent()

        if rc:

            return rc

        raise StopIteration



    def __iter__(self):

        return self



    def expandNode(self, node):

        event = self.getEvent()

        parents = [node]

        while event:

            token, cur_node = event

            if cur_node is node:

                return

            if token != END_ELEMENT:

                parents[-1].appendChild(cur_node)

            if token == START_ELEMENT:

                parents.append(cur_node)

            elif token == END_ELEMENT:

                del parents[-1]

            event = self.getEvent()



    def getEvent(self):

        # use IncrementalParser interface, so we get the desired

        # pull effect

        if not self.pulldom.firstEvent[1]:

            self.pulldom.lastEvent = self.pulldom.firstEvent

        while not self.pulldom.firstEvent[1]:

            buf = self.stream.read(self.bufsize)

            if not buf:

                self.parser.close()

                return None

            self.parser.feed(buf)

        rc = self.pulldom.firstEvent[1][0]

        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]

        return rc



    def _slurp(self):

        """ Fallback replacement for getEvent() using the

            standard SAX2 interface, which means we slurp the

            SAX events into memory (no performance gain, but

            we are compatible to all SAX parsers).

        """

        self.parser.parse(self.stream)

        self.getEvent = self._emit

        return self._emit()



    def _emit(self):

        """ Fallback replacement for getEvent() that emits

            the events that _slurp() read previously.

        """

        rc = self.pulldom.firstEvent[1][0]

        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]

        return rc



    def clear(self):

        """clear(): Explicitly release parsing objects"""

        self.pulldom.clear()

        del self.pulldom

        self.parser = None

        self.stream = None



class SAX2DOM(PullDOM):



    def startElementNS(self, name, tagName , attrs):

        PullDOM.startElementNS(self, name, tagName, attrs)

        curNode = self.elementStack[-1]

        parentNode = self.elementStack[-2]

        parentNode.appendChild(curNode)



    def startElement(self, name, attrs):

        PullDOM.startElement(self, name, attrs)

        curNode = self.elementStack[-1]

        parentNode = self.elementStack[-2]

        parentNode.appendChild(curNode)



    def processingInstruction(self, target, data):

        PullDOM.processingInstruction(self, target, data)

        node = self.lastEvent[0][1]

        parentNode = self.elementStack[-1]

        parentNode.appendChild(node)



    def ignorableWhitespace(self, chars):

        PullDOM.ignorableWhitespace(self, chars)

        node = self.lastEvent[0][1]

        parentNode = self.elementStack[-1]

        parentNode.appendChild(node)



    def characters(self, chars):

        PullDOM.characters(self, chars)

        node = self.lastEvent[0][1]

        parentNode = self.elementStack[-1]

        parentNode.appendChild(node)





default_bufsize = (2 ** 14) - 20



def parse(stream_or_string, parser=None, bufsize=None):

    if bufsize is None:

        bufsize = default_bufsize

    if type(stream_or_string) in _StringTypes:

        stream = open(stream_or_string)

    else:

        stream = stream_or_string

    if not parser:

        parser = xml.sax.make_parser()

    return DOMEventStream(stream, parser, bufsize)



def parseString(string, parser=None):

    try:

        from cStringIO import StringIO

    except ImportError:

        from StringIO import StringIO



    bufsize = len(string)

    buf = StringIO(string)

    if not parser:

        parser = xml.sax.make_parser()

    return DOMEventStream(buf, parser, bufsize)

