""" Standard "encodings" Package



    Standard Python encoding modules are stored in this package

    directory.



    Codec modules must have names corresponding to normalized encoding

    names as defined in the normalize_encoding() function below, e.g.

    'utf-8' must be implemented by the module 'utf_8.py'.



    Each codec module must export the following interface:



    * getregentry() -> codecs.CodecInfo object

    The getregentry() API must return a CodecInfo object with encoder, decoder,

    incrementalencoder, incrementaldecoder, streamwriter and streamreader

    attributes which adhere to the Python Codec Interface Standard.



    In addition, a module may optionally also define the following

    APIs which are then used by the package's codec search function:



    * getaliases() -> sequence of encoding name strings to use as aliases



    Alias names returned by getaliases() must be normalized encoding

    names as defined by normalize_encoding().



Written by Marc-Andre Lemburg (mal@lemburg.com).



(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.



"""#"



import codecs

from encodings import aliases

import __builtin__



# Lookup cache used by search_function(): maps the encoding name exactly
# as it was passed in to the resulting codecs.CodecInfo entry, or to None
# when no usable codec module was found.
_cache = {}

# Sentinel passed as the default to _cache.get() so that search_function()
# can tell "never looked up" apart from a cached None (a remembered miss).
_unknown = '--unknown--'

# fromlist argument handed to __import__() in search_function(); a
# non-empty fromlist makes __import__() return the submodule itself
# instead of the top-level package.
_import_tail = ['*']

# Translation table applied with .translate() in normalize_encoding(),
# one entry per 8-bit character value: ASCII letters, digits and the dot
# map to themselves, every other character maps to a space.
_norm_encoding_map = ('                                              . '

                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '

                      ' abcdefghijklmnopqrstuvwxyz                     '

                      '                                                '

                      '                                                '

                      '                ')

# The alias dictionary of encodings.aliases (the same object, not a copy).
# search_function() reads it and also adds the aliases reported by codec
# modules through getaliases().
_aliases = aliases.aliases



class CodecRegistryError(LookupError, SystemError):
    """Raised by search_function() when a codec module's registry entry
    has the wrong length or contains non-callable codec functions.

    Derives from both LookupError and SystemError, so handlers for
    either base class will catch it.
    """



def normalize_encoding(encoding):
    """ Return the normalized form of an encoding name.

        Every character other than an ASCII letter, an ASCII digit or
        the dot (kept because it is used in Python package names) is
        treated as a separator.  Runs of separators are collapsed into
        one underscore, so '  -;#' becomes '_', and separators at the
        start or end of the name are dropped.

        Encoding names should be ASCII only; a Unicode name that uses
        non-ASCII characters must at least be Latin-1 compatible,
        because it is encoded to Latin-1 before being translated.

    """
    # .translate() behaves differently for Unicode strings, so work on
    # an 8-bit string.  The "unicode" builtin is probed first so builds
    # without it skip the conversion.
    is_unicode = (hasattr(__builtin__, "unicode") and
                  isinstance(encoding, unicode))
    if is_unicode:
        # .encode('latin-1') does *not* go through the codec registry,
        # so this cannot recurse into the search function. (See
        # PyUnicode_AsEncodedString() in unicodeobject.c for details.)
        encoding = encoding.encode('latin-1')

    # Map every separator character to a space, then let split() drop
    # leading/trailing whitespace and collapse the runs in between.
    translated = encoding.translate(_norm_encoding_map)
    words = translated.split()
    return '_'.join(words)



def search_function(encoding):
    """ Codec search function of the encodings package.

        Looks up the codec module for the given encoding name inside
        the 'encodings' package and returns its registry entry.

        encoding -- encoding name as passed in by the codec registry.

        Returns a codecs.CodecInfo object, or None if no codec module
        providing getregentry() could be imported.  Both outcomes are
        cached in _cache under the name exactly as given.

        Raises CodecRegistryError if the module's getregentry() returns
        a legacy tuple of the wrong length or one containing
        non-callable codec functions.

    """
    # Cache lookup; _unknown distinguishes "not cached" from a cached
    # None (a remembered miss).
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding name and
    # look up the module using the aliased name, then fall back to the
    # normalized name itself.  Both are only ever imported from inside
    # the encodings package.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Skip empty names and dotted names; the latter would reach
        # into sub-packages.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            pass
        else:
            break
    else:
        mod = None

    try:
        # Also raises AttributeError when mod is None, which routes the
        # "nothing imported" case into the miss handling below.
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy entry: a sequence of 4 to 7 items laid out in
        # CodecInfo argument order, with the name in the seventh slot.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError(
                'module "%s" (%s) failed to register' %
                (mod.__name__, mod.__file__))
        if not callable(entry[0]) or \
           not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and use the module name
            # (without the package prefix) as the codec name.  Slicing
            # to six items first keeps a 7-item entry whose name is
            # None at seven items; appending to it would produce eight
            # and make the CodecInfo() call below fail.
            entry = (tuple(entry[:6]) +
                     (None,) * (6 - len(entry)) +
                     (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry



# Register the search_function in the Python codec registry.  This
# happens as a side effect of importing this module.

codecs.register(search_function)

