"""Find modules used by a script, using introspection."""

# This module should be kept compatible with Python 2.2, see PEP 291.



from __future__ import generators

import dis

import imp

import marshal

import os

import sys

import types

import struct



# Choose the mode string used by run_script() and load_file() when opening
# Python source files.  The presence of a "newlines" attribute on the
# original stdout file object is used as the feature test for universal
# newline support.
if hasattr(sys.__stdout__, "newlines"):

    READ_MODE = "U"  # universal line endings

else:

    # remain compatible with Python  < 2.3

    READ_MODE = "r"



# Opcodes of interest to the bytecode scanners, stored as one-character
# strings so they can be compared directly against the characters of
# co.co_code (see scan_opcodes / scan_opcodes_25).
LOAD_CONST = chr(dis.opname.index('LOAD_CONST'))

IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME'))

STORE_NAME = chr(dis.opname.index('STORE_NAME'))

STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL'))

# Opcodes that the scanners report as a "store" event.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]

# Opcodes comparing >= this character are skipped as 3 bytes by the
# scanners, everything below it as 1 byte.
HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT)



# Modulefinder does a good job at simulating Python's import system, but it cannot

# handle __path__ modifications packages make at runtime.  Therefore there

# is a mechanism whereby you can register extra paths in this map for a

# package, and it will be honored.



# Note that this is a mapping from package names to lists of paths.

packagePathMap = {}

# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search directory for *packagename*.

    The path is appended to the list stored under *packagename* in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)



replacePackageMap = {}

# The ReplacePackage mechanism lets modulefinder cope with packages such
# as _xmlplus, which injects itself into sys.modules under the name "xml"
# at runtime: call ReplacePackage("_xmlplus", "xml") before running
# ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* is to be registered as *newname*.

    A later call for the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})





class Module:
    """Bookkeeping record for one module or package seen by the finder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Names assigned at global scope in the module, including the
        # names pulled in through star-imports of Python modules.
        self.globalnames = {}
        # Star-imports this module performed that could not be resolved,
        # i.e. star-imports from non-Python modules.
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(%s)" % ", ".join(shown)



class ModuleFinder:



    def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):

        if path is None:

            path = sys.path

        self.path = path

        self.modules = {}

        self.badmodules = {}

        self.debug = debug

        self.indent = 0

        self.excludes = excludes

        self.replace_paths = replace_paths

        self.processed_paths = []   # Used in debugging only



    def msg(self, level, str, *args):

        if level <= self.debug:

            for i in range(self.indent):

                print "   ",

            print str,

            for arg in args:

                print repr(arg),

            print



    def msgin(self, *args):

        level = args[0]

        if level <= self.debug:

            self.indent = self.indent + 1

            self.msg(*args)



    def msgout(self, *args):

        level = args[0]

        if level <= self.debug:

            self.indent = self.indent - 1

            self.msg(*args)



    def run_script(self, pathname):
        """Load the script at *pathname* as the module '__main__'.

        The file is read as Python source and handed to load_module();
        it is closed again even if loading raises.
        """
        self.msg(2, "run_script", pathname)
        fp = open(pathname, READ_MODE)
        try:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
        finally:
            # load_module() only reads from fp; closing is our job.
            fp.close()



    def load_file(self, pathname):
        """Load the source file at *pathname* as a top-level module.

        The module name is the file's base name without its extension.
        The file is closed again even if loading raises.
        """
        name, ext = os.path.splitext(os.path.basename(pathname))
        fp = open(pathname, READ_MODE)
        try:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
        finally:
            # load_module() only reads from fp; closing is our job.
            fp.close()



    def import_hook(self, name, caller=None, fromlist=None, level=-1):

        self.msg(3, "import_hook", name, caller, fromlist, level)

        parent = self.determine_parent(caller, level=level)

        q, tail = self.find_head_package(parent, name)

        m = self.load_tail(q, tail)

        if not fromlist:

            return q

        if m.__path__:

            self.ensure_fromlist(m, fromlist)

        return None



    def determine_parent(self, caller, level=-1):

        self.msgin(4, "determine_parent", caller, level)

        if not caller or level == 0:

            self.msgout(4, "determine_parent -> None")

            return None

        pname = caller.__name__

        if level >= 1: # relative import

            if caller.__path__:

                level -= 1

            if level == 0:

                parent = self.modules[pname]

                assert parent is caller

                self.msgout(4, "determine_parent ->", parent)

                return parent

            if pname.count(".") < level:

                raise ImportError, "relative importpath too deep"

            pname = ".".join(pname.split(".")[:-level])

            parent = self.modules[pname]

            self.msgout(4, "determine_parent ->", parent)

            return parent

        if caller.__path__:

            parent = self.modules[pname]

            assert caller is parent

            self.msgout(4, "determine_parent ->", parent)

            return parent

        if '.' in pname:

            i = pname.rfind('.')

            pname = pname[:i]

            parent = self.modules[pname]

            assert parent.__name__ == pname

            self.msgout(4, "determine_parent ->", parent)

            return parent

        self.msgout(4, "determine_parent -> None")

        return None



    def find_head_package(self, parent, name):

        self.msgin(4, "find_head_package", parent, name)

        if '.' in name:

            i = name.find('.')

            head = name[:i]

            tail = name[i+1:]

        else:

            head = name

            tail = ""

        if parent:

            qname = "%s.%s" % (parent.__name__, head)

        else:

            qname = head

        q = self.import_module(head, qname, parent)

        if q:

            self.msgout(4, "find_head_package ->", (q, tail))

            return q, tail

        if parent:

            qname = head

            parent = None

            q = self.import_module(head, qname, parent)

            if q:

                self.msgout(4, "find_head_package ->", (q, tail))

                return q, tail

        self.msgout(4, "raise ImportError: No module named", qname)

        raise ImportError, "No module named " + qname



    def load_tail(self, q, tail):

        self.msgin(4, "load_tail", q, tail)

        m = q

        while tail:

            i = tail.find('.')

            if i < 0: i = len(tail)

            head, tail = tail[:i], tail[i+1:]

            mname = "%s.%s" % (m.__name__, head)

            m = self.import_module(head, mname, m)

            if not m:

                self.msgout(4, "raise ImportError: No module named", mname)

                raise ImportError, "No module named " + mname

        self.msgout(4, "load_tail ->", m)

        return m



    def ensure_fromlist(self, m, fromlist, recursive=0):

        self.msg(4, "ensure_fromlist", m, fromlist, recursive)

        for sub in fromlist:

            if sub == "*":

                if not recursive:

                    all = self.find_all_submodules(m)

                    if all:

                        self.ensure_fromlist(m, all, 1)

            elif not hasattr(m, sub):

                subname = "%s.%s" % (m.__name__, sub)

                submod = self.import_module(sub, subname, m)

                if not submod:

                    raise ImportError, "No module named " + subname



    def find_all_submodules(self, m):

        if not m.__path__:

            return

        modules = {}

        # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].

        # But we must also collect Python extension modules - although

        # we cannot separate normal dlls from Python extensions.

        suffixes = []

        for triple in imp.get_suffixes():

            suffixes.append(triple[0])

        for dir in m.__path__:

            try:

                names = os.listdir(dir)

            except os.error:

                self.msg(2, "can't list directory", dir)

                continue

            for name in names:

                mod = None

                for suff in suffixes:

                    n = len(suff)

                    if name[-n:] == suff:

                        mod = name[:-n]

                        break

                if mod and mod != "__init__":

                    modules[mod] = mod

        return modules.keys()



    def import_module(self, partname, fqname, parent):

        self.msgin(3, "import_module", partname, fqname, parent)

        try:

            m = self.modules[fqname]

        except KeyError:

            pass

        else:

            self.msgout(3, "import_module ->", m)

            return m

        if fqname in self.badmodules:

            self.msgout(3, "import_module -> None")

            return None

        if parent and parent.__path__ is None:

            self.msgout(3, "import_module -> None")

            return None

        try:

            fp, pathname, stuff = self.find_module(partname,

                                                   parent and parent.__path__, parent)

        except ImportError:

            self.msgout(3, "import_module ->", None)

            return None

        try:

            m = self.load_module(fqname, fp, pathname, stuff)

        finally:

            if fp: fp.close()

        if parent:

            setattr(parent, partname, m)

        self.msgout(3, "import_module ->", m)

        return m



    def load_module(self, fqname, fp, pathname, file_info):

        suffix, mode, type = file_info

        self.msgin(2, "load_module", fqname, fp and "fp", pathname)

        if type == imp.PKG_DIRECTORY:

            m = self.load_package(fqname, pathname)

            self.msgout(2, "load_module ->", m)

            return m

        if type == imp.PY_SOURCE:

            co = compile(fp.read()+'\n', pathname, 'exec')

        elif type == imp.PY_COMPILED:

            if fp.read(4) != imp.get_magic():

                self.msgout(2, "raise ImportError: Bad magic number", pathname)

                raise ImportError, "Bad magic number in %s" % pathname

            fp.read(4)

            co = marshal.load(fp)

        else:

            co = None

        m = self.add_module(fqname)

        m.__file__ = pathname

        if co:

            if self.replace_paths:

                co = self.replace_paths_in_code(co)

            m.__code__ = co

            self.scan_code(co, m)

        self.msgout(2, "load_module ->", m)

        return m



    def _add_badmodule(self, name, caller):

        if name not in self.badmodules:

            self.badmodules[name] = {}

        if caller:

            self.badmodules[name][caller.__name__] = 1

        else:

            self.badmodules[name]["-"] = 1



    def _safe_import_hook(self, name, caller, fromlist, level=-1):

        # wrapper for self.import_hook() that won't raise ImportError

        if name in self.badmodules:

            self._add_badmodule(name, caller)

            return

        try:

            self.import_hook(name, caller, level=level)

        except ImportError, msg:

            self.msg(2, "ImportError:", str(msg))

            self._add_badmodule(name, caller)

        else:

            if fromlist:

                for sub in fromlist:

                    if sub in self.badmodules:

                        self._add_badmodule(sub, caller)

                        continue

                    try:

                        self.import_hook(name, caller, [sub], level=level)

                    except ImportError, msg:

                        self.msg(2, "ImportError:", str(msg))

                        fullname = name + "." + sub

                        self._add_badmodule(fullname, caller)



    def scan_opcodes(self, co,
                     unpack = struct.unpack):
        # Walk the bytecode of 'co' and yield the 'interesting' opcode
        # combinations as (kind, args) pairs:
        #   "store",  (name,)            for STORE_NAME / STORE_GLOBAL
        #   "import", (fromlist, name)   for LOAD_CONST + IMPORT_NAME
        # Version for Python 2.4 and older
        bytecode = co.co_code
        names = co.co_names
        consts = co.co_consts
        end = len(bytecode)
        pos = 0
        while pos < end:
            op = bytecode[pos]
            if op in STORE_OPS:
                # Opcode byte followed by a 2-byte little-endian name index.
                index, = unpack('<H', bytecode[pos+1:pos+3])
                yield "store", (names[index],)
                pos += 3
            elif op == LOAD_CONST and bytecode[pos+3] == IMPORT_NAME:
                const_index, name_index = unpack('<xHxH', bytecode[pos:pos+6])
                yield "import", (consts[const_index], names[name_index])
                pos += 6
            elif op >= HAVE_ARGUMENT:
                pos += 3
            else:
                pos += 1



    def scan_opcodes_25(self, co,
                     unpack = struct.unpack):
        # Walk the bytecode of 'co' and yield the 'interesting' opcode
        # combinations as (kind, args) pairs:
        #   "store",           (name,)
        #   "import",          (fromlist, name)         level == -1
        #   "absolute_import", (fromlist, name)         level == 0
        #   "relative_import", (level, fromlist, name)  any other level
        # Python 2.5 version (has absolute and relative imports)
        bytecode = co.co_code
        names = co.co_names
        consts = co.co_consts
        import_pattern = LOAD_CONST + LOAD_CONST + IMPORT_NAME
        end = len(bytecode)
        pos = 0
        while pos < end:
            op = bytecode[pos]
            if op in STORE_OPS:
                # Opcode byte followed by a 2-byte little-endian name index.
                index, = unpack('<H', bytecode[pos+1:pos+3])
                yield "store", (names[index],)
                pos += 3
            elif bytecode[pos:pos+9:3] == import_pattern:
                # Three consecutive 3-byte instructions: every third byte
                # is an opcode, the bytes in between are the arguments.
                level_index, from_index, name_index = \
                    unpack('<xHxHxH', bytecode[pos:pos+9])
                level = consts[level_index]
                if level == -1: # normal import
                    yield "import", (consts[from_index], names[name_index])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[from_index],
                                              names[name_index])
                else: # relative import
                    yield "relative_import", (level, consts[from_index],
                                              names[name_index])
                pos += 9
            elif op >= HAVE_ARGUMENT:
                pos += 3
            else:
                pos += 1



    def scan_code(self, co, m):
        """Scan code object *co* on behalf of module *m*.

        Stores are recorded in m.globalnames, imports are followed through
        _safe_import_hook(), and star-imports either copy the global names
        of the imported module or are noted in m.starimports.  Nested code
        objects found in co.co_consts are scanned recursively.
        """
        if sys.version_info >= (2, 5):
            scanner = self.scan_opcodes_25
        else:
            scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                (name,) = args
                m.globalnames[name] = 1
            elif what in ("import", "absolute_import"):
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                level = -1
                if what == "absolute_import":
                    level = 0
                self._safe_import_hook(name, m, fromlist, level=level)
                if not have_star:
                    continue
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                source = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    source = self.modules.get(m.__name__ + "." + name)
                if source is None:
                    source = self.modules.get(name)
                if source is None:
                    m.starimports[name] = 1
                    continue
                m.globalnames.update(source.globalnames)
                m.starimports.update(source.starimports)
                if source.__code__ is None:
                    # No code was scanned for it, so its names are unknown.
                    m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": resolve the package first, then
                    # import the names from it as an absolute import.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist,
                                           level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        code_type = type(co)
        for const in co.co_consts:
            if isinstance(const, code_type):
                self.scan_code(const, m)



    def load_package(self, fqname, pathname):
        """Register the package in directory *pathname* and load its __init__.

        The package is registered under the replacement name from
        replacePackageMap if one exists.  Its __path__ is the directory
        plus any extra paths registered in packagePathMap.  Returns the
        package's module record.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
        finally:
            # find_module() hands back an open file; nobody else closes it.
            if fp:
                fp.close()
        self.msgout(2, "load_package ->", m)
        return m



    def add_module(self, fqname):

        if fqname in self.modules:

            return self.modules[fqname]

        self.modules[fqname] = m = Module(fqname)

        return m



    def find_module(self, name, path, parent=None):

        if parent is not None:

            # assert path is not None

            fullname = parent.__name__+'.'+name

        else:

            fullname = name

        if fullname in self.excludes:

            self.msgout(3, "find_module -> Excluded", fullname)

            raise ImportError, name



        if path is None:

            if name in sys.builtin_module_names:

                return (None, None, ("", "", imp.C_BUILTIN))



            path = self.path

        return imp.find_module(name, path)



    def report(self):

        """Print a report to stdout, listing the found modules with their

        paths, as well as modules that are missing, or seem to be missing.

        """

        print

        print "  %-25s %s" % ("Name", "File")

        print "  %-25s %s" % ("----", "----")

        # Print modules found

        keys = self.modules.keys()

        keys.sort()

        for key in keys:

            m = self.modules[key]

            if m.__path__:

                print "P",

            else:

                print "m",

            print "%-25s" % key, m.__file__ or ""



        # Print missing modules

        missing, maybe = self.any_missing_maybe()

        if missing:

            print

            print "Missing modules:"

            for name in missing:

                mods = self.badmodules[name].keys()

                mods.sort()

                print "?", name, "imported from", ', '.join(mods)

        # Print modules that may be missing, but then again, maybe not...

        if maybe:

            print

            print "Submodules thay appear to be missing, but could also be",

            print "global names in the parent package:"

            for name in maybe:

                mods = self.badmodules[name].keys()

                mods.sort()

                print "?", name, "imported from", ', '.join(mods)



    def any_missing(self):

        """Return a list of modules that appear to be missing. Use

        any_missing_maybe() if you want to know which modules are

        certain to be missing, and which *may* be missing.

        """

        missing, maybe = self.any_missing_maybe()

        return missing + maybe



    def any_missing_maybe(self):

        """Return two lists, one with modules that are certainly missing

        and one with modules that *may* be missing. The latter names could

        either be submodules *or* just global names in the package.



        The reason it can't always be determined is that it's impossible to

        tell which names are imported when "from module import *" is done

        with an extension module, short of actually importing it.

        """

        missing = []

        maybe = []

        for name in self.badmodules:

            if name in self.excludes:

                continue

            i = name.rfind(".")

            if i < 0:

                missing.append(name)

                continue

            subname = name[i+1:]

            pkgname = name[:i]

            pkg = self.modules.get(pkgname)

            if pkg is not None:

                if pkgname in self.badmodules[name]:

                    # The package tried to import this module itself and

                    # failed. It's definitely missing.

                    missing.append(name)

                elif subname in pkg.globalnames:

                    # It's a global in the package: definitely not missing.

                    pass

                elif pkg.starimports:

                    # It could be missing, but the package did an "import *"

                    # from a non-Python module, so we simply can't be sure.

                    maybe.append(name)

                else:

                    # It's not a global in the package, the package didn't

                    # do funny star imports, it's very likely to be missing.

                    # The symbol could be inserted into the package from the

                    # outside, but since that's not good style we simply list

                    # it missing.

                    missing.append(name)

            else:

                missing.append(name)

        missing.sort()

        maybe.sort()

        return missing, maybe



    def replace_paths_in_code(self, co):

        new_filename = original_filename = os.path.normpath(co.co_filename)

        for f, r in self.replace_paths:

            if original_filename.startswith(f):

                new_filename = r + original_filename[len(f):]

                break



        if self.debug and original_filename not in self.processed_paths:

            if new_filename != original_filename:

                self.msgout(2, "co_filename %r changed to %r" \

                                    % (original_filename,new_filename,))

            else:

                self.msgout(2, "co_filename %r remains unchanged" \

                                    % (original_filename,))

            self.processed_paths.append(original_filename)



        consts = list(co.co_consts)

        for i in range(len(consts)):

            if isinstance(consts[i], type(co)):

                consts[i] = self.replace_paths_in_code(consts[i])



        return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,

                         co.co_flags, co.co_code, tuple(consts), co.co_names,

                         co.co_varnames, new_filename, co.co_name,

                         co.co_firstlineno, co.co_lnotab,

                         co.co_freevars, co.co_cellvars)





def test():
    """Command line driver: analyse a script and print the report.

    Options: -d raise verbosity, -q silence debug output, -m treat the
    remaining extra arguments as module names, -p add os.pathsep-separated
    directories to the search path, -x exclude a module.  The first
    positional argument is the script (default "hello.py"); further ones
    are files, or modules once -m is in effect.  Returns the ModuleFinder,
    or None when the options cannot be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error, msg:
        print msg
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath.extend(value.split(os.pathsep))
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    script = "hello.py"
    if args:
        script = args[0]

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print "path:"
        for item in path:
            print "   ", repr(item)

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg.endswith('.*'):
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging





# Script entry point: run the command line driver.  The finder is kept in
# the module-level name 'mf' (test() returns it "for -i debugging"), and
# Ctrl-C ends the run with a short notice instead of a traceback.
if __name__ == '__main__':

    try:

        mf = test()

    except KeyboardInterrupt:

        print "\n[interrupt]"

