performance for generator: Split big generated modules (like _Gtk, PyQt) into smaller...
author Ekaterina Tuzova <Ekaterina.Tuzova@jetbrains.com>
Thu, 16 Jan 2014 17:32:02 +0000 (21:32 +0400)
committer Ekaterina Tuzova <Ekaterina.Tuzova@jetbrains.com>
Thu, 16 Jan 2014 17:32:02 +0000 (21:32 +0400)
python/helpers/generator3.py
python/helpers/pycharm_generator_utils/constants.py
python/helpers/pycharm_generator_utils/module_redeclarator.py
python/helpers/pycharm_generator_utils/util_methods.py

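diff --git a/python/helpers/generator3.py b/python/helpers/generator3.py
index fe24620d374d05d7c5c9702cf1d0ec1ca098e007..ca61efe13be4fb8cce84c8b7b02554e4d86a67ba 100644
--- a/python/helpers/generator3.py
+++ b/python/helpers/generator3.py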
@@ -1,41 +1,13 @@
 # encoding: utf-8
-from pycharm_generator_utils.module_redeclarator import *
-from pycharm_generator_utils.util_methods import *
-from pycharm_generator_utils.constants import *
-import os
 import atexit
 import zipfile
 
-debug_mode = False
-
-
-def build_output_name(dirname, qualified_name):
-    qualifiers = qualified_name.split(".")
-    if dirname and not dirname.endswith("/") and not dirname.endswith("\\"):
-        dirname += os.path.sep # "a -> a/"
-    for pathindex in range(len(qualifiers) - 1): # create dirs for all qualifiers but last
-        subdirname = dirname + os.path.sep.join(qualifiers[0: pathindex + 1])
-        if not os.path.isdir(subdirname):
-            action("creating subdir %r", subdirname)
-            os.makedirs(subdirname)
-        init_py = os.path.join(subdirname, "__init__.py")
-        if os.path.isfile(subdirname + ".py"):
-            os.rename(subdirname + ".py", init_py)
-        elif not os.path.isfile(init_py):
-            init = fopen(init_py, "w")
-            init.close()
-    target_name = dirname + os.path.sep.join(qualifiers)
-    if os.path.isdir(target_name):
-        fname = os.path.join(target_name, "__init__.py")
-    else:
-        fname = target_name + ".py"
-
-    dirname = os.path.dirname(fname)
+from pycharm_generator_utils.module_redeclarator import *
+from pycharm_generator_utils.util_methods import *
+from pycharm_generator_utils.constants import *
 
-    if not os.path.isdir(dirname):
-        os.makedirs(dirname)
 
-    return fname
+debug_mode = False
 
 
 def redo_module(module_name, outfile, module_file_name, doing_builtins):
@@ -263,80 +235,67 @@ def process_one(name, mod_file_name, doing_builtins, subdir):
         say(name)
         sys.stdout.flush()
     action("doing nothing")
-    outfile = None
+
     try:
-        try:
-            fname = build_output_name(subdir, name)
-            action("opening %r", fname)
-            outfile = fopen(fname, "w")
-            old_modules = list(sys.modules.keys())
-            imported_module_names = []
-
-            class MyFinder:
-                #noinspection PyMethodMayBeStatic
-                def find_module(self, fullname, path=None):
-                    if fullname != name:
-                        imported_module_names.append(fullname)
-                    return None
-
-            my_finder = None
-            if hasattr(sys, 'meta_path'):
-                my_finder = MyFinder()
-                sys.meta_path.append(my_finder)
-            else:
-                imported_module_names = None
-
-            action("importing")
-            __import__(name) # sys.modules will fill up with what we want
-
-            if my_finder:
-                sys.meta_path.remove(my_finder)
-            if imported_module_names is None:
-                imported_module_names = [m for m in sys.modules.keys() if m not in old_modules]
-
-            redo_module(name, outfile, mod_file_name, doing_builtins)
-            # The C library may have called Py_InitModule() multiple times to define several modules (gtk._gtk and gtk.gdk);
-            # restore all of them
-            path = name.split(".")
-            redo_imports = not ".".join(path[:-1]) in MODULES_INSPECT_DIR
-            if imported_module_names and redo_imports:
-                for m in sys.modules.keys():
-                    action("looking at possible submodule %r", m)
-                    # if module has __file__ defined, it has Python source code and doesn't need a skeleton
-                    if m not in old_modules and m not in imported_module_names and m != name and not hasattr(
-                            sys.modules[m], '__file__'):
-                        if not quiet:
-                            say(m)
-                            sys.stdout.flush()
-                        fname = build_output_name(subdir, m)
-                        action("opening %r", fname)
-                        subfile = fopen(fname, "w")
-                        try:
-                            redo_module(m, subfile, mod_file_name, doing_builtins)
-                        finally:
-                            action("closing %r", fname)
-                            subfile.close()
-        except:
-            exctype, value = sys.exc_info()[:2]
-            msg = "Failed to process %r while %s: %s"
-            args = name, CURRENT_ACTION, str(value)
-            report(msg, *args)
-            if outfile is not None and not outfile.closed:
-                outfile.write("# encoding: %s\n" % OUT_ENCODING)
-                outfile.write("# module %s\n" % name)
-                outfile.write("# from %s\n" % mod_file_name)
-                outfile.write("# by generator %s\n" % VERSION)
-                outfile.write("\n\n")
-                outfile.write("# Skeleton generation error:\n#\n#     " + (msg % args) + "\n")
-            if debug_mode:
-                if sys.platform == 'cli':
-                    import traceback
-                    traceback.print_exc(file=sys.stderr)
-                raise
-            return False
-    finally:
-        if outfile is not None and not outfile.closed:
-            outfile.close()
+        fname = build_output_name(subdir, name)
+        action("opening %r", fname)
+        old_modules = list(sys.modules.keys())
+        imported_module_names = []
+
+        class MyFinder:
+            #noinspection PyMethodMayBeStatic
+            def find_module(self, fullname, path=None):
+                if fullname != name:
+                    imported_module_names.append(fullname)
+                return None
+
+        my_finder = None
+        if hasattr(sys, 'meta_path'):
+            my_finder = MyFinder()
+            sys.meta_path.append(my_finder)
+        else:
+            imported_module_names = None
+
+        action("importing")
+        __import__(name) # sys.modules will fill up with what we want
+
+        if my_finder:
+            sys.meta_path.remove(my_finder)
+        if imported_module_names is None:
+            imported_module_names = [m for m in sys.modules.keys() if m not in old_modules]
+
+        redo_module(name, fname, mod_file_name, doing_builtins)
+        # The C library may have called Py_InitModule() multiple times to define several modules (gtk._gtk and gtk.gdk);
+        # restore all of them
+        path = name.split(".")
+        redo_imports = not ".".join(path[:-1]) in MODULES_INSPECT_DIR
+        if imported_module_names and redo_imports:
+            for m in sys.modules.keys():
+                if m.startswith("pycharm_generator_utils"): continue
+                action("looking at possible submodule %r", m)
+                # if module has __file__ defined, it has Python source code and doesn't need a skeleton
+                if m not in old_modules and m not in imported_module_names and m != name and not hasattr(
+                        sys.modules[m], '__file__'):
+                    if not quiet:
+                        say(m)
+                        sys.stdout.flush()
+                    fname = build_output_name(subdir, m)
+                    action("opening %r", fname)
+                    try:
+                        redo_module(m, fname, mod_file_name, doing_builtins)
+                    finally:
+                        action("closing %r", fname)
+    except:
+        exctype, value = sys.exc_info()[:2]
+        msg = "Failed to process %r while %s: %s"
+        args = name, CURRENT_ACTION, str(value)
+        report(msg, *args)
+        if debug_mode:
+            if sys.platform == 'cli':
+                import traceback
+                traceback.print_exc(file=sys.stderr)
+            raise
+        return False
     return True
 
 
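diff --git a/python/helpers/pycharm_generator_utils/constants.py b/python/helpers/pycharm_generator_utils/constants.py
index 6b77e73049b99fd2891736c0f1c123d06a7d2334..bb0093e626239dbf272797916786a1e4913cd994 100644
--- a/python/helpers/pycharm_generator_utils/constants.py
+++ b/python/helpers/pycharm_generator_utils/constants.py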
@@ -6,7 +6,7 @@ import string
 import time
 
 
-VERSION = "1.131"
+VERSION = "1.132"
 
 OUT_ENCODING = 'utf-8'
 
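diff --git a/python/helpers/pycharm_generator_utils/module_redeclarator.py b/python/helpers/pycharm_generator_utils/module_redeclarator.py
index f56fb430e0c85e386316a137c832c5a37271a0dd..0ab6dc14b600fe646018d53bb68f7ce312624d6e 100644
--- a/python/helpers/pycharm_generator_utils/module_redeclarator.py
+++ b/python/helpers/pycharm_generator_utils/module_redeclarator.py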
@@ -1,6 +1,7 @@
+import keyword
+
 from pycharm_generator_utils.util_methods import *
 from pycharm_generator_utils.constants import *
-import keyword, re
 
 
 class emptylistdict(dict):
@@ -49,6 +50,11 @@ class Buf(object):
         return len(self.data) == 0
 
 
+class ClassBuf(Buf):
+    def __init__(self, name, indenter):
+        super(ClassBuf, self).__init__(indenter)
+        self.name = name
+
 #noinspection PyUnresolvedReferences,PyBroadException
 class ModuleRedeclarator(object):
     def __init__(self, module, outfile, mod_filename, indent_size=4, doing_builtins=False):
@@ -67,6 +73,7 @@ class ModuleRedeclarator(object):
         self.imports_buf = Buf(self)
         self.functions_buf = Buf(self)
         self.classes_buf = Buf(self)
+        self.classes_buffs = list()
         self.footer_buf = Buf(self)
         self.indent_size = indent_size
         self._indent_step = " " * self.indent_size
@@ -106,8 +113,39 @@ class ModuleRedeclarator(object):
         return self._indent_step * level
 
     def flush(self):
-        for buf in (self.header_buf, self.imports_buf, self.functions_buf, self.classes_buf, self.footer_buf):
-            buf.flush(self.outfile)
+        init = None
+        try:
+            if self.mod_filename and len(self.classes_buffs) >= 30:
+                mod_path = self.outfile.strip(".py")
+
+                fname = build_output_name(mod_path, "__init__")
+                init = fopen(fname, "w")
+                for buf in (self.header_buf, self.imports_buf, self.functions_buf, self.classes_buf):
+                    buf.flush(init)
+
+                data = ""
+                for buf in self.classes_buffs:
+                    fname = build_output_name(mod_path, buf.name)
+                    dummy = fopen(fname, "w")
+                    buf.flush(dummy)
+                    data += "from " + buf.name + " import " + buf.name + "\n"
+                    dummy.close()
+
+                init.write(data)
+                self.footer_buf.flush(init)
+            else:
+                init = fopen(self.outfile, "w")
+                for buf in (self.header_buf, self.imports_buf, self.functions_buf, self.classes_buf):
+                    buf.flush(init)
+
+                for buf in self.classes_buffs:
+                    buf.flush(init)
+
+                self.footer_buf.flush(init)
+
+        finally:
+            if init is not None and not init.closed:
+                init.close()
 
     # Some builtin classes effectively change __init__ signature without overriding it.
     # This callable serves as a placeholder to be replaced via REDEFINED_BUILTIN_SIGS
@@ -150,7 +188,7 @@ class ModuleRedeclarator(object):
         for initializer_type, r in self._initializers:
             if initializer_type == a_type:
                 return r
-            # NOTE: here we could handle things like defaultdict, sets, etc if we wanted
+                # NOTE: here we could handle things like defaultdict, sets, etc if we wanted
         return "None"
 
 
@@ -224,7 +262,7 @@ class ModuleRedeclarator(object):
                                 seen_values.append(value)
                             if isinstance(k, SIMPLEST_TYPES):
                                 self.fmt_value(out, value, indent + 1, prefix=repr(k) + ": ", postfix=",",
-                                              seen_values=seen_values)
+                                               seen_values=seen_values)
                             else:
                                 # both key and value need fancy formatting
                                 self.fmt_value(out, k, indent + 1, postfix=": ", seen_values=seen_values)
@@ -244,7 +282,7 @@ class ModuleRedeclarator(object):
                     if self._defined.get(found_name, False):
                         out(indent, prefix, found_name, postfix)
                     else:
-                    # a forward / circular declaration happens
+                        # a forward / circular declaration happens
                         notice = ""
                         real_value = cleanup(repr(p_value))
                         if found_name:
@@ -409,7 +447,7 @@ class ModuleRedeclarator(object):
 
     def is_predefined_builtin(self, module_name, class_name, func_name):
         return self.doing_builtins and module_name == BUILTIN_MOD_NAME and (
-        class_name, func_name) in PREDEFINED_BUILTIN_SIGS
+            class_name, func_name) in PREDEFINED_BUILTIN_SIGS
 
 
     def redo_function(self, out, p_func, p_name, indent, p_class=None, p_modname=None, classname=None, seen=None):
@@ -434,7 +472,7 @@ class ModuleRedeclarator(object):
                 return
             else:
                 seen[id(p_func)] = p_name
-            # real work
+                # real work
         if classname is None:
             classname = p_class and p_class.__name__ or None
         if p_class and hasattr(p_class, '__mro__'):
@@ -492,7 +530,7 @@ class ModuleRedeclarator(object):
             out(indent, "def ", p_name, sig, ": # known case of ", ofwhat)
             out_doc_attr(out, p_func, indent + 1, p_class)
         else:
-        # __doc__ is our best source of arglist
+            # __doc__ is our best source of arglist
             sig_note = "real signature unknown"
             spec = ""
             is_init = (p_name == "__init__" and p_class is not None)
@@ -508,17 +546,17 @@ class ModuleRedeclarator(object):
             action("parsing doc of func %r of class %r", p_name, p_class)
             if isinstance(funcdoc, STR_TYPES):
                 (spec, ret_literal, more_notes) = self.parse_func_doc(funcdoc, p_name, p_name, classname, deco,
-                                                                    sip_generated)
+                                                                      sip_generated)
                 if spec is None and p_name == '__init__' and classname:
                     (spec, ret_literal, more_notes) = self.parse_func_doc(funcdoc, classname, p_name, classname, deco,
-                                                                        sip_generated)
+                                                                          sip_generated)
                 sig_restored = spec is not None
                 if more_notes:
                     if sig_note:
                         sig_note += "; "
                     sig_note += more_notes
             if not sig_restored:
-            # use an allow-all declaration
+                # use an allow-all declaration
                 decl = []
                 if p_class:
                     first_param = propose_first_param(deco)
@@ -531,7 +569,7 @@ class ModuleRedeclarator(object):
             # to reduce size of stubs, don't output same docstring twice for class and its __init__ method
             if not is_init or funcdoc != p_class.__doc__:
                 out_docstring(out, funcdoc, indent + 1)
-            # body
+                # body
         if ret_literal and not is_init:
             out(indent + 1, "return ", ret_literal)
         else:
@@ -646,7 +684,7 @@ class ModuleRedeclarator(object):
                 self.redo_function(out, item, item_name, indent + 1, p_class, p_modname, classname=p_name, seen=seen_funcs)
             except:
                 handle_error_func(item_name, out)
-            #
+                #
         known_props = KNOWN_PROPS.get(p_modname, {})
         a_setter = "lambda self, v: None"
         a_deleter = "lambda self: None"
@@ -782,7 +820,7 @@ class ModuleRedeclarator(object):
         for item_name in module_dict:
             note("looking at %s", item_name)
             if item_name in (
-            "__dict__", "__doc__", "__module__", "__file__", "__name__", "__builtins__", "__package__"):
+                "__dict__", "__doc__", "__module__", "__file__", "__name__", "__builtins__", "__package__"):
                 continue # handled otherwise
             try:
                 item = getattr(self.module, item_name) # let getters do the magic
@@ -806,7 +844,7 @@ class ModuleRedeclarator(object):
                     mod_name = getattr(item, '__module__', None)
                 except:
                     pass
-                # we assume that module foo.bar never imports foo; foo may import foo.bar. (see pygame and pygame.rect)
+                    # we assume that module foo.bar never imports foo; foo may import foo.bar. (see pygame and pygame.rect)
             maybe_import_mod_name = mod_name or ""
             import_is_from_top = len(p_name) > len(maybe_import_mod_name) and p_name.startswith(maybe_import_mod_name)
             note("mod_name = %s, prospective = %s,  from top = %s", mod_name, maybe_import_mod_name, import_is_from_top)
@@ -906,9 +944,8 @@ class ModuleRedeclarator(object):
             self.functions_buf.out(0, "# no functions")
             #
         if classes:
-            out = self.functions_buf.out
-            out(0, "# classes")
-            out(0, "")
+            self.classes_buf.out(0, "# classes")
+            self.classes_buf.out(0, "")
             seen_classes = {}
             # sort classes so that inheritance order is preserved
             cls_list = [] # items are (class_name, mro_tuple)
@@ -922,6 +959,9 @@ class ModuleRedeclarator(object):
                         break         # ...and need not go fartehr than first known child
                 cls_list.insert(ins_index, (cls_name, get_mro(cls)))
             for item_name in [cls_item[0] for cls_item in cls_list]:
+                buf = ClassBuf(item_name, self)
+                self.classes_buffs.append(buf)
+                out = buf.out
                 if item_name in omitted_names:
                     out(0, "# definition of ", item_name, " omitted")
                     continue
@@ -974,7 +1014,7 @@ class ModuleRedeclarator(object):
             self.footer_buf.out(0, "# intermittent names")
             for value in values_to_add:
                 self.footer_buf.out(0, value)
-            # imports: last, because previous parts could alter used_imports or hidden_imports
+                # imports: last, because previous parts could alter used_imports or hidden_imports
         self.output_import_froms()
         if self.imports_buf.isEmpty():
             self.imports_buf.out(0, "# no imports")
@@ -1009,7 +1049,7 @@ class ModuleRedeclarator(object):
                             names_pack.append(n)
                             names_pack.append(", ")
                             right_pos += (len_n + 2)
-                        # last line is...
+                            # last line is...
                     if indent_level == 0: # one line
                         names_pack[0] = names_pack[0][:-1] # cut off lpar
                         names_pack[-1] = "" # cut last comma
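diff --git a/python/helpers/pycharm_generator_utils/util_methods.py b/python/helpers/pycharm_generator_utils/util_methods.py
index 8d6d3568914e3227c074dd2830058c30284fc73f..2ddbc94f4a63aacdc781949b1e498b3c9a860974 100644
--- a/python/helpers/pycharm_generator_utils/util_methods.py
+++ b/python/helpers/pycharm_generator_utils/util_methods.py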
@@ -6,7 +6,7 @@ except ImportError:
     inspect = None
 
 def create_named_tuple():   #TODO: user-skeleton
-        return """
+    return """
 class __namedtuple(tuple):
     '''A mock base class for named tuples.'''
 
@@ -38,12 +38,12 @@ class __namedtuple(tuple):
 """
 
 def create_generator():
-        # Fake <type 'generator'>
-        if version[0] < 3:
-            next_name = "next"
-        else:
-            next_name = "__next__"
-        txt = """
+    # Fake <type 'generator'>
+    if version[0] < 3:
+        next_name = "next"
+    else:
+        next_name = "__next__"
+    txt = """
 class __generator(object):
     '''A mock class representing the generator function type.'''
     def __init__(self):
@@ -59,8 +59,8 @@ class __generator(object):
         '''Return the next item from the container.'''
         pass
 """ % (next_name,)
-        if version[0] >= 3 or (version[0] == 2 and version[1] >= 5):
-            txt += """
+    if version[0] >= 3 or (version[0] == 2 and version[1] >= 5):
+        txt += """
     def close(self):
         '''Raises new GeneratorExit exception inside the generator to terminate the iteration.'''
         pass
@@ -73,10 +73,10 @@ class __generator(object):
         '''Used to raise an exception inside the generator.'''
         pass
 """
-        return txt
+    return txt
 
 def _searchbases(cls, accum):
-# logic copied from inspect.py
+    # logic copied from inspect.py
     if cls not in accum:
         accum.append(cls)
         for x in cls.__bases__:
@@ -84,7 +84,7 @@ def _searchbases(cls, accum):
 
 
 def get_mro(a_class):
-# logic copied from inspect.py
+    # logic copied from inspect.py
     """Returns a tuple of MRO classes."""
     if hasattr(a_class, "__mro__"):
         return a_class.__mro__
@@ -236,7 +236,7 @@ def transform_seq(results, toplevel=True):
                 if toplevel and not has_item_starting_with(ret, "*"):
                     ret.append("*more")
                 else:
-                # we're in a "foo, (bar1, bar2, ...)"; make it "foo, bar_tuple"
+                    # we're in a "foo, (bar1, bar2, ...)"; make it "foo, bar_tuple"
                     return extract_alpha_prefix(results[0][1]) + "_tuple"
             else: # just name
                 ret.append(sanitize_ident(token_name, is_clr))
@@ -271,7 +271,7 @@ def transform_optional_seq(results):
             if len(token) == 3: # name with value; little sense, but can happen in a deeply nested optional
                 ret.append(sanitize_ident(token_name, is_clr) + "=" + sanitize_value(token[2]))
             elif token_name == '...':
-            # we're in a "foo, [bar, ...]"; make it "foo, *bar"
+                # we're in a "foo, [bar, ...]"; make it "foo, *bar"
                 return ["*" + extract_alpha_prefix(
                     results[1][1])] # we must return a seq; [1] is first simple, [1][1] is its name
             else: # just name
@@ -531,7 +531,7 @@ def restore_clr(p_name, p_class):
         if not methods:
             bases = p_class.__bases__
             if len(bases) == 1 and p_name in dir(bases[0]):
-            # skip inherited methods
+                # skip inherited methods
                 return None, None
             return p_name + '(*args)', 'cannot find CLR method'
 
@@ -542,3 +542,31 @@ def restore_clr(p_name, p_class):
     if not methods[0].IsStatic:
         params = ['self'] + params
     return build_signature(p_name, params), None
+
+def build_output_name(dirname, qualified_name):
+    qualifiers = qualified_name.split(".")
+    if dirname and not dirname.endswith("/") and not dirname.endswith("\\"):
+        dirname += os.path.sep # "a -> a/"
+    for pathindex in range(len(qualifiers) - 1): # create dirs for all qualifiers but last
+        subdirname = dirname + os.path.sep.join(qualifiers[0: pathindex + 1])
+        if not os.path.isdir(subdirname):
+            action("creating subdir %r", subdirname)
+            os.makedirs(subdirname)
+        init_py = os.path.join(subdirname, "__init__.py")
+        if os.path.isfile(subdirname + ".py"):
+            os.rename(subdirname + ".py", init_py)
+        elif not os.path.isfile(init_py):
+            init = fopen(init_py, "w")
+            init.close()
+    target_name = dirname + os.path.sep.join(qualifiers)
+    if os.path.isdir(target_name):
+        fname = os.path.join(target_name, "__init__.py")
+    else:
+        fname = target_name + ".py"
+
+    dirname = os.path.dirname(fname)
+
+    if not os.path.isdir(dirname):
+        os.makedirs(dirname)
+
+    return fname