From 2ba61f305cbac06bb935b00bb593ee8ac63ced42 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 19 Jul 2018 03:04:29 +0300 Subject: [PATCH 001/117] fix #3 --- pyan/analyzer.py | 72 ++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 0b8baad..0cf9f0d 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -573,28 +573,57 @@ class CallGraphVisitor(ast.NodeVisitor): def visit_ListComp(self, node): self.logger.debug("ListComp") - with ExecuteInInnerScope(self, "listcomp"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.analyze_comprehension(node, "listcomp") def visit_SetComp(self, node): self.logger.debug("SetComp") - with ExecuteInInnerScope(self, "setcomp"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.analyze_comprehension(node, "setcomp") def visit_DictComp(self, node): self.logger.debug("DictComp") - with ExecuteInInnerScope(self, "dictcomp"): - self.visit(node.key) - self.visit(node.value) - self.analyze_generators(node.generators) + self.analyze_comprehension(node, "dictcomp", field1="key", field2="value") def visit_GeneratorExp(self, node): self.logger.debug("GeneratorExp") - with ExecuteInInnerScope(self, "genexpr"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.analyze_comprehension(node, "genexpr") + + def analyze_comprehension(self, node, label, field1="elt", field2=None): + # The outermost iterator is evaluated in the current scope; + # everything else in the new inner scope. 
+ # + # See function symtable_handle_comprehension() in + # https://github.com/python/cpython/blob/master/Python/symtable.c + # For how it works, see + # https://stackoverflow.com/questions/48753060/what-are-these-extra-symbols-in-a-comprehensions-symtable + # For related discussion, see + # https://bugs.python.org/issue10544 + gens = node.generators # tuple of ast.comprehension + outermost = gens[0] + moregens = gens[1:] if len(gens) > 1 else [] + + outermost_iters = sanitize_exprs(outermost.iter) + outermost_targets = sanitize_exprs(outermost.target) + for expr in outermost_iters: + self.visit(expr) # set self.last_value (to something and hope for the best) + + with ExecuteInInnerScope(self, label): + for expr in outermost_targets: + self.visit(expr) # use self.last_value + self.last_value = None + for expr in outermost.ifs: + self.visit(expr) + + # TODO: there's also an is_async field we might want to use in a future version. + for gen in moregens: + targets = sanitize_exprs(gen.target) + values = sanitize_exprs(gen.iter) + self.analyze_binding(targets, values) + for expr in gen.ifs: + self.visit(expr) + + self.visit(getattr(node, field1)) # e.g. node.elt + if field2: + self.visit(getattr(node, field2)) def visit_Call(self, node): self.logger.debug("Call %s" % (get_ast_node_name(node.func))) @@ -793,23 +822,6 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(tgt) self.last_value = None - def analyze_generators(self, generators): - """Analyze the generators in a comprehension form. - - Analyzes the binding part, and visits the "if" expressions (if any). - - generators: an iterable of ast.comprehension objects - """ - - for gen in generators: - # TODO: there's also an is_async field we might want to use in a future version. 
- targets = sanitize_exprs(gen.target) - values = sanitize_exprs(gen.iter) - self.analyze_binding(targets, values) - - for expr in gen.ifs: - self.visit(expr) - def resolve_builtins(self, ast_node): """Resolve those calls to built-in functions whose return values can be determined in a simple manner. From 6d26a3942de46d9023bd674277acefd1dd5d5ada Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 19 Jul 2018 03:26:19 +0300 Subject: [PATCH 002/117] attempt to fix #2 (based on spec; needs Python 3.6+ to test) --- pyan/analyzer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 0cf9f0d..979d96c 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -535,8 +535,20 @@ class CallGraphVisitor(ast.NodeVisitor): [get_ast_node_name(x) for x in values])) self.analyze_binding(targets, values) - def visit_AnnAssign(self, node): - self.visit_Assign(self, node) # TODO: alias for now; add the annotations to output in a future version? + def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ + target = sanitize_exprs(node.target) + self.last_value = None + if node.value is not None: + value = sanitize_exprs(node.value) + self.logger.debug("AnnAssign %s %s" % (get_ast_node_name(target[0]), + get_ast_node_name(value[0]))) + self.analyze_binding(target, value) + else: # just a type declaration + self.logger.debug("AnnAssign %s " % (get_ast_node_name(target[0]))) + self.last_value = None + self.visit(target[0]) + # TODO: use the type annotation from node.annotation? 
+ # http://greentreesnakes.readthedocs.io/en/latest/nodes.html#AnnAssign def visit_AugAssign(self, node): targets = sanitize_exprs(node.target) From 0af2688dbb8bbf4607d45c36d86990e851ee3ff9 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Mon, 3 Sep 2018 17:15:40 +0300 Subject: [PATCH 003/117] improve debug logging: show input filename and lineno --- pyan/analyzer.py | 51 ++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 979d96c..5a3242e 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -162,7 +162,7 @@ class CallGraphVisitor(ast.NodeVisitor): # https://docs.python.org/3/library/ast.html#abstract-grammar def visit_Module(self, node): - self.logger.debug("Module") + self.logger.debug("Module %s, %s" % (self.module_name, self.filename)) # Modules live in the top-level namespace, ''. module_node = self.get_node('', self.module_name, node, flavor=Flavor.MODULE) @@ -179,7 +179,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = None def visit_ClassDef(self, node): - self.logger.debug("ClassDef %s" % (node.name)) + self.logger.debug("ClassDef %s, %s:%s" % (node.name, self.filename, node.lineno)) from_node = self.get_node_of_current_namespace() ns = from_node.get_name() @@ -223,7 +223,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.class_stack.pop() def visit_FunctionDef(self, node): - self.logger.debug("FunctionDef %s" % (node.name)) + self.logger.debug("FunctionDef %s, %s:%s" % (node.name, self.filename, node.lineno)) # To begin with: # @@ -314,7 +314,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit_FunctionDef(node) # TODO: alias for now; tag async functions in output in a future version? 
def visit_Lambda(self, node): - self.logger.debug("Lambda") + self.logger.debug("Lambda, %s:%s" % (self.filename, node.lineno)) with ExecuteInInnerScope(self, "lambda"): for d in node.args.defaults: self.visit(d) @@ -323,7 +323,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(node.body) # single expr def visit_Import(self, node): - self.logger.debug("Import %s" % [format_alias(x) for x in node.names]) + self.logger.debug("Import %s, %s:%s" % ([format_alias(x) for x in node.names], self.filename, node.lineno)) # TODO: add support for relative imports (path may be like "....something.something") # https://www.python.org/dev/peps/pep-0328/#id10 @@ -366,7 +366,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.logger.info("New edge added for Use from %s to Import %s" % (from_node, to_node)) def visit_ImportFrom(self, node): - self.logger.debug("ImportFrom: from %s import %s" % (node.module, [format_alias(x) for x in node.names])) + self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) tgt_name = node.module from_node = self.get_node_of_current_namespace() @@ -406,7 +406,7 @@ class CallGraphVisitor(ast.NodeVisitor): # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): objname = get_ast_node_name(node.value) - self.logger.debug("Attribute %s of %s in context %s" % (node.attr, objname, type(node.ctx))) + self.logger.debug("Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno)) # TODO: self.last_value is a hack. 
Handle names in store context (LHS) # in analyze_binding(), so that visit_Attribute() only needs to handle @@ -488,7 +488,7 @@ class CallGraphVisitor(ast.NodeVisitor): # name access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Name(self, node): - self.logger.debug("Name %s in context %s" % (node.id, type(node.ctx))) + self.logger.debug("Name %s in context %s, %s:%s" % (node.id, type(node.ctx), self.filename, node.lineno)) # TODO: self.last_value is a hack. Handle names in store context (LHS) # in analyze_binding(), so that visit_Name() only needs to handle @@ -531,8 +531,9 @@ class CallGraphVisitor(ast.NodeVisitor): values = sanitize_exprs(node.value) # values is the same for each set of targets for targets in node.targets: targets = sanitize_exprs(targets) - self.logger.debug("Assign %s %s" % ([get_ast_node_name(x) for x in targets], - [get_ast_node_name(x) for x in values])) + self.logger.debug("Assign %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], + [get_ast_node_name(x) for x in values], + self.filename, node.lineno)) self.analyze_binding(targets, values) def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ @@ -540,11 +541,13 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = None if node.value is not None: value = sanitize_exprs(node.value) - self.logger.debug("AnnAssign %s %s" % (get_ast_node_name(target[0]), - get_ast_node_name(value[0]))) + self.logger.debug("AnnAssign %s %s, %s:%s" % (get_ast_node_name(target[0]), + get_ast_node_name(value[0]), + self.filenaame, node.lineno)) self.analyze_binding(target, value) else: # just a type declaration - self.logger.debug("AnnAssign %s " % (get_ast_node_name(target[0]))) + self.logger.debug("AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), + self.filename, node.lineno)) self.last_value = None self.visit(target[0]) # TODO: use the type annotation from node.annotation? 
@@ -554,9 +557,10 @@ class CallGraphVisitor(ast.NodeVisitor): targets = sanitize_exprs(node.target) values = sanitize_exprs(node.value) # values is the same for each set of targets - self.logger.debug("AugAssign %s %s %s" % ([get_ast_node_name(x) for x in targets], - type(node.op), - [get_ast_node_name(x) for x in values])) + self.logger.debug("AugAssign %s %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], + type(node.op), + [get_ast_node_name(x) for x in values], + self.filename, node.lineno)) # TODO: maybe no need to handle tuple unpacking in AugAssign? (but simpler to use the same implementation) self.analyze_binding(targets, values) @@ -569,7 +573,7 @@ class CallGraphVisitor(ast.NodeVisitor): # in use elsewhere.) # def visit_For(self, node): - self.logger.debug("For-loop") + self.logger.debug("For-loop, %s:%s" % (self.filename, node.lineno)) targets = sanitize_exprs(node.target) values = sanitize_exprs(node.iter) @@ -584,19 +588,19 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit_For(node) # TODO: alias for now; tag async for in output in a future version? 
def visit_ListComp(self, node): - self.logger.debug("ListComp") + self.logger.debug("ListComp, %s:%s" % (self.filename, node.lineno)) self.analyze_comprehension(node, "listcomp") def visit_SetComp(self, node): - self.logger.debug("SetComp") + self.logger.debug("SetComp, %s:%s" % (self.filename, node.lineno)) self.analyze_comprehension(node, "setcomp") def visit_DictComp(self, node): - self.logger.debug("DictComp") + self.logger.debug("DictComp, %s:%s" % (self.filename, node.lineno)) self.analyze_comprehension(node, "dictcomp", field1="key", field2="value") def visit_GeneratorExp(self, node): - self.logger.debug("GeneratorExp") + self.logger.debug("GeneratorExp, %s:%s" % (self.filename, node.lineno)) self.analyze_comprehension(node, "genexpr") def analyze_comprehension(self, node, label, field1="elt", field2=None): @@ -638,7 +642,8 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(getattr(node, field2)) def visit_Call(self, node): - self.logger.debug("Call %s" % (get_ast_node_name(node.func))) + self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), + self.filename, node.lineno)) # visit args to detect uses for arg in node.args: @@ -685,7 +690,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.logger.info("New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node)) def visit_With(self, node): - self.logger.debug("With (context manager)") + self.logger.debug("With (context manager), %s:%s" % (self.filename, node.lineno)) def add_uses_enter_exit_of(graph_node): # add uses edges to __enter__ and __exit__ methods of given Node From a626485b3956c0bd8558e515445106a3cbf9a4bb Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Mon, 3 Sep 2018 17:16:14 +0300 Subject: [PATCH 004/117] improve comments --- pyan/analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 5a3242e..10f8787 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -278,7 +278,7 @@ class 
CallGraphVisitor(ast.NodeVisitor): sc.defs[a.arg] = nonsense_node if all_args.vararg is not None: # *args if present sc.defs[all_args.vararg] = nonsense_node - for a in all_args.kwonlyargs: + for a in all_args.kwonlyargs: # any after *args or * sc.defs[a.arg] = nonsense_node if all_args.kwarg is not None: # **kwargs if present sc.defs[all_args.kwarg] = nonsense_node @@ -629,7 +629,7 @@ class CallGraphVisitor(ast.NodeVisitor): for expr in outermost.ifs: self.visit(expr) - # TODO: there's also an is_async field we might want to use in a future version. + # TODO: there's also an is_async field we might want to use in a future version of Pyan. for gen in moregens: targets = sanitize_exprs(gen.target) values = sanitize_exprs(gen.iter) From 8b47a72cb1d46fb51d4f890e688887d27d12f648 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Mon, 3 Sep 2018 17:16:51 +0300 Subject: [PATCH 005/117] fix #4 --- pyan/analyzer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 10f8787..3c3bc30 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -299,8 +299,10 @@ class CallGraphVisitor(ast.NodeVisitor): for d in node.args.defaults: self.visit(d) + # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#arguments for d in node.args.kw_defaults: - self.visit(d) + if d is not None: + self.visit(d) for stmt in node.body: self.visit(stmt) From 6dd64aa631c4492639776831804b799ca35fa2d9 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Mon, 3 Sep 2018 17:17:37 +0300 Subject: [PATCH 006/117] enable visit_Constant (untested, for Python 3.6+) --- pyan/analyzer.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 3c3bc30..27b6ac5 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -390,20 +390,20 @@ class CallGraphVisitor(ast.NodeVisitor): self.set_value(new_name, tgt_id) self.logger.info("From setting name %s to %s" 
% (new_name, tgt_id)) -# # Edmund Horner's original post has info on what this fixed in Python 2. -# # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ -# # -# # Essentially, this should make '.'.join(...) see str.join. -# # Pyan3 currently handles that in resolve_attribute() and get_attribute(). -# # -# # Python 3.4 does not have ast.Constant, but 3.6 does. Disabling for now. -# # TODO: revisit this part after upgrading Python. -# # -# def visit_Constant(self, node): -# self.logger.debug("Constant %s" % (node.value)) -# t = type(node.value) -# tn = t.__name__ -# self.last_value = self.get_node('', tn, node) + # Edmund Horner's original post has info on what this fixed in Python 2. + # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ + # + # Essentially, this should make '.'.join(...) see str.join. + # Pyan3 currently handles that in resolve_attribute() and get_attribute(). + # + # Python 3.4 does not have ast.Constant, but 3.6 does. + # TODO: actually test this with Python 3.6 or later. + # + def visit_Constant(self, node): + self.logger.debug("Constant %s, %s:%s" % (node.value, self.filename, node.lineno)) + t = type(node.value) + tn = t.__name__ + self.last_value = self.get_node('', tn, node) # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): From 68a0adc8124ee970b0f6f162ea7c34365bf9ce3a Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 5 Sep 2018 10:25:30 +0300 Subject: [PATCH 007/117] enh: capture bindings to objects used as default values of arguments in FunctionDef and Lambda --- pyan/analyzer.py | 89 +++++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 27b6ac5..09d42f0 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -261,27 +261,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Capture which names correspond to function args. 
# - # In the function scope, set them to a nonsense Node, - # to prevent leakage of identifiers of matching name - # from the enclosing scope (due to the local value being None - # until we set it to this nonsense Node). - # - # As the name of the nonsense node, we can use any string that - # is not a valid Python identifier. - # - # It has no sensible flavor, so we leave its flavor unspecified. - # - sc = self.scopes[inner_ns] - nonsense_node = self.get_node(inner_ns, '^^^argument^^^', None) - all_args = node.args # args, vararg (*args), kwonlyargs, kwarg (**kwargs) - for a in all_args.args: # positional - sc.defs[a.arg] = nonsense_node - if all_args.vararg is not None: # *args if present - sc.defs[all_args.vararg] = nonsense_node - for a in all_args.kwonlyargs: # any after *args or * - sc.defs[a.arg] = nonsense_node - if all_args.kwarg is not None: # **kwargs if present - sc.defs[all_args.kwarg] = nonsense_node + self.generate_args_nodes(node.args, inner_ns) # self_name is just an ordinary name in the method namespace, except # that its value is implicitly set by Python when the method is called. @@ -297,12 +277,11 @@ class CallGraphVisitor(ast.NodeVisitor): self.scopes[inner_ns].defs[self_name] = class_node self.logger.info('Method def: setting self name "%s" to %s' % (self_name, class_node)) - for d in node.args.defaults: - self.visit(d) - # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#arguments - for d in node.args.kw_defaults: - if d is not None: - self.visit(d) + # record bindings of args to the given default values, if present + self.analyze_arguments(node.args) + + # Analyze the function body + # for stmt in node.body: self.visit(stmt) @@ -316,14 +295,62 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit_FunctionDef(node) # TODO: alias for now; tag async functions in output in a future version? def visit_Lambda(self, node): + # TODO: avoid lumping together all lambdas in the same namespace. 
self.logger.debug("Lambda, %s:%s" % (self.filename, node.lineno)) with ExecuteInInnerScope(self, "lambda"): - for d in node.args.defaults: - self.visit(d) - for d in node.args.kw_defaults: - self.visit(d) + inner_ns = self.get_node_of_current_namespace().get_name() + self.generate_args_nodes(node.args, inner_ns) + self.analyze_arguments(node.args) self.visit(node.body) # single expr + def generate_args_nodes(self, ast_args, inner_ns): + """Capture which names correspond to function args. + + In the function scope, set them to a nonsense Node, + to prevent leakage of identifiers of matching name + from the enclosing scope (due to the local value being None + until we set it to this nonsense Node). + + ast_args: node.args from a FunctionDef or Lambda + inner_ns: namespace of the function or lambda, for scope lookup + """ + sc = self.scopes[inner_ns] + # As the name of the nonsense node, we can use any string that + # is not a valid Python identifier. + # + # It has no sensible flavor, so we leave its flavor unspecified. + nonsense_node = self.get_node(inner_ns, '^^^argument^^^', None) + # args, vararg (*args), kwonlyargs, kwarg (**kwargs) + for a in ast_args.args: # positional + sc.defs[a.arg] = nonsense_node + if ast_args.vararg is not None: # *args if present + sc.defs[ast_args.vararg] = nonsense_node + for a in ast_args.kwonlyargs: # any after *args or * + sc.defs[a.arg] = nonsense_node + if ast_args.kwarg is not None: # **kwargs if present + sc.defs[ast_args.kwarg] = nonsense_node + + def analyze_arguments(self, ast_args): + """Analyze an arguments node of the AST. + + Record bindings of args to the given default values, if present. 
+ + Used for analyzing FunctionDefs and Lambdas.""" + # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#arguments + if ast_args.defaults: + n = len(ast_args.defaults) + for tgt, val in zip(ast_args.args[-n:], ast_args.defaults): + targets = sanitize_exprs(tgt) + values = sanitize_exprs(val) + self.analyze_binding(targets, values) + if ast_args.kw_defaults: + n = len(ast_args.kw_defaults) + for tgt, val in zip(ast_args.kwonlyargs, ast_args.kw_defaults): + if val is not None: + targets = sanitize_exprs(tgt) + values = sanitize_exprs(val) + self.analyze_binding(targets, values) + def visit_Import(self, node): self.logger.debug("Import %s, %s:%s" % ([format_alias(x) for x in node.names], self.filename, node.lineno)) From 2446676829f465ee4de1921242a9590e12d4bda5 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 5 Sep 2018 11:05:40 +0300 Subject: [PATCH 008/117] partly fix #5 --- pyan/analyzer.py | 72 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 09d42f0..1d3d5c1 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -397,25 +397,65 @@ class CallGraphVisitor(ast.NodeVisitor): def visit_ImportFrom(self, node): self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) - tgt_name = node.module + # HACK: support "from . import foo"...ish. This is very difficult + # to get right, so right now we don't even try to do it properly. + # + # We only special-case "from . import foo" so that it doesn't crash Pyan, + # and may even occasionally find the right module. + # + # Pyan would need to know the package structure, and how the program + # being analyzed is actually going to be invoked (!), to be able to + # resolve relative imports correctly. The current "here's a set of files, + # analyze them" approach doesn't cut it. 
+ # + # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time from_node = self.get_node_of_current_namespace() - to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace - self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + if node.module: # import some names from a module + # TODO: This works only for absolute imports. + # + # Relative imports such as "from .mod import foo" and + # "from ..mod import foo" is treated incorrectly, since Pyan has + # no concept of Python packages (and doesn't know what to do + # with node.level). + # + # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom + tgt_name = node.module - if tgt_name in self.module_names: - mod_name = self.module_names[tgt_name] - else: - mod_name = tgt_name + to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace + self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) + if self.add_uses_edge(from_node, to_node): + self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + + if tgt_name in self.module_names: + mod_name = self.module_names[tgt_name] + else: + mod_name = tgt_name + + for import_item in node.names: + name = import_item.name + new_name = import_item.asname if import_item.asname is not None else name + # we imported the identifier name from the module mod_name + tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) + self.set_value(new_name, tgt_id) + self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) + + else: # module name missing = "from . import ..." 
+ for import_item in node.names: # in this case the names are modules + # asname doesn't matter, we want to capture the use of the module + # with its original name. + tgt_name = import_item.name + + if tgt_name in self.module_names: + mod_name = self.module_names[tgt_name] + else: + mod_name = tgt_name + + to_node = self.get_node('', mod_name, node, flavor=Flavor.MODULE) # module, in top-level namespace + + self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) + if self.add_uses_edge(from_node, to_node): + self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) - for import_item in node.names: - name = import_item.name - new_name = import_item.asname if import_item.asname is not None else name - # we imported the identifier name from the module mod_name - tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) - self.set_value(new_name, tgt_id) - self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) # Edmund Horner's original post has info on what this fixed in Python 2. # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ From 3034deed9d6eed8d7f38a62172cfb80b5de3047e Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 5 Sep 2018 11:42:42 +0300 Subject: [PATCH 009/117] refactor: unified handling for 'import foo' and 'from . import foo' --- pyan/analyzer.py | 95 +++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 1d3d5c1..3708314 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -356,47 +356,15 @@ class CallGraphVisitor(ast.NodeVisitor): # TODO: add support for relative imports (path may be like "....something.something") # https://www.python.org/dev/peps/pep-0328/#id10 - # Do we need to? Seems that at least "from .foo import bar" works already? 
- for import_item in node.names: - src_name = import_item.name # what is being imported - tgt_name = import_item.asname if import_item.asname is not None else src_name # under which name - - # mark the use site - # - # where it is being imported to, i.e. the **user** - from_node = self.get_node_of_current_namespace() - # the thing **being used** (under the asname, if any) - to_node = self.get_node('', tgt_name, node, flavor=Flavor.IMPORTEDITEM) - - is_new_edge = self.add_uses_edge(from_node, to_node) - - # bind asname in the current namespace to the imported module - # - # conversion: possible short name -> fully qualified name - # (when analyzing a set of files in the same directory) - if src_name in self.module_names: - mod_name = self.module_names[src_name] - else: - mod_name = src_name - tgt_module = self.get_node('', mod_name, node, flavor=Flavor.MODULE) - # XXX: if there is no asname, it may happen that mod_name == tgt_name, - # in which case these will be the same Node. They are semantically - # distinct (Python name at receiving end, vs. module), but currently - # Pyan has no way of retaining that information. - if to_node is tgt_module: - to_node.flavor = Flavor.MODULE - self.set_value(tgt_name, tgt_module) - - # must do this after possibly munging flavor to avoid confusing - # the user reading the log - self.logger.debug("Use from %s to Import %s" % (from_node, to_node)) - if is_new_edge: - self.logger.info("New edge added for Use from %s to Import %s" % (from_node, to_node)) + for import_item in node.names: # the names are modules + self.analyze_module_import(import_item, node) def visit_ImportFrom(self, node): self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + # TODO: add support for relative imports (path may be like "....something.something") + # HACK: support "from . import foo"...ish. 
This is very difficult # to get right, so right now we don't even try to do it properly. # @@ -431,7 +399,7 @@ class CallGraphVisitor(ast.NodeVisitor): else: mod_name = tgt_name - for import_item in node.names: + for import_item in node.names: # the names are items inside the module name = import_item.name new_name = import_item.asname if import_item.asname is not None else name # we imported the identifier name from the module mod_name @@ -440,22 +408,51 @@ class CallGraphVisitor(ast.NodeVisitor): self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) else: # module name missing = "from . import ..." - for import_item in node.names: # in this case the names are modules - # asname doesn't matter, we want to capture the use of the module - # with its original name. - tgt_name = import_item.name + for import_item in node.names: # the names are modules + self.analyze_module_import(import_item, node) - if tgt_name in self.module_names: - mod_name = self.module_names[tgt_name] - else: - mod_name = tgt_name + def analyze_module_import(self, import_item, ast_node): + """Analyze a names AST node inside an Import or ImportFrom AST node. - to_node = self.get_node('', mod_name, node, flavor=Flavor.MODULE) # module, in top-level namespace + This handles the case where the objects being imported are modules. - self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + import_item: an item of ast_node.names + ast_node: for recording source location information + """ + src_name = import_item.name # what is being imported + tgt_name = import_item.asname if import_item.asname is not None else src_name # under which name + # mark the use site + # + # where it is being imported to, i.e. 
the **user** + from_node = self.get_node_of_current_namespace() + # the thing **being used** (under the asname, if any) + to_node = self.get_node('', tgt_name, ast_node, flavor=Flavor.IMPORTEDITEM) + + is_new_edge = self.add_uses_edge(from_node, to_node) + + # bind asname in the current namespace to the imported module + # + # conversion: possible short name -> fully qualified name + # (when analyzing a set of files in the same directory) + if src_name in self.module_names: + mod_name = self.module_names[src_name] + else: + mod_name = src_name + tgt_module = self.get_node('', mod_name, ast_node, flavor=Flavor.MODULE) + # XXX: if there is no asname, it may happen that mod_name == tgt_name, + # in which case these will be the same Node. They are semantically + # distinct (Python name at receiving end, vs. module), but currently + # Pyan has no way of retaining that information. + if to_node is tgt_module: + to_node.flavor = Flavor.MODULE + self.set_value(tgt_name, tgt_module) + + # must do this after possibly munging flavor to avoid confusing + # the user reading the log + self.logger.debug("Use from %s to Import %s" % (from_node, to_node)) + if is_new_edge: + self.logger.info("New edge added for Use from %s to Import %s" % (from_node, to_node)) # Edmund Horner's original post has info on what this fixed in Python 2. # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ From ba414c8e8b79aa69f476212d9d0ffbb9fe5fbadc Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:29:43 +0300 Subject: [PATCH 010/117] Package Pyan3 using setuptools: add setup.py --- setup.py | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..fa00271 --- /dev/null +++ b/setup.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +"""setuptools-based setup.py for pyan3. + +Tested on Python 3.6. 
+ +Usage as usual with setuptools: + python3 setup.py build + python3 setup.py sdist + python3 setup.py bdist_wheel --universal + python3 setup.py install + +For details, see + http://setuptools.readthedocs.io/en/latest/setuptools.html#command-reference +or + python3 setup.py --help + python3 setup.py --help-commands + python3 setup.py --help bdist_wheel # or any command +""" + +import os +import ast +import sys +from setuptools import setup + +######################################################### +# General config +######################################################### + +# Name of the top-level package of the library. +# +# This is also the top level of its source tree, relative to the top-level project directory setup.py resides in. +# +libname = "pyan3" + +# Short description for package list on PyPI +# +SHORTDESC = "Offline call graph generator for Python 3" + +# Long description for package homepage on PyPI +# +DESC = """Generate approximate call graphs for Python programs. + +Pyan takes one or more Python source files, performs a (rather superficial) static analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output for rendering by GraphViz or yEd. +""" + +# Set up data files for packaging. +# +# Directories (relative to the top-level directory where setup.py resides) in which to look for data files. +datadirs = () + +# File extensions to be considered as data files. (Literal, no wildcards.) +dataexts = (".py", ".ipynb", ".sh", ".lyx", ".tex", ".txt", ".pdf") + +# Standard documentation to detect (and package if it exists). +# +standard_docs = ["README", "LICENSE", "TODO", "CHANGELOG", "AUTHORS"] # just the basename without file extension +standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for GitHub projects, but other projects may use .rst or .txt (or even blank). 
+ +######################################################### +# Init +######################################################### + +# Gather user-defined data files +# +# http://stackoverflow.com/questions/13628979/setuptools-how-to-make-package-contain-extra-data-folder-and-all-folders-inside +# +datafiles = [] +#getext = lambda filename: os.path.splitext(filename)[1] +#for datadir in datadirs: +# datafiles.extend( [(root, [os.path.join(root, f) for f in files if getext(f) in dataexts]) +# for root, dirs, files in os.walk(datadir)] ) + +# Add standard documentation (README et al.), if any, to data files +# +detected_docs = [] +for docname in standard_docs: + for ext in standard_doc_exts: + filename = "".join((docname, ext)) # relative to the directory in which setup.py resides + if os.path.isfile(filename): + detected_docs.append(filename) +datafiles.append(('.', detected_docs)) + +# Extract __version__ from the package __init__.py +# (since it's not a good idea to actually run __init__.py during the build process). 
+# +# http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package +# +init_py_path = os.path.join('pyan', '__init__.py') +version = '0.0.unknown' +try: + with open(init_py_path) as f: + for line in f: + if line.startswith('__version__'): + version = ast.parse(line).body[0].value.s + break + else: + print("WARNING: Version information not found in '%s', using placeholder '%s'" % (init_py_path, version), file=sys.stderr) +except FileNotFoundError: + print("WARNING: Could not find file '%s', using placeholder version information '%s'" % (init_py_path, version), file=sys.stderr) + +######################################################### +# Call setup() +######################################################### + +setup( + name="pyan3", + version=version, + author="Juha Jeronen", + author_email="juha.m.jeronen@gmail.com", + url="https://github.com/Technologicat/pyan", + + description=SHORTDESC, + long_description=DESC, + + license="GPL 2.0", + + # free-form text field; http://stackoverflow.com/questions/34994130/what-platforms-argument-to-setup-in-setup-py-does + platforms=["Linux"], + + # See + # https://pypi.python.org/pypi?%3Aaction=list_classifiers + # + # for the standard classifiers. + # + classifiers=["Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Topic :: Software Development" + ], + + # See + # http://setuptools.readthedocs.io/en/latest/setuptools.html + # + setup_requires=[], + install_requires=[], + provides=["pyan"], + + # keywords for PyPI (in case you upload your project) + # + # e.g. 
the keywords your project uses as topics on GitHub, minus "python" (if there) + # + keywords=["call-graph", "static-code-analysis"], + + # Declare packages so that python -m setup build will copy .py files (especially __init__.py). + # + # This **does not** automatically recurse into subpackages, so they must also be declared. + # + packages=["pyan"], + + scripts=[], + + zip_safe=True, + + # Custom data files not inside a Python package + data_files=datafiles +) From 85c38b3c5b141723bc8f2b38238cbd433d16a7df Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:39:34 +0300 Subject: [PATCH 011/117] bump version, add readme note about Python 2 and 3 versions of Pyan --- README.md | 2 +- pyan/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6cfc9d7..be89ed7 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Pyan takes one or more Python source files, performs a (rather superficial) stat *And now it is available for Python 3!* -Note: The previous Python 2-compatible version is tagged as `pre-python3` +**Note**: This is the repository for the Python 3 version of Pyan. The previous Python 2-compatible version is tagged as `pre-python3` in [davidfraser's maintenance repository](https://github.com/davidfraser/pyan). 
[![Example output](graph0.png "Example: GraphViz rendering of Pyan output (click for .svg)")](graph0.svg) diff --git a/pyan/__init__.py b/pyan/__init__.py index d99f4f6..93376dd 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -3,4 +3,4 @@ from .main import main -__version__ = "1.0.2" +__version__ = "1.0.3" From 76e2d92c8e92dcc31d042e882229b68742df6f5f Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:45:54 +0300 Subject: [PATCH 012/117] add release management shell scripts --- makedist.sh | 2 ++ uploaddist.sh | 3 +++ 2 files changed, 5 insertions(+) create mode 100755 makedist.sh create mode 100755 uploaddist.sh diff --git a/makedist.sh b/makedist.sh new file mode 100755 index 0000000..338298d --- /dev/null +++ b/makedist.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python3 setup.py sdist bdist_wheel diff --git a/uploaddist.sh b/uploaddist.sh new file mode 100755 index 0000000..2e2e4ca --- /dev/null +++ b/uploaddist.sh @@ -0,0 +1,3 @@ +#!/bin/bash +VERSION="$1" +twine upload dist/pyan-${VERSION}.tar.gz dist/pyan-${VERSION}-py3-none-any.whl From f6552abb93886b13c31541f953890d232f2096f7 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:51:21 +0300 Subject: [PATCH 013/117] rename script pyan.py -> pyan3 --- pyan.py => pyan3 | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pyan.py => pyan3 (100%) diff --git a/pyan.py b/pyan3 similarity index 100% rename from pyan.py rename to pyan3 From 40936f39cdc94a1c93f85007c002c58fea93749d Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:51:32 +0300 Subject: [PATCH 014/117] update package name --- uploaddist.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uploaddist.sh b/uploaddist.sh index 2e2e4ca..d75f5f7 100755 --- a/uploaddist.sh +++ b/uploaddist.sh @@ -1,3 +1,3 @@ #!/bin/bash VERSION="$1" -twine upload dist/pyan-${VERSION}.tar.gz dist/pyan-${VERSION}-py3-none-any.whl +twine upload dist/pyan3-${VERSION}.tar.gz 
dist/pyan3-${VERSION}-py3-none-any.whl From 0a8d41520781a55c5d1eb75da5b2650120318d52 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:51:41 +0300 Subject: [PATCH 015/117] add pyan3 to scripts --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fa00271..7368d51 100644 --- a/setup.py +++ b/setup.py @@ -152,7 +152,7 @@ setup( # packages=["pyan"], - scripts=[], + scripts=["pyan3"], zip_safe=True, From 872dcc0194848621de989453d2a770195caca58f Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 25 Jun 2019 12:54:33 +0300 Subject: [PATCH 016/117] update example to use script name "pyan3" --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index be89ed7..3f08a16 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ See `pyan --help`. Example: -`pyan *.py --uses --no-defines --colored --grouped --annotated --dot >myuses.dot` +`pyan3 *.py --uses --no-defines --colored --grouped --annotated --dot >myuses.dot` Then render using your favorite GraphViz filter, mainly `dot` or `fdp`: From 9ec0827da9f41efd27e7857c48dddc5dfcdfaf09 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 00:46:55 +0300 Subject: [PATCH 017/117] bump version --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 93376dd..37154a0 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -3,4 +3,4 @@ from .main import main -__version__ = "1.0.3" +__version__ = "1.0.4" From a1b6dd791fceb0e4b830772e06648f5f537e919a Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 00:47:05 +0300 Subject: [PATCH 018/117] fix #9 --- pyan/anutils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyan/anutils.py b/pyan/anutils.py index 47659f5..9cd1bb4 100644 --- a/pyan/anutils.py +++ b/pyan/anutils.py @@ -29,6 +29,10 @@ def get_module_name(filename): if not os.path.exists(init_path): return 
mod_name + # blank path means we're looking at __init__.py, in cwd, so its module name is "__init__" + if not filename: + return "__init__" + if not os.path.dirname(filename): return mod_name From beab756390786b6118d4410604dd1474516bfbbf Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 00:49:55 +0300 Subject: [PATCH 019/117] Fix crash in logging code analyzing an AnnAssign node (typoed nonexistent attribute). Fixes #8. Suggested-by: matt-kempster <> --- pyan/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 3708314..e5c6ade 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -609,7 +609,7 @@ class CallGraphVisitor(ast.NodeVisitor): value = sanitize_exprs(node.value) self.logger.debug("AnnAssign %s %s, %s:%s" % (get_ast_node_name(target[0]), get_ast_node_name(value[0]), - self.filenaame, node.lineno)) + self.filename, node.lineno)) self.analyze_binding(target, value) else: # just a type declaration self.logger.debug("AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), From e3024d6e1a55fc7e1f5b184f7888db73e56acbbe Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 00:56:02 +0300 Subject: [PATCH 020/117] Protect against name=None in remove_wild. This at least should fix #7. --- pyan/analyzer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index e5c6ade..32563e1 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -1408,6 +1408,9 @@ class CallGraphVisitor(ast.NodeVisitor): Used for cleaning up forward-references once resolved. This prevents spurious edges due to expand_unknowns().""" + if name is None: # relative imports may create nodes with name=None. 
+ return + if from_node not in self.uses_edges: # no uses edges to remove return From 7ac71822fc6810887d92bec4987de6d4682759e9 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 09:49:23 +0300 Subject: [PATCH 021/117] bump version --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 37154a0..0995ca2 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -3,4 +3,4 @@ from .main import main -__version__ = "1.0.4" +__version__ = "1.0.5" From 53ff4282343127593d7af2d02bdcfc7d106c1f24 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 26 Jun 2019 09:49:30 +0300 Subject: [PATCH 022/117] Treat also "node" as an unsafe word in Graphviz node labels Fixes analysis of Pyan source code itself. --- pyan/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/node.py b/pyan/node.py index cad6982..276f446 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -6,7 +6,7 @@ from enum import Enum def make_safe_label(label): """Avoid name clashes with GraphViz reserved words such as 'graph'.""" - unsafe_words = ("digraph", "graph", "cluster", "subgraph") + unsafe_words = ("digraph", "graph", "cluster", "subgraph", "node") out = label for word in unsafe_words: out = out.replace(word, "%sX" % word) From c0bd7dcb502220f8c71f414ded74fed208637f5c Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 30 Oct 2019 16:17:35 +0200 Subject: [PATCH 023/117] Add a very simple import analyzer for module dependencies --- modvis.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 modvis.py diff --git a/modvis.py b/modvis.py new file mode 100644 index 0000000..f1c245c --- /dev/null +++ b/modvis.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8; -*- +"""A simple import analyzer. 
Visualize dependencies between modules.""" + +import ast +import os + +# from pyan.anutils import get_module_name + +def get_module_name(fullpath): # we need to see __init__, hence we don't use anutils. + if not fullpath.endswith(".py"): + raise ValueError("Expected a .py filename, got '{}'".format(fullpath)) + rel = ".{}".format(os.path.sep) # ./ + if fullpath.startswith(rel): + fullpath = fullpath[len(rel):] + fullpath = fullpath[:-3] # remove .py + return fullpath.replace(os.path.sep, '.') + +def get_pyfiles(basedir): + pyfiles = [] + for root, dirs_, files in os.walk(basedir): + for filename in files: + if filename.endswith(".py"): + fullpath = os.path.join(root, filename) + pyfiles.append(fullpath) + return pyfiles + +def resolve(current_module, target_module, level): + if level < 0: + raise ValueError("Relative import level must be >= 0, got {}".format(level)) + if level == 0: # absolute import + return target_module + # level > 0 (let's have some simplistic support for relative imports) + base = current_module + for _ in range(level): + k = base.rfind('.') + if k == -1: + raise ValueError("Relative import level {} too large for module name {}".format(level, current_module)) + base = base[:k] + return '.'.join((base, target_module)) + +class ImportVisitor(ast.NodeVisitor): + def __init__(self, basedir): + self.modules = {} # modname: {used0, used1, ...} + self.analyze(basedir) + + def analyze(self, basedir): + for fullpath in get_pyfiles(basedir): + with open(fullpath, "rt", encoding="utf-8") as f: + content = f.read() + self.current_module = get_module_name(fullpath) + self.visit(ast.parse(content, fullpath)) + + def add_dependency(self, target_module): # source module is always self.current_module + m = self.current_module + if m not in self.modules: + self.modules[m] = set() + self.modules[m].add(target_module) + + def visit_Import(self, node): + # print(self.current_module, "Import", [alias.name for alias in node.names]) + for alias in node.names: + 
self.add_dependency(alias.name) # alias.asname not relevant for our purposes + + def visit_ImportFrom(self, node): + # print(self.current_module, "ImportFrom", node.module, node.level) + self.add_dependency(resolve(self.current_module, node.module, node.level)) + +def main(): + v = ImportVisitor(".") + ms = v.modules + for m in sorted(ms): + print(m) + for d in sorted(ms[m]): + print(" {}".format(d)) + +if __name__ == '__main__': + main() From 35eff1577c12484285d99a1db8c895b8db49c399 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 30 Oct 2019 16:41:32 +0200 Subject: [PATCH 024/117] Import analyzer: ignore irrelevant directories --- modvis.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modvis.py b/modvis.py index f1c245c..32cdd1c 100644 --- a/modvis.py +++ b/modvis.py @@ -15,9 +15,13 @@ def get_module_name(fullpath): # we need to see __init__, hence we don't use an fullpath = fullpath[:-3] # remove .py return fullpath.replace(os.path.sep, '.') +blacklist = (".git", "build", "dist") def get_pyfiles(basedir): pyfiles = [] - for root, dirs_, files in os.walk(basedir): + for root, dirs, files in os.walk(basedir): + for x in blacklist: # don't visit blacklisted dirs + if x in dirs: + dirs.remove(x) for filename in files: if filename.endswith(".py"): fullpath = os.path.join(root, filename) From d0c6cdf9e20080e6a2592b9707b577aeceac17da Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Wed, 30 Oct 2019 16:41:59 +0200 Subject: [PATCH 025/117] Import analyzer: add code to prepare a graph for Pyan vis machinery --- modvis.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/modvis.py b/modvis.py index 32cdd1c..1ad9216 100644 --- a/modvis.py +++ b/modvis.py @@ -4,6 +4,7 @@ import ast import os +import pyan.node # from pyan.anutils import get_module_name def get_module_name(fullpath): # we need to see __init__, hence we don't use anutils. 
@@ -69,6 +70,42 @@ class ImportVisitor(ast.NodeVisitor): # print(self.current_module, "ImportFrom", node.module, node.level) self.add_dependency(resolve(self.current_module, node.module, node.level)) + # -------------------------------------------------------------------------------- + + def prepare_graph(self): # same format as in pyan.analyzer + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.uses_edges = {} + # we have no defines_edges, which doesn't matter as long as we don't enable that option in visgraph. + + # TODO: Right now we care only about modules whose files we read. + # TODO: If we want to include in the graph also targets that are not in the analyzed set, + # TODO: then we could create nodes also for the modules listed in the *values* of self.modules. + for m in self.modules: + n = pyan.node.Node(namespace="", # not used + name=m, + ast_node=None, + filename="", # not used + flavor=pyan.node.Flavor.MODULE) + n.defined = True + self.nodes[m] = n + + def add_uses_edge(from_node, to_node): + if to_node not in self.modules: + return + if from_node not in self.uses_edges: + self.uses_edges[from_node] = set() + self.uses_edges[from_node].add(to_node) + + for m, deps in self.modules.items(): + for d in deps: + add_uses_edge(m, d) + + # sanity check output + for m, deps in self.uses_edges.items(): + assert m in self.nodes + for d in deps: + assert d in self.nodes + def main(): v = ImportVisitor(".") ms = v.modules @@ -76,6 +113,8 @@ def main(): print(m) for d in sorted(ms[m]): print(" {}".format(d)) + # v.prepare_graph() + # print(v.nodes, v.uses_edges) if __name__ == '__main__': main() From 3431c25147ebc8f2dcd370fe7fc664c554c5c350 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 10:47:37 +0200 Subject: [PATCH 026/117] modvis: First version that actually generates a .dot graph --- modvis.py | 80 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 20 
deletions(-) diff --git a/modvis.py b/modvis.py index 1ad9216..9ef3b69 100644 --- a/modvis.py +++ b/modvis.py @@ -3,11 +3,15 @@ import ast import os +import logging import pyan.node +import pyan.visgraph +import pyan.writers # from pyan.anutils import get_module_name -def get_module_name(fullpath): # we need to see __init__, hence we don't use anutils. +def filename_to_module_name(fullpath): # we need to see __init__, hence we don't use anutils.get_module_name. + """'some/path/module.py' -> 'some.path.module'""" if not fullpath.endswith(".py"): raise ValueError("Expected a .py filename, got '{}'".format(fullpath)) rel = ".{}".format(os.path.sep) # ./ @@ -16,9 +20,16 @@ def get_module_name(fullpath): # we need to see __init__, hence we don't use an fullpath = fullpath[:-3] # remove .py return fullpath.replace(os.path.sep, '.') -blacklist = (".git", "build", "dist") -def get_pyfiles(basedir): - pyfiles = [] +def split_module_name(m): + """'fully.qualified.name' -> ('fully.qualified', 'name')""" + k = m.rfind('.') + if k == -1: + return ("", m) + return (m[:k], m[(k + 1):]) + +blacklist = (".git", "build", "dist", "test") +def find_py_files(basedir): + py_files = [] for root, dirs, files in os.walk(basedir): for x in blacklist: # don't visit blacklisted dirs if x in dirs: @@ -26,10 +37,14 @@ def get_pyfiles(basedir): for filename in files: if filename.endswith(".py"): fullpath = os.path.join(root, filename) - pyfiles.append(fullpath) - return pyfiles + py_files.append(fullpath) + return py_files def resolve(current_module, target_module, level): + """Return fully qualified name of the target_module in an import. + + Resolves relative imports (level > 0) using current_module as the starting point. 
+ """ if level < 0: raise ValueError("Relative import level must be >= 0, got {}".format(level)) if level == 0: # absolute import @@ -45,14 +60,17 @@ def resolve(current_module, target_module, level): class ImportVisitor(ast.NodeVisitor): def __init__(self, basedir): - self.modules = {} # modname: {used0, used1, ...} + self.modules = {} # modname: {dep0, dep1, ...} + self.filenames = {} # modname: filename self.analyze(basedir) def analyze(self, basedir): - for fullpath in get_pyfiles(basedir): + for fullpath in find_py_files(basedir): with open(fullpath, "rt", encoding="utf-8") as f: content = f.read() - self.current_module = get_module_name(fullpath) + m = filename_to_module_name(fullpath) + self.current_module = m + self.filenames[m] = fullpath self.visit(ast.parse(content, fullpath)) def add_dependency(self, target_module): # source module is always self.current_module @@ -81,40 +99,62 @@ class ImportVisitor(ast.NodeVisitor): # TODO: If we want to include in the graph also targets that are not in the analyzed set, # TODO: then we could create nodes also for the modules listed in the *values* of self.modules. for m in self.modules: - n = pyan.node.Node(namespace="", # not used - name=m, + ns, mod = split_module_name(m) + fn = self.filenames[m] + # print("{}: ns={}, mod={}, fn={}".format(m, ns, mod, fn)) + n = pyan.node.Node(namespace=ns, + name=mod, ast_node=None, - filename="", # not used + filename=fn, flavor=pyan.node.Flavor.MODULE) n.defined = True - self.nodes[m] = n + # Pyan's analyzer.py allows several nodes to share the same short name, + # which is used as the key to self.nodes; but we use the fully qualified + # name as the key. Nevertheless, visgraph expects a format where the + # values in the visitor's `nodes` attribute are lists. 
+ self.nodes[m] = [n] def add_uses_edge(from_node, to_node): - if to_node not in self.modules: - return if from_node not in self.uses_edges: self.uses_edges[from_node] = set() self.uses_edges[from_node].add(to_node) for m, deps in self.modules.items(): for d in deps: - add_uses_edge(m, d) + n_from = self.nodes.get(m) + n_to = self.nodes.get(d) + if n_from and n_to: + add_uses_edge(n_from[0], n_to[0]) # sanity check output for m, deps in self.uses_edges.items(): - assert m in self.nodes + assert m.get_name() in self.nodes for d in deps: - assert d in self.nodes + assert d.get_name() in self.nodes def main(): v = ImportVisitor(".") + + # plaintext report ms = v.modules for m in sorted(ms): print(m) for d in sorted(ms[m]): print(" {}".format(d)) - # v.prepare_graph() - # print(v.nodes, v.uses_edges) + + # dot report + v.prepare_graph() +# print(v.nodes, v.uses_edges) + logger = logging.getLogger(__name__) + + graph_options = {"colored": True, "nested": False, "grouped_alt": False, "grouped": False, + "annotated": True, "draw_defines": False, "draw_uses": True} + graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) + writer = pyan.writers.DotWriter(graph, + options=['rankdir=TB'], + output="modvis_output.dot", + logger=logger) + writer.run() if __name__ == '__main__': main() From 6f0ccdca92b7eb861c7e439061965726994cf8f6 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 11:03:17 +0200 Subject: [PATCH 027/117] modvis: Color by package (actually os.path.dirname) --- modvis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modvis.py b/modvis.py index 9ef3b69..fe43c88 100644 --- a/modvis.py +++ b/modvis.py @@ -100,12 +100,12 @@ class ImportVisitor(ast.NodeVisitor): # TODO: then we could create nodes also for the modules listed in the *values* of self.modules. 
for m in self.modules: ns, mod = split_module_name(m) - fn = self.filenames[m] + package = os.path.dirname(self.filenames[m]) # print("{}: ns={}, mod={}, fn={}".format(m, ns, mod, fn)) n = pyan.node.Node(namespace=ns, name=mod, ast_node=None, - filename=fn, + filename=package, # HACK: visualizing at module level, so color by package. flavor=pyan.node.Flavor.MODULE) n.defined = True # Pyan's analyzer.py allows several nodes to share the same short name, From 3ddecff2f3f7419ae821ac72b4c07671ef23e7f7 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 11:21:11 +0200 Subject: [PATCH 028/117] modvis: Add pyan-like command-line interface (copy all relevant options) --- modvis.py | 154 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 120 insertions(+), 34 deletions(-) diff --git a/modvis.py b/modvis.py index fe43c88..5293552 100644 --- a/modvis.py +++ b/modvis.py @@ -4,6 +4,8 @@ import ast import os import logging +from glob import glob +from optparse import OptionParser # TODO: migrate to argparse import pyan.node import pyan.visgraph @@ -27,18 +29,18 @@ def split_module_name(m): return ("", m) return (m[:k], m[(k + 1):]) -blacklist = (".git", "build", "dist", "test") -def find_py_files(basedir): - py_files = [] - for root, dirs, files in os.walk(basedir): - for x in blacklist: # don't visit blacklisted dirs - if x in dirs: - dirs.remove(x) - for filename in files: - if filename.endswith(".py"): - fullpath = os.path.join(root, filename) - py_files.append(fullpath) - return py_files +# blacklist = (".git", "build", "dist", "test") +# def find_py_files(basedir): +# py_files = [] +# for root, dirs, files in os.walk(basedir): +# for x in blacklist: # don't visit blacklisted dirs +# if x in dirs: +# dirs.remove(x) +# for filename in files: +# if filename.endswith(".py"): +# fullpath = os.path.join(root, filename) +# py_files.append(fullpath) +# return py_files def resolve(current_module, target_module, level): """Return fully qualified 
name of the target_module in an import. @@ -59,18 +61,18 @@ def resolve(current_module, target_module, level): return '.'.join((base, target_module)) class ImportVisitor(ast.NodeVisitor): - def __init__(self, basedir): + def __init__(self, filenames): self.modules = {} # modname: {dep0, dep1, ...} - self.filenames = {} # modname: filename - self.analyze(basedir) + self.fullpaths = {} # modname: fullpath + self.analyze(filenames) - def analyze(self, basedir): - for fullpath in find_py_files(basedir): + def analyze(self, filenames): + for fullpath in filenames: with open(fullpath, "rt", encoding="utf-8") as f: content = f.read() m = filename_to_module_name(fullpath) self.current_module = m - self.filenames[m] = fullpath + self.fullpaths[m] = fullpath self.visit(ast.parse(content, fullpath)) def add_dependency(self, target_module): # source module is always self.current_module @@ -100,12 +102,18 @@ class ImportVisitor(ast.NodeVisitor): # TODO: then we could create nodes also for the modules listed in the *values* of self.modules. for m in self.modules: ns, mod = split_module_name(m) - package = os.path.dirname(self.filenames[m]) + package = os.path.dirname(self.fullpaths[m]) # print("{}: ns={}, mod={}, fn={}".format(m, ns, mod, fn)) + # HACK: The `filename` attribute of the node determines the visual color. + # HACK: We are visualizing at module level, so color by package. + # TODO: If we are analyzing files from several projects in the same run, + # TODO: it could be useful to decide the hue by the top-level directory name + # TODO: (after the './' if any), and lightness by the depth in each tree. + # TODO: This would be most similar to how Pyan does it for functions/classes. n = pyan.node.Node(namespace=ns, name=mod, ast_node=None, - filename=package, # HACK: visualizing at module level, so color by package. 
+ filename=package, flavor=pyan.node.Flavor.MODULE) n.defined = True # Pyan's analyzer.py allows several nodes to share the same short name, @@ -133,27 +141,105 @@ class ImportVisitor(ast.NodeVisitor): assert d.get_name() in self.nodes def main(): - v = ImportVisitor(".") + usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" + desc = ('Analyse one or more Python source files and generate an' + 'approximate module dependency graph.') + parser = OptionParser(usage=usage, description=desc) + parser.add_option("--dot", + action="store_true", default=False, + help="output in GraphViz dot format") + parser.add_option("--tgf", + action="store_true", default=False, + help="output in Trivial Graph Format") + parser.add_option("--yed", + action="store_true", default=False, + help="output in yEd GraphML Format") + parser.add_option("-f", "--file", dest="filename", + help="write graph to FILE", metavar="FILE", default=None) + parser.add_option("-l", "--log", dest="logname", + help="write log to LOG", metavar="LOG") + parser.add_option("-v", "--verbose", + action="store_true", default=False, dest="verbose", + help="verbose output") + parser.add_option("-V", "--very-verbose", + action="store_true", default=False, dest="very_verbose", + help="even more verbose output (mainly for debug)") + parser.add_option("-c", "--colored", + action="store_true", default=False, dest="colored", + help="color nodes according to namespace [dot only]") + parser.add_option("-g", "--grouped", + action="store_true", default=False, dest="grouped", + help="group nodes (create subgraphs) according to namespace [dot only]") + parser.add_option("-e", "--nested-groups", + action="store_true", default=False, dest="nested_groups", + help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]") + parser.add_option("--dot-rankdir", default="TB", dest="rankdir", + help=( + "specifies the dot graph 'rankdir' property for " + "controlling the direction of the graph. 
" + "Allowed values: ['TB', 'LR', 'BT', 'RL']. " + "[dot only]")) + parser.add_option("-a", "--annotated", + action="store_true", default=False, dest="annotated", + help="annotate with module location") - # plaintext report - ms = v.modules - for m in sorted(ms): - print(m) - for d in sorted(ms[m]): - print(" {}".format(d)) + options, args = parser.parse_args() + filenames = [fn2 for fn in args for fn2 in glob(fn)] + if len(args) == 0: + parser.error('Need one or more filenames to process') - # dot report + if options.nested_groups: + options.grouped = True + + graph_options = { + 'draw_defines': False, # we have no defines edges + 'draw_uses': True, + 'colored': options.colored, + 'grouped_alt': False, + 'grouped': options.grouped, + 'nested_groups': options.nested_groups, + 'annotated': options.annotated} + + # TODO: use an int argument for verbosity + logger = logging.getLogger(__name__) + if options.very_verbose: + logger.setLevel(logging.DEBUG) + elif options.verbose: + logger.setLevel(logging.INFO) + else: + logger.setLevel(logging.WARN) + logger.addHandler(logging.StreamHandler()) + if options.logname: + handler = logging.FileHandler(options.logname) + logger.addHandler(handler) + + # run the analysis + v = ImportVisitor(filenames) + + # # we could generate a plaintext report like this + # ms = v.modules + # for m in sorted(ms): + # print(m) + # for d in sorted(ms[m]): + # print(" {}".format(d)) + + # format graph report v.prepare_graph() # print(v.nodes, v.uses_edges) logger = logging.getLogger(__name__) - graph_options = {"colored": True, "nested": False, "grouped_alt": False, "grouped": False, - "annotated": True, "draw_defines": False, "draw_uses": True} graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) - writer = pyan.writers.DotWriter(graph, - options=['rankdir=TB'], - output="modvis_output.dot", - logger=logger) + if options.dot: + writer = pyan.writers.DotWriter(graph, + options=['rankdir=' + options.rankdir], + 
output=options.filename, + logger=logger) + if options.tgf: + writer = pyan.writers.TgfWriter( + graph, output=options.filename, logger=logger) + if options.yed: + writer = pyan.writers.YedWriter( + graph, output=options.filename, logger=logger) writer.run() if __name__ == '__main__': From a623afc10abe4f9ba7871d9384bda0c51b289134 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 12:12:41 +0200 Subject: [PATCH 029/117] Fix empty old namespace in info log message --- pyan/visgraph.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyan/visgraph.py b/pyan/visgraph.py index 5b27086..ccc0544 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -189,9 +189,11 @@ class VisualGraph(object): # next namespace? if grouped and node.namespace != prev_namespace: - logger.info( - 'New namespace %s, old was %s' - % (node.namespace, prev_namespace)) + if not prev_namespace: + logger.info('New namespace %s' % (node.namespace)) + else: + logger.info('New namespace %s, old was %s' % (node.namespace, prev_namespace)) + prev_namespace = node.namespace label = node.get_namespace_label() subgraph = cls(label, node.namespace) From 551655e449f6f4aebfc05b5f500a87632aecb92a Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 12:13:28 +0200 Subject: [PATCH 030/117] Autopep8 visgraph --- pyan/visgraph.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pyan/visgraph.py b/pyan/visgraph.py index ccc0544..40cfd79 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -19,7 +19,7 @@ class Colorizer: self.logger = logger or logging.getLogger(__name__) self.colored = colored - self._hues = [j/num_colors for j in range(num_colors)] + self._hues = [j / num_colors for j in range(num_colors)] self._idx_of = {} # top-level namespace: hue index self._idx = 0 @@ -40,32 +40,32 @@ class Colorizer: def get(self, node): # return (group number, hue index) idx = self._node_to_idx(node) - return 
(idx,self._hues[idx]) + return (idx, self._hues[idx]) def make_colors(self, node): # return (group number, fill color, text color) if self.colored: - idx,H = self.get(node) - L = max( [1.0 - 0.1*node.get_level(), 0.1] ) + idx, H = self.get(node) + L = max([1.0 - 0.1 * node.get_level(), 0.1]) S = 1.0 A = 0.7 # make nodes translucent (to handle possible overlaps) - fill_RGBA = self.htmlize_rgb(*colorsys.hls_to_rgb(H,L,S), A=A) + fill_RGBA = self.htmlize_rgb(*colorsys.hls_to_rgb(H, L, S), A=A) # black text on light nodes, white text on (very) dark nodes. text_RGB = "#000000" if L >= 0.5 else "#ffffff" else: - idx,_ = self.get(node) + idx, _ = self.get(node) fill_RGBA = self.htmlize_rgb(1.0, 1.0, 1.0, 0.7) text_RGB = "#000000" return idx, fill_RGBA, text_RGB @staticmethod - def htmlize_rgb(R,G,B,A=None): + def htmlize_rgb(R, G, B, A=None): if A is not None: - R,G,B,A = [int(255.0*x) for x in (R,G,B,A)] - return "#%02x%02x%02x%02x" % (R,G,B,A) + R, G, B, A = [int(255.0 * x) for x in (R, G, B, A)] + return "#%02x%02x%02x%02x" % (R, G, B, A) else: - R,G,B = [int(255.0*x) for x in (R,G,B)] - return "#%02x%02x%02x" % (R,G,B) + R, G, B = [int(255.0 * x) for x in (R, G, B)] + return "#%02x%02x%02x" % (R, G, B) class VisualNode(object): @@ -89,7 +89,7 @@ class VisualNode(object): self.fill_color, self.text_color, self.group] if s] if optionals: return ('VisualNode(' + repr(self.id) + - ', ' + ', '.join(optionals)+')') + ', ' + ', '.join(optionals) + ')') else: return 'VisualNode(' + repr(self.id) + ')' @@ -104,12 +104,12 @@ class VisualEdge(object): self.source = source self.target = target self.flavor = flavor - self.color = color + self.color = color def __repr__(self): return ( - 'Edge('+self.source.label+' '+self.flavor+' ' + - self.target.label+')') + 'Edge(' + self.source.label + ' ' + self.flavor + ' ' + + self.target.label + ')') class VisualGraph(object): @@ -143,12 +143,12 @@ class VisualGraph(object): if annotated: if grouped: # group label includes namespace 
already - labeler = lambda n: n.get_annotated_name() + def labeler(n): return n.get_annotated_name() else: # the node label is the only place to put the namespace info - labeler = lambda n: n.get_long_annotated_name() + def labeler(n): return n.get_long_annotated_name() else: - labeler = lambda n: n.get_short_name() + def labeler(n): return n.get_short_name() logger = logger or logging.getLogger(__name__) @@ -165,7 +165,7 @@ class VisualGraph(object): for node in visited_nodes: filenames.add(node.filename) return filenames - colorizer = Colorizer(num_colors=len(find_filenames())+1, + colorizer = Colorizer(num_colors=len(find_filenames()) + 1, colored=colored, logger=logger) nodes_dict = dict() From 5a42367628b0fd1410c3eb3b88f6c863da2eea1d Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 12:37:52 +0200 Subject: [PATCH 031/117] modvis: Add hashbang --- modvis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modvis.py b/modvis.py index 5293552..cfcb941 100644 --- a/modvis.py +++ b/modvis.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # -*- coding: utf-8; -*- """A simple import analyzer. Visualize dependencies between modules.""" From e4778421de54c4e8708bf7985866844af5785b2e Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 12:38:45 +0200 Subject: [PATCH 032/117] modvis: fix "from ... 
import d" when in module a.b.c --- modvis.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modvis.py b/modvis.py index cfcb941..f862c9e 100644 --- a/modvis.py +++ b/modvis.py @@ -53,11 +53,14 @@ def resolve(current_module, target_module, level): if level == 0: # absolute import return target_module # level > 0 (let's have some simplistic support for relative imports) + if level > current_module.count(".") + 1: # foo.bar.baz -> max level 3, pointing to top level + raise ValueError("Relative import level {} too large for module name {}".format(level, current_module)) base = current_module for _ in range(level): - k = base.rfind('.') + k = base.rfind(".") if k == -1: - raise ValueError("Relative import level {} too large for module name {}".format(level, current_module)) + base = "" + break base = base[:k] return '.'.join((base, target_module)) From 71f6f1507cfdff0efa08e8ed8097cee642d38425 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 12:40:51 +0200 Subject: [PATCH 033/117] modvis improvements - Use logger for debug messages - Support "from ... 
import foo" syntax (same AST node type as for "from ...foo import stuff", but the semantics of the attributes are different) - Add a uses-relation also to tgt.__init__, since tgt could be a package (this is pruned from output if tgt was actually a module, or was a package and didn't have an __init__ module) --- modvis.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/modvis.py b/modvis.py index f862c9e..0413cf6 100644 --- a/modvis.py +++ b/modvis.py @@ -65,9 +65,10 @@ def resolve(current_module, target_module, level): return '.'.join((base, target_module)) class ImportVisitor(ast.NodeVisitor): - def __init__(self, filenames): + def __init__(self, filenames, logger): self.modules = {} # modname: {dep0, dep1, ...} self.fullpaths = {} # modname: fullpath + self.logger = logger self.analyze(filenames) def analyze(self, filenames): @@ -84,15 +85,40 @@ class ImportVisitor(ast.NodeVisitor): if m not in self.modules: self.modules[m] = set() self.modules[m].add(target_module) + # Just in case the target is a package (we don't know that), add a + # dependency on its __init__ module. If there's no matching __init__ + # (either no __init__.py provided, or the target is a module), + # this is harmless - we just generate a spurious dependency on a + # module that doesn't even exist. + # + # Since nonexistent modules are not in the analyzed set (i.e. do not + # appear as keys of self.modules), prepare_graph will ignore them. + # + # TODO: This would be a problem for a simple plain-text output that doesn't use the graph. 
+ self.modules[m].add(target_module + ".__init__") def visit_Import(self, node): - # print(self.current_module, "Import", [alias.name for alias in node.names]) + self.logger.debug("{}:{}: Import {}".format(self.current_module, node.lineno, [alias.name for alias in node.names])) for alias in node.names: self.add_dependency(alias.name) # alias.asname not relevant for our purposes def visit_ImportFrom(self, node): - # print(self.current_module, "ImportFrom", node.module, node.level) - self.add_dependency(resolve(self.current_module, node.module, node.level)) + # from foo import some_symbol + if node.module: + self.logger.debug("{}:{}: ImportFrom '{}', relative import level {}".format(self.current_module, node.lineno, node.module, node.level)) + absname = resolve(self.current_module, node.module, node.level) + if node.level > 0: + self.logger.debug(" resolved relative import to '{}'".format(absname)) + self.add_dependency(absname) + + # from . import foo --> module = None; now the **names** refer to modules + else: + for alias in node.names: + self.logger.debug("{}:{}: ImportFrom '{}', target module '{}', relative import level {}".format(self.current_module, node.lineno, '.' 
* node.level, alias.name, node.level)) + absname = resolve(self.current_module, alias.name, node.level) + if node.level > 0: + self.logger.debug(" resolved relative import to '{}'".format(absname)) + self.add_dependency(absname) # -------------------------------------------------------------------------------- @@ -218,7 +244,7 @@ def main(): logger.addHandler(handler) # run the analysis - v = ImportVisitor(filenames) + v = ImportVisitor(filenames, logger) # # we could generate a plaintext report like this # ms = v.modules @@ -238,13 +264,15 @@ def main(): options=['rankdir=' + options.rankdir], output=options.filename, logger=logger) + writer.run() if options.tgf: writer = pyan.writers.TgfWriter( graph, output=options.filename, logger=logger) + writer.run() if options.yed: writer = pyan.writers.YedWriter( graph, output=options.filename, logger=logger) - writer.run() + writer.run() if __name__ == '__main__': main() From d74e28f0ab62493791554179640bd09795c7b254 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 13:29:26 +0200 Subject: [PATCH 034/117] modvis: add option -C/--cycles, a simple import cycle detector --- modvis.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/modvis.py b/modvis.py index 0413cf6..d754f4e 100644 --- a/modvis.py +++ b/modvis.py @@ -122,7 +122,42 @@ class ImportVisitor(ast.NodeVisitor): # -------------------------------------------------------------------------------- + def detect_cycles(self): + """Postprocessing. Detect import cycles. + + Return format is `[(prefix, cycle), ...]` where `prefix` is the + non-cyclic prefix of the import chain, and `cycle` contains only + the cyclic part (where the first and last elements are the same). 
+ """ + class CycleDetected(Exception): + def __init__(self, module_names): + self.module_names = module_names + cycles = [] + for root in self.modules: + seen = set() + def walk(m, trace=None): + if m not in self.modules: + return + trace = trace or [] + trace.append(m) + if m in seen: + raise CycleDetected(module_names=trace) + seen.add(m) + deps = self.modules[m] + for d in deps: + walk(d, trace=trace) + try: + walk(root) + except CycleDetected as exc: + # Report the non-cyclic prefix and the cycle separately + names = exc.module_names + offender = names[-1] + k = names.index(offender) + cycles.append((names[:k], names[k:])) + return cycles + def prepare_graph(self): # same format as in pyan.analyzer + """Postprocessing. Prepare data for pyan.visgraph for graph file generation.""" self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) self.uses_edges = {} # we have no defines_edges, which doesn't matter as long as we don't enable that option in visgraph. @@ -203,6 +238,9 @@ def main(): parser.add_option("-e", "--nested-groups", action="store_true", default=False, dest="nested_groups", help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]") + parser.add_option("-C", "--cycles", + action="store_true", default=False, dest="cycles", + help="detect import cycles and print report to stdout") parser.add_option("--dot-rankdir", default="TB", dest="rankdir", help=( "specifies the dot graph 'rankdir' property for " @@ -246,6 +284,18 @@ def main(): # run the analysis v = ImportVisitor(filenames, logger) + if options.cycles: + cycles = v.detect_cycles() + if not cycles: + print("All good! 
No import cycles detected.") + else: + unique_cycles = set() + for prefix, cycle in cycles: + unique_cycles.add(tuple(cycle)) + print("Detected the following import cycles:") + for c in sorted(unique_cycles): + print(" {}".format(c)) + # # we could generate a plaintext report like this # ms = v.modules # for m in sorted(ms): From 587be1f08ce0937ad06b85a7d44dea1f5aa18d92 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 14:22:44 +0200 Subject: [PATCH 035/117] modvis: fix cycle detector; skip graph preparation when not graphing --- modvis.py | 75 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/modvis.py b/modvis.py index d754f4e..d9da13d 100644 --- a/modvis.py +++ b/modvis.py @@ -85,17 +85,24 @@ class ImportVisitor(ast.NodeVisitor): if m not in self.modules: self.modules[m] = set() self.modules[m].add(target_module) - # Just in case the target is a package (we don't know that), add a - # dependency on its __init__ module. If there's no matching __init__ - # (either no __init__.py provided, or the target is a module), - # this is harmless - we just generate a spurious dependency on a - # module that doesn't even exist. + # Just in case the target (or one or more of its parents) is a package + # (we don't know that), add a dependency on the relevant __init__ module. + # + # If there's no matching __init__ (either no __init__.py provided, or + # the target is just a module), this is harmless - we just generate a + # spurious dependency on a module that doesn't even exist. # # Since nonexistent modules are not in the analyzed set (i.e. do not # appear as keys of self.modules), prepare_graph will ignore them. # # TODO: This would be a problem for a simple plain-text output that doesn't use the graph. 
- self.modules[m].add(target_module + ".__init__") + modpath = target_module.split(".") + for k in range(1, len(modpath) + 1): + base = ".".join(modpath[:k]) + possible_init = base + ".__init__" + if possible_init != m: # will happen when current_module is somepackage.__init__ itself + self.modules[m].add(possible_init) + self.logger.debug(" added possible implicit use of '{}'".format(possible_init)) def visit_Import(self, node): self.logger.debug("{}:{}: Import {}".format(self.current_module, node.lineno, [alias.name for alias in node.names])) @@ -129,32 +136,28 @@ class ImportVisitor(ast.NodeVisitor): non-cyclic prefix of the import chain, and `cycle` contains only the cyclic part (where the first and last elements are the same). """ - class CycleDetected(Exception): - def __init__(self, module_names): - self.module_names = module_names cycles = [] + def walk(m, seen=None, trace=None): + trace = (trace or []) + [m] + seen = seen or set() + if m in seen: + cycles.append(trace) + return + seen = seen | {m} + deps = self.modules[m] + for d in deps: + if d in self.modules: + walk(d, seen, trace) for root in self.modules: - seen = set() - def walk(m, trace=None): - if m not in self.modules: - return - trace = trace or [] - trace.append(m) - if m in seen: - raise CycleDetected(module_names=trace) - seen.add(m) - deps = self.modules[m] - for d in deps: - walk(d, trace=trace) - try: - walk(root) - except CycleDetected as exc: - # Report the non-cyclic prefix and the cycle separately - names = exc.module_names - offender = names[-1] - k = names.index(offender) - cycles.append((names[:k], names[k:])) - return cycles + walk(root) + + # For each detected cycle, report the non-cyclic prefix and the cycle separately + out = [] + for cycle in cycles: + offender = cycle[-1] + k = cycle.index(offender) + out.append((cycle[:k], cycle[k:])) + return out def prepare_graph(self): # same format as in pyan.analyzer """Postprocessing. 
Prepare data for pyan.visgraph for graph file generation.""" @@ -304,24 +307,24 @@ def main(): # print(" {}".format(d)) # format graph report - v.prepare_graph() -# print(v.nodes, v.uses_edges) - logger = logging.getLogger(__name__) + make_graph = options.dot or options.tgf or options.yed + if make_graph: + v.prepare_graph() + # print(v.nodes, v.uses_edges) + graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) - graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) if options.dot: writer = pyan.writers.DotWriter(graph, options=['rankdir=' + options.rankdir], output=options.filename, logger=logger) - writer.run() if options.tgf: writer = pyan.writers.TgfWriter( graph, output=options.filename, logger=logger) - writer.run() if options.yed: writer = pyan.writers.YedWriter( graph, output=options.filename, logger=logger) + if make_graph: writer.run() if __name__ == '__main__': From c356e90dbd2ff04c6a8b17474d6b1527ee8e4023 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 15:28:22 +0200 Subject: [PATCH 036/117] modvis: improve usability of cycle detector output --- modvis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modvis.py b/modvis.py index d9da13d..92a1880 100644 --- a/modvis.py +++ b/modvis.py @@ -145,10 +145,10 @@ class ImportVisitor(ast.NodeVisitor): return seen = seen | {m} deps = self.modules[m] - for d in deps: + for d in sorted(deps): if d in self.modules: walk(d, seen, trace) - for root in self.modules: + for root in sorted(self.modules): walk(root) # For each detected cycle, report the non-cyclic prefix and the cycle separately @@ -290,12 +290,12 @@ def main(): if options.cycles: cycles = v.detect_cycles() if not cycles: - print("All good! 
No import cycles detected.") + print("No import cycles detected.") else: unique_cycles = set() for prefix, cycle in cycles: unique_cycles.add(tuple(cycle)) - print("Detected the following import cycles:") + print("Detected the following import cycles (n_results={}):".format(len(unique_cycles))) for c in sorted(unique_cycles): print(" {}".format(c)) From 0a6356c6c57e05078396e3e7b49d53270740fdee Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 15:28:44 +0200 Subject: [PATCH 037/117] modvis: improve docstrings and comments --- modvis.py | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/modvis.py b/modvis.py index 92a1880..469e60b 100644 --- a/modvis.py +++ b/modvis.py @@ -46,7 +46,15 @@ def split_module_name(m): def resolve(current_module, target_module, level): """Return fully qualified name of the target_module in an import. - Resolves relative imports (level > 0) using current_module as the starting point. + If level == 0, the import is absolute, hence target_module is already the + fully qualified name (and will be returned as-is). + + Relative imports (level > 0) are resolved using current_module as the + starting point. Usually this is good enough (especially if you analyze your + project by invoking modvis in its top-level directory). + + For the exact implications, see the section "Import sibling packages" in: + https://alex.dzyoba.com/blog/python-import/ """ if level < 0: raise ValueError("Relative import level must be >= 0, got {}".format(level)) @@ -287,6 +295,33 @@ def main(): # run the analysis v = ImportVisitor(filenames, logger) + # Postprocessing: detect import cycles + # + # NOTE: Because this is a static analysis, it doesn't care about the order + # the code runs in any particular invocation of the software. 
Every + # analyzed module is considered as a possible entry point to the program, + # and all cycles (considering *all* possible branches *at any step* of + # *each* import chain) will be mapped recursively. + # + # Obviously, this easily leads to a combinatoric explosion. In a mid-size + # project (~20k SLOC), the analysis may find thousands of unique import + # cycles, most of which are harmless. + # + # Many cycles appear due to package A importing something from package B + # (possibly from one of its submodules) and vice versa, when both packages + # have an __init__ module. If they don't actually try to import any names + # that only become defined after the init has finished running, it's + # usually fine. + # + # (Init modules often import names from their submodules to the package's + # top-level namespace; those names can be reliably accessed only after the + # init module has finished running. But importing names directly from the + # submodule where they are defined is fine also during the init.) + # + # But if your program is crashing due to a cyclic import, you already know + # in any case *which* import cycle is causing it, just by looking at the + # stack trace. So this analysis is just extra information that says what + # other cycles exist, if any. 
if options.cycles: cycles = v.detect_cycles() if not cycles: @@ -299,14 +334,14 @@ def main(): for c in sorted(unique_cycles): print(" {}".format(c)) - # # we could generate a plaintext report like this + # # we could generate a plaintext report like this (with caveats; see TODO above) # ms = v.modules # for m in sorted(ms): # print(m) # for d in sorted(ms[m]): # print(" {}".format(d)) - # format graph report + # Postprocessing: format graph report make_graph = options.dot or options.tgf or options.yed if make_graph: v.prepare_graph() From 7b53c214ab2b67ba6a6e33a638a0867ba5a08b0c Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 15:53:56 +0200 Subject: [PATCH 038/117] modvis: add relevant SO discussion link to docstring --- modvis.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modvis.py b/modvis.py index 469e60b..094cbfa 100644 --- a/modvis.py +++ b/modvis.py @@ -55,6 +55,8 @@ def resolve(current_module, target_module, level): For the exact implications, see the section "Import sibling packages" in: https://alex.dzyoba.com/blog/python-import/ + and this SO discussion: + https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time """ if level < 0: raise ValueError("Relative import level must be >= 0, got {}".format(level)) From e918b7925fb0a455233efbf59ecc2ccd2ed6655d Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 31 Oct 2019 16:06:37 +0200 Subject: [PATCH 039/117] modvis: compute cycle statistics --- modvis.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/modvis.py b/modvis.py index 094cbfa..e98fbba 100644 --- a/modvis.py +++ b/modvis.py @@ -332,7 +332,20 @@ def main(): unique_cycles = set() for prefix, cycle in cycles: unique_cycles.add(tuple(cycle)) - print("Detected the following import cycles (n_results={}):".format(len(unique_cycles))) + print("Detected the following import cycles (n_results={}).".format(len(unique_cycles))) + def stats(): + lengths = [len(x) - 1 for 
x in unique_cycles] # number of modules in the cycle + def mean(lst): + return sum(lst) / len(lst) + def median(lst): + tmp = list(sorted(lst)) + n = len(lst) + if n % 2 == 1: + return tmp[n // 2] # e.g. tmp[5] if n = 11 + else: + return (tmp[n // 2 - 1] + tmp[n // 2]) / 2 # e.g. avg of tmp[4] and tmp[5] if n = 10 + return min(lengths), mean(lengths), median(lengths), max(lengths) + print("Number of modules in a cycle: min = {}, average = {:0.2g}, median = {:0.2g}, max = {}".format(*stats())) for c in sorted(unique_cycles): print(" {}".format(c)) From aa0b8e842b6187f46237e25c6dd6a6217c9f98da Mon Sep 17 00:00:00 2001 From: Rakan Alanazi Date: Wed, 25 Dec 2019 12:08:37 -0600 Subject: [PATCH 040/117] resolve relative imports --- pyan/analyzer.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 32563e1..44d4adc 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -378,6 +378,16 @@ class CallGraphVisitor(ast.NodeVisitor): # # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time from_node = self.get_node_of_current_namespace() + # resolve relative imports 'None' such as "from . import foo" + if node.module is None: + self.logger.debug("Old ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + tgt_level = node.level + current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] + tgt_name = current_module_namespace + node.module = tgt_name + node.level = 0 + self.logger.debug("New ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + if node.module: # import some names from a module # TODO: This works only for absolute imports. # @@ -387,6 +397,14 @@ class CallGraphVisitor(ast.NodeVisitor): # with node.level). 
# # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom + # pyan can handel Relative imports such as "from .mod import foo" and "from ..mod import foo" + if node.level != 0: + self.logger.debug("Old ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + tgt_level = node.level + current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] + node.module = current_module_namespace+'.'+node.module + self.logger.debug("New ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + tgt_name = node.module to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace @@ -407,9 +425,9 @@ class CallGraphVisitor(ast.NodeVisitor): self.set_value(new_name, tgt_id) self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) - else: # module name missing = "from . import ..." - for import_item in node.names: # the names are modules - self.analyze_module_import(import_item, node) + #else: # module name missing = "from . import ..." + # for import_item in node.names: # the names are modules + # self.analyze_module_import(import_item, node) def analyze_module_import(self, import_item, ast_node): """Analyze a names AST node inside an Import or ImportFrom AST node. @@ -1558,8 +1576,10 @@ class CallGraphVisitor(ast.NodeVisitor): # What about incoming uses edges? E.g. consider a lambda that is saved # in an instance variable, then used elsewhere. How do we want the # graph to look like in that case? 
- - for name in self.nodes: + + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) + # temporary solution is adding list to force a copy of 'self.nodes' + for name in list(self.nodes): if name in ('lambda', 'listcomp', 'setcomp', 'dictcomp', 'genexpr'): for n in self.nodes[name]: pn = self.get_parent_node(n) From 461e95c4bf62805eb72fe9d33fbc2e38457d0556 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Fri, 21 Feb 2020 13:32:20 +0000 Subject: [PATCH 041/117] do not raise error for async node --- pyan/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 32563e1..c0bee1b 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -811,7 +811,7 @@ class CallGraphVisitor(ast.NodeVisitor): or None if not applicable; and flavor is a Flavor, specifically one of FUNCTION, METHOD, STATICMETHOD or CLASSMETHOD.""" - if not isinstance(ast_node, ast.FunctionDef): + if not isinstance(ast_node, (ast.AsyncFunctionDef , ast.FunctionDef)): raise TypeError("Expected ast.FunctionDef; got %s" % (type(ast_node))) # Visit decorators From 290a75837837c13dce8bf610545ef28ef1dbbdd9 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Fri, 21 Feb 2020 19:21:51 +0000 Subject: [PATCH 042/117] visit nested function calls --- pyan/analyzer.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 32563e1..20c0eee 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -541,16 +541,9 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = to_node - # Object unknown, add uses edge to a wildcard by attr name. 
+ # pass on else: - tgt_name = node.attr - from_node = self.get_node_of_current_namespace() - to_node = self.get_node(None, tgt_name, node, flavor=Flavor.UNKNOWN) - self.logger.debug("Use from %s to %s (target obj %s not resolved; maybe fwd ref, function argument, or unanalyzed import)" % (from_node, to_node, objname)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (target obj %s not resolved; maybe fwd ref, function argument, or unanalyzed import)" % (from_node, to_node, objname)) - - self.last_value = to_node + self.visit(node.value) # name access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Name(self, node): From 1fc8ceb47589d7b76f3ff441ecb86b8e489e3c2b Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 27 Feb 2020 15:31:53 +0100 Subject: [PATCH 043/117] ensure package is called pyan (using the 3 is not necessary given that pyan will not work on python 3) --- pyan3 => main | 0 setup.py | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) rename pyan3 => main (100%) diff --git a/pyan3 b/main similarity index 100% rename from pyan3 rename to main diff --git a/setup.py b/setup.py index 7368d51..9d99565 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ from setuptools import setup # # This is also the top level of its source tree, relative to the top-level project directory setup.py resides in. 
# -libname = "pyan3" +libname = "pyan" # Short description for package list on PyPI # @@ -103,7 +103,7 @@ except FileNotFoundError: ######################################################### setup( - name="pyan3", + name="pyan", version=version, author="Juha Jeronen", author_email="juha.m.jeronen@gmail.com", @@ -152,7 +152,7 @@ setup( # packages=["pyan"], - scripts=["pyan3"], + scripts=["main"], zip_safe=True, From 5ca5a63dccc89f90952f1afb176a612e34b123ce Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Wed, 25 Mar 2020 13:55:23 +0000 Subject: [PATCH 044/117] enable writing to svg or html --- README.md | 8 ++++ pyan/analyzer.py | 89 +++++++++++++++++++++++++++++++++++++++++++++ pyan/callgraph.html | 70 +++++++++++++++++++++++++++++++++++ pyan/main.py | 25 ++++++++++++- pyan/writers.py | 67 ++++++++++++++++++++++++++++++++-- setup.py | 4 +- 6 files changed, 257 insertions(+), 6 deletions(-) create mode 100644 pyan/callgraph.html diff --git a/README.md b/README.md index 3f08a16..a3c41a7 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,14 @@ Then render using your favorite GraphViz filter, mainly `dot` or `fdp`: `dot -Tsvg myuses.dot >myuses.svg` +Or use directly + +`pyan3 *.py --uses --no-defines --colored --grouped --annotated --svg >myuses.svg` + +You can also export as an interactive HTML + +`pyan3 *.py --uses --no-defines --colored --grouped --annotated --html >myuses.html` + #### Troubleshooting If GraphViz says *trouble in init_rank*, try adding `-Gnewrank=true`, as in: diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 32563e1..6ba2e01 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -161,6 +161,95 @@ class CallGraphVisitor(ast.NodeVisitor): # Python docs: # https://docs.python.org/3/library/ast.html#abstract-grammar + + def filter(self, node=None, namespace=None, max_iter=1000): + """ + filter callgraph nodes that related to `node` or are in `namespace` + + Args: + node (Union[None, Node]): pyan node for which related nodes should be found, if 
none, filter only for namespace + namespace (Union[str, None]): namespace to search in (name of top level module), + if None, determines namespace from `node` + max_iter (int): maximum number of iterations and nodes to iterate + + Returns: + self + """ + # filter the nodes to avoid cluttering the callgraph with irrelevant information + filtered_nodes = self.get_related_nodes(node, namespace=namespace, max_iter=max_iter) + + self.nodes = { + name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items() + } + self.uses_edges = { + node: [n for n in nodes if n in filtered_nodes] + for node, nodes in self.uses_edges.items() + if node in filtered_nodes + } + self.defines_edges = { + node: [n for n in nodes if n in filtered_nodes] + for node, nodes in self.defines_edges.items() + if node in filtered_nodes + } + return self + + def get_related_nodes(self, node=None, namespace=None, max_iter=1000): + """ + get nodes that related to `node` or are in `namespace` + + Args: + node (Union[None, Node]): pyan node for which related nodes should be found, if none, filter only for namespace + namespace (Union[str, None]): namespace to search in (name of top level module), + if None, determines namespace from `node` + max_iter (int): maximum number of iterations and nodes to iterate + + Returns: + set: set of nodes related to `node` including `node` itself + """ + # check if searching through all nodes is necessary + if node is None: + queue = [] + if namespace is None: + new_nodes = set(self.nodes.values()) + else: + new_nodes = {n for n in self.nodes.values() if namespace in n.namespace} + + else: + new_nodes = set() + if namespace is None: + namespace = node.namespace.strip(".").split(".", 1)[0] + queue = [node] + + # use queue system to search through nodes + # essentially add a node to the queue and then search all connected nodes which are in turn added to the queue + # until the queue itself is empty or the maximum limit of max_iter searches 
have been hit + i = max_iter + while len(queue) > 0: + item = queue.pop() + if item not in new_nodes: + new_nodes.add(item) + i -= 1 + if i < 0: + break + queue.extend( + [ + n + for n in self.uses_edges.get(item, []) + if n in self.uses_edges and n not in new_nodes and namespace in n.namespace + ] + ) + queue.extend( + [ + n + for n in self.defines_edges.get(item, []) + if n in self.defines_edges + and n not in new_nodes + and namespace in n.namespace + ] + ) + + return new_nodes + def visit_Module(self, node): self.logger.debug("Module %s, %s" % (self.module_name, self.filename)) diff --git a/pyan/callgraph.html b/pyan/callgraph.html new file mode 100644 index 0000000..54a2598 --- /dev/null +++ b/pyan/callgraph.html @@ -0,0 +1,70 @@ + + + + + + + + + +

Click node to highlight; Shift-scroll to zoom; Esc to unhighlight

+
+ + + + + + + + + diff --git a/pyan/main.py b/pyan/main.py index e78e17c..5e35e4b 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -15,7 +15,8 @@ from optparse import OptionParser # TODO: migrate to argparse from .analyzer import CallGraphVisitor from .visgraph import VisualGraph -from .writers import TgfWriter, DotWriter, YedWriter +from .writers import TgfWriter, DotWriter, YedWriter, HTMLWriter, SVGWriter + def main(): usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" @@ -29,6 +30,12 @@ def main(): parser.add_option("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") + parser.add_option("--svg", + action="store_true", default=False, + help="output in HTML Format") + parser.add_option("--html", + action="store_true", default=False, + help="output in SVG Format") parser.add_option("--yed", action="store_true", default=False, help="output in yEd GraphML Format") @@ -117,6 +124,22 @@ def main(): logger=logger) writer.run() + if options.html: + writer = HTMLWriter( + graph, + options=['rankdir='+options.rankdir], + output=options.filename, + logger=logger) + writer.run() + + if options.svg: + writer = SVGWriter( + graph, + options=['rankdir='+options.rankdir], + output=options.filename, + logger=logger) + writer.run() + if options.tgf: writer = TgfWriter( graph, output=options.filename, logger=logger) diff --git a/pyan/writers.py b/pyan/writers.py index 94ce89f..f5af1e8 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -1,9 +1,12 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Graph markup writers.""" - +import os +import subprocess import sys import logging +import io +from jinja2 import Template class Writer(object): @@ -29,14 +32,17 @@ class Writer(object): def run(self): self.log('%s running' % type(self)) try: - self.outstream = open(self.output, 'w') + if isinstance(self.output, io.StringIO): # write to stream + self.outstream = self.output + else: + self.outstream = open(self.output, 'w') # write to file except 
TypeError: self.outstream = sys.stdout self.start_graph() self.write_subgraph(self.graph) self.write_edges() self.finish_graph() - if self.output: + if self.output and not isinstance(self.output, io.StringIO): self.outstream.close() def write_subgraph(self, graph): @@ -169,6 +175,61 @@ class DotWriter(Writer): self.write('}') # terminate "digraph G {" +class SVGWriter(DotWriter): + + def run(self): + # write dot file + self.log('%s running' % type(self)) + self.outstream = io.StringIO() + self.start_graph() + self.write_subgraph(self.graph) + self.write_edges() + self.finish_graph() + + # convert to svg + svg = subprocess.run( + f"dot -Tsvg", + shell=True, + stdout=subprocess.PIPE, + input=self.outstream.getvalue().encode() + ).stdout.decode() + + if self.output: + if isinstance(self.output, io.StringIO): + self.output.write(svg) + else: + with open(self.output, "w") as f: + f.write(svg) + else: + print(svg) + + +class HTMLWriter(SVGWriter): + + def run(self): + with io.StringIO() as svg_stream: + # run SVGWriter with stream as output + output = self.output + self.output = svg_stream + super().run() + svg = svg_stream.getvalue() + self.output = output + + # insert svg into html + with open(os.path.join(os.path.dirname(__file__), "callgraph.html"), "r") as f: + template = Template(f.read()) + + html = template.render(svg=svg) + if self.output: + if isinstance(self.output, io.StringIO): + self.output.write(html) + else: + with open(self.output, "w") as f: + f.write(html) + else: + print(html) + + class YedWriter(Writer): def __init__(self, graph, output=None, logger=None, tabstop=2): Writer.__init__( diff --git a/setup.py b/setup.py index 9d99565..aad7e06 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for GitHub proje # # http://stackoverflow.com/questions/13628979/setuptools-how-to-make-package-contain-extra-data-folder-and-all-folders-inside # -datafiles = [] +datafiles = ["pyan/callgraph.html"] 
#getext = lambda filename: os.path.splitext(filename)[1] #for datadir in datadirs: # datafiles.extend( [(root, [os.path.join(root, f) for f in files if getext(f) in dataexts]) @@ -137,7 +137,7 @@ setup( # http://setuptools.readthedocs.io/en/latest/setuptools.html # setup_requires=[], - install_requires=[], + install_requires=["jinja2"], provides=["pyan"], # keywords for PyPI (in case you upload your project) From cb490969927e47ec4944ce064ea024f552fd93f2 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Wed, 25 Mar 2020 17:19:20 +0000 Subject: [PATCH 045/117] update readme and enable calling pyan directly from script or cli for simplified access --- README.md | 14 ++++++-- pyan/__init__.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ pyan/analyzer.py | 7 ++-- pyan/main.py | 14 +++++++- 4 files changed, 118 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a3c41a7..42d06ad 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ See `pyan --help`. Example: -`pyan3 *.py --uses --no-defines --colored --grouped --annotated --dot >myuses.dot` +`pyan *.py --uses --no-defines --colored --grouped --annotated --dot >myuses.dot` Then render using your favorite GraphViz filter, mainly `dot` or `fdp`: @@ -43,11 +43,19 @@ Then render using your favorite GraphViz filter, mainly `dot` or `fdp`: Or use directly -`pyan3 *.py --uses --no-defines --colored --grouped --annotated --svg >myuses.svg` +`pyan *.py --uses --no-defines --colored --grouped --annotated --svg >myuses.svg` You can also export as an interactive HTML -`pyan3 *.py --uses --no-defines --colored --grouped --annotated --html >myuses.html` +`pyan *.py --uses --no-defines --colored --grouped --annotated --html > myuses.html` + +Alternatively, you can call `pyan` from a script + +```shell script +import pyan +from IPython.display import HTML +HTML(pyan.create_callgraph(filenames="**/*.py", format="html")) +``` #### Troubleshooting diff --git a/pyan/__init__.py b/pyan/__init__.py index 
0995ca2..8e1ecbe 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,6 +1,95 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import io +from glob import glob from .main import main +from .analyzer import CallGraphVisitor +from .writers import SVGWriter, HTMLWriter, DotWriter +from .visgraph import VisualGraph __version__ = "1.0.5" + + +def create_callgraph( + filenames="**/*.py", + function=None, + namespace=None, + format="dot", + rankdir="LR", + nested_groups=True, + draw_defines=True, + draw_uses=True, + colored=True, + grouped_alt=False, + annotated=False, + grouped=True, +): + """ + create callgraph based on static code analysis + + Args: + filenames (Union[List[str], str]): glob pattern or list of glob patterns + to identify filenames to parse (`**` for multiple directories) + example: **/*.py for all python files + function (Union[str, None]): if defined, function name to filter for, e.g. "my_module.my_function" + to only include calls that are related to `my_function` + namespace (Union[str, None]): if defined, namespace to filter for, e.g. "my_module", it is highly + recommended to define this filter + format (str): format to write callgraph to, of of "dot", "svg", "html". you need to have graphviz + installed for svg or html output + rankdir (str): direction of graph, e.g. 
"LR" for horizontal or "TB" for vertical + nested_groups (bool): if to group by modules and submodules + draw_defines (bool): if to draw defines edges (functions that are defines) + draw_uses (bool): if to draw uses edges (functions that are used) + colored (bool): if to color graph + grouped_alt (bool): if to use alternative grouping + annotated (bool): if to annotate graph with filenames + grouped (bool): if to group by modules + + Returns: + str: callgraph + """ + if isinstance(filenames, str): + filenames = [filenames] + filenames = [fn2 for fn in filenames for fn2 in glob(fn, recursive=True)] + + if nested_groups: + grouped = True + graph_options = { + "draw_defines": draw_defines, + "draw_uses": draw_uses, + "colored": colored, + "grouped_alt": grouped_alt, + "grouped": grouped, + "nested_groups": nested_groups, + "annotated": annotated, + } + + v = CallGraphVisitor(filenames) + if function or namespace: + if function: + function_name = function.split(".")[-1] + namespace = ".".join(function.split(".")[:-1]) + node = v.get_node(namespace, function_name) + else: + node = None + v.filter(node=node, namespace=namespace) + graph = VisualGraph.from_visitor(v, options=graph_options) + + stream = io.StringIO() + if format == "dot": + writer = DotWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + + elif format == "html": + writer = HTMLWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + + elif format == "svg": + writer = SVGWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + else: + raise ValueError(f"format {format} is unknown") + + return stream.getvalue() diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 6ba2e01..949e1ef 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -210,9 +210,12 @@ class CallGraphVisitor(ast.NodeVisitor): if node is None: queue = [] if namespace is None: - new_nodes = set(self.nodes.values()) + new_nodes = {n for items in self.nodes.values() for n in 
items} else: - new_nodes = {n for n in self.nodes.values() if namespace in n.namespace} + new_nodes = { + n for items in self.nodes.values() for n in items + if n.namespace is not None and namespace in n.namespace + } else: new_nodes = set() diff --git a/pyan/main.py b/pyan/main.py index 5e35e4b..9e10104 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -39,8 +39,12 @@ def main(): parser.add_option("--yed", action="store_true", default=False, help="output in yEd GraphML Format") - parser.add_option("-f", "--file", dest="filename", + parser.add_option("--file", dest="filename", help="write graph to FILE", metavar="FILE", default=None) + parser.add_option("--namespace", dest="namespace", + help="filter for NAMESPACE", metavar="NAMESPACE", default=None) + parser.add_option("--function", dest="function", + help="filter for FUNCTION", metavar="FUNCTION", default=None) parser.add_option("-l", "--log", dest="logname", help="write log to LOG", metavar="LOG") parser.add_option("-v", "--verbose", @@ -114,6 +118,14 @@ def main(): logger.addHandler(handler) v = CallGraphVisitor(filenames, logger) + if options.function or options.namespace: + if options.function: + function_name = options.function.split(".")[-1] + namespace = ".".join(options.function.split(".")[:-1]) + node = v.get_node(namespace, function_name) + else: + node = None + v.filter(node=node, namespace=options.namespace) graph = VisualGraph.from_visitor(v, options=graph_options, logger=logger) if options.dot: From f54675d85a24755eed1dc53d3b14f01acb9c4059 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Wed, 25 Mar 2020 19:12:04 +0000 Subject: [PATCH 046/117] ensure html is installed (cherry picked from commit 52737fcd04c11a0a26cddc792d9b9f17f9f0fe6e) --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index aad7e06..7480ac6 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for GitHub proje # # 
http://stackoverflow.com/questions/13628979/setuptools-how-to-make-package-contain-extra-data-folder-and-all-folders-inside # -datafiles = ["pyan/callgraph.html"] +datafiles = [] #getext = lambda filename: os.path.splitext(filename)[1] #for datadir in datadirs: # datafiles.extend( [(root, [os.path.join(root, f) for f in files if getext(f) in dataexts]) @@ -155,6 +155,8 @@ setup( scripts=["main"], zip_safe=True, + package_data={'pyan': ["callgraph.html"]}, + include_package_data=True, # Custom data files not inside a Python package data_files=datafiles From e6a20b84435cdafbad34328d8b78b0d7e88cbe40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Eduardo=20Montenegro=20Cavalcanti=20de=20Olive?= =?UTF-8?q?ira?= Date: Tue, 28 Apr 2020 22:09:08 -0300 Subject: [PATCH 047/117] Add repository info and some badges --- README.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3f08a16..0838efc 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,21 @@ -# Pyan3: Offline call graph generator for Python 3 +# Pyan3 -Generate approximate call graphs for Python programs. +Offline call graph generator for Python 3 + +[![Build Status](https://travis-ci.com/edumco/pyan.svg?branch=master)](https://travis-ci.com/edumco/pyan) +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fedumco%2Fpyan.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fedumco%2Fpyan?ref=badge_shield) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/7cba5ba5d3694a42a1252243e3634b5e)](https://www.codacy.com/manual/edumco/pyan?utm_source=github.com&utm_medium=referral&utm_content=edumco/pyan&utm_campaign=Badge_Grade) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyan3) Pyan takes one or more Python source files, performs a (rather superficial) static analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. 
The graph can be output for rendering by GraphViz or yEd. -*And now it is available for Python 3!* +This project has 2 official repositories: +- The original stable [davidfraser/pyan](https://github.com/davidfraser/pyan). +- The development repository [Technologicat/pyan](https://github.com/Technologicat/pyan) -**Note**: This is the repository for the Python 3 version of Pyan. The previous Python 2-compatible version is tagged as `pre-python3` in [davidfraser's maintenance repository](https://github.com/davidfraser/pyan). +> The PyPI package [pyan3](https://pypi.org/project/pyan3/) is built from development + +## About [![Example output](graph0.png "Example: GraphViz rendering of Pyan output (click for .svg)")](graph0.svg) From 917cd22269e1ccb2d001b5e92fa3a721c2632465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Eduardo=20Montenegro=20Cavalcanti=20de=20Olive?= =?UTF-8?q?ira?= Date: Thu, 30 Apr 2020 21:39:11 -0300 Subject: [PATCH 048/117] Removes trailing space --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0838efc..5ce0874 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Pyan3 +# Pyan3 Offline call graph generator for Python 3 From a11e505149589fde7ac31b38339784d2399e5e41 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 26 Mar 2020 15:21:12 +0000 Subject: [PATCH 049/117] fix filter for function (cherry picked from commit 4dc0e2be3df979de2dd091974a0fa3612f3baca9) --- pyan/__init__.py | 4 ++-- pyan/analyzer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 8e1ecbe..f482953 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -70,8 +70,8 @@ def create_callgraph( if function or namespace: if function: function_name = function.split(".")[-1] - namespace = ".".join(function.split(".")[:-1]) - node = v.get_node(namespace, function_name) + function_namespace = ".".join(function.split(".")[:-1]) + node = 
v.get_node(function_namespace, function_name) else: node = None v.filter(node=node, namespace=namespace) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 949e1ef..1d9e007 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -182,12 +182,12 @@ class CallGraphVisitor(ast.NodeVisitor): name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items() } self.uses_edges = { - node: [n for n in nodes if n in filtered_nodes] + node: {n for n in nodes if n in filtered_nodes} for node, nodes in self.uses_edges.items() if node in filtered_nodes } self.defines_edges = { - node: [n for n in nodes if n in filtered_nodes] + node: {n for n in nodes if n in filtered_nodes} for node, nodes in self.defines_edges.items() if node in filtered_nodes } From c70474dfa2adcbd9efe4a7029266e748045a4612 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 May 2020 18:18:55 +0100 Subject: [PATCH 050/117] register cli correctly --- main | 11 ----------- setup.py | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) delete mode 100755 main diff --git a/main b/main deleted file mode 100755 index fe3aa49..0000000 --- a/main +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import re -import sys - -from pyan import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/setup.py b/setup.py index 7480ac6..c142ef1 100644 --- a/setup.py +++ b/setup.py @@ -152,7 +152,7 @@ setup( # packages=["pyan"], - scripts=["main"], + entry_points={'console_scripts': ["pyan=pyan.main:main"]}, zip_safe=True, package_data={'pyan': ["callgraph.html"]}, From 55286db2e78945e4f024974ec8e4db00d7e39dec Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 May 2020 18:19:14 +0100 Subject: [PATCH 051/117] add source in callgraph.html --- pyan/callgraph.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyan/callgraph.html b/pyan/callgraph.html index 54a2598..141fca0 
100644 --- a/pyan/callgraph.html +++ b/pyan/callgraph.html @@ -18,6 +18,8 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * Original from https://github.com/mountainstorm/jquery.graphviz.svg/blob/master/demo.html --> From cc7e0a931489d3f543794efc3f56e3403b24cb66 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 May 2020 18:33:25 +0100 Subject: [PATCH 052/117] add type annotations to functions --- pyan/__init__.py | 50 ++++++++++++++++++++++++------------------------ pyan/analyzer.py | 17 ++++++++-------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index f482953..8a09cfc 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -12,40 +12,40 @@ __version__ = "1.0.5" def create_callgraph( - filenames="**/*.py", - function=None, - namespace=None, - format="dot", - rankdir="LR", - nested_groups=True, - draw_defines=True, - draw_uses=True, - colored=True, - grouped_alt=False, - annotated=False, - grouped=True, -): + filenames: Union[List[str], str] = "**/*.py", + function: Union[str, None] = None, + namespace: Union[str, None] = None, + format: str = "dot", + rankdir: str = "LR", + nested_groups: bool = True, + draw_defines: bool = True, + draw_uses: bool = True, + colored: bool = True, + grouped_alt: bool = False, + annotated: bool = False, + grouped: bool = True, +) -> str: """ create callgraph based on static code analysis Args: - filenames (Union[List[str], str]): glob pattern or list of glob patterns + filenames: glob pattern or list of glob patterns to identify filenames to parse (`**` for multiple directories) example: **/*.py for all python files - function (Union[str, None]): if defined, function name to filter for, e.g. "my_module.my_function" + function: if defined, function name to filter for, e.g. 
"my_module.my_function" to only include calls that are related to `my_function` - namespace (Union[str, None]): if defined, namespace to filter for, e.g. "my_module", it is highly + namespace: if defined, namespace to filter for, e.g. "my_module", it is highly recommended to define this filter - format (str): format to write callgraph to, of of "dot", "svg", "html". you need to have graphviz + format: format to write callgraph to, of of "dot", "svg", "html". you need to have graphviz installed for svg or html output - rankdir (str): direction of graph, e.g. "LR" for horizontal or "TB" for vertical - nested_groups (bool): if to group by modules and submodules - draw_defines (bool): if to draw defines edges (functions that are defines) - draw_uses (bool): if to draw uses edges (functions that are used) - colored (bool): if to color graph - grouped_alt (bool): if to use alternative grouping - annotated (bool): if to annotate graph with filenames - grouped (bool): if to group by modules + rankdir: direction of graph, e.g. 
"LR" for horizontal or "TB" for vertical + nested_groups: if to group by modules and submodules + draw_defines: if to draw defines edges (functions that are defines) + draw_uses: if to draw uses edges (functions that are used) + colored: if to color graph + grouped_alt: if to use alternative grouping + annotated: if to annotate graph with filenames + grouped: if to group by modules Returns: str: callgraph diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 1d9e007..b2fc1be 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -5,6 +5,7 @@ import logging import ast import symtable +from typing import Union from .node import Node, Flavor from .anutils import tail, get_module_name, format_alias, \ @@ -162,15 +163,15 @@ class CallGraphVisitor(ast.NodeVisitor): # https://docs.python.org/3/library/ast.html#abstract-grammar - def filter(self, node=None, namespace=None, max_iter=1000): + def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000): """ filter callgraph nodes that related to `node` or are in `namespace` Args: - node (Union[None, Node]): pyan node for which related nodes should be found, if none, filter only for namespace - namespace (Union[str, None]): namespace to search in (name of top level module), + node: pyan node for which related nodes should be found, if none, filter only for namespace + namespace: namespace to search in (name of top level module), if None, determines namespace from `node` - max_iter (int): maximum number of iterations and nodes to iterate + max_iter: maximum number of iterations and nodes to iterate Returns: self @@ -193,15 +194,15 @@ class CallGraphVisitor(ast.NodeVisitor): } return self - def get_related_nodes(self, node=None, namespace=None, max_iter=1000): + def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int =1000) -> set: """ get nodes that related to `node` or are in `namespace` Args: - node (Union[None, Node]): pyan 
node for which related nodes should be found, if none, filter only for namespace - namespace (Union[str, None]): namespace to search in (name of top level module), + node: pyan node for which related nodes should be found, if none, filter only for namespace + namespace: namespace to search in (name of top level module), if None, determines namespace from `node` - max_iter (int): maximum number of iterations and nodes to iterate + max_iter: maximum number of iterations and nodes to iterate Returns: set: set of nodes related to `node` including `node` itself From e02a95654ebdbb4d81f09aaf2083333d1f495f67 Mon Sep 17 00:00:00 2001 From: Mantas Date: Fri, 8 May 2020 15:32:06 +0300 Subject: [PATCH 053/117] Set up pyan3 as entry point Fixes https://github.com/Technologicat/pyan/issues/23 --- README.md | 7 ++++++- pyan/__main__.py | 5 +++++ pyan3 | 11 ----------- setup.py | 10 +++++++--- visualize_pyan_architecture.sh | 2 +- 5 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 pyan/__main__.py delete mode 100755 pyan3 diff --git a/README.md b/README.md index 5ce0874..b8996ca 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,14 @@ The static analysis approach Pyan takes is different from running the code and s In Pyan3, the analyzer was ported from `compiler` ([good riddance](https://stackoverflow.com/a/909172)) to a combination of `ast` and `symtable`, and slightly extended. +# Install + + pip install pyan3 + + # Usage -See `pyan --help`. +See `pyan3 --help`. 
Example: diff --git a/pyan/__main__.py b/pyan/__main__.py new file mode 100644 index 0000000..307d92e --- /dev/null +++ b/pyan/__main__.py @@ -0,0 +1,5 @@ +import pyan + + +if __name__ == "__main__": + pyan.main() diff --git a/pyan3 b/pyan3 deleted file mode 100755 index fe3aa49..0000000 --- a/pyan3 +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import re -import sys - -from pyan import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/setup.py b/setup.py index 7368d51..d943c60 100644 --- a/setup.py +++ b/setup.py @@ -152,10 +152,14 @@ setup( # packages=["pyan"], - scripts=["pyan3"], - zip_safe=True, # Custom data files not inside a Python package - data_files=datafiles + data_files=datafiles, + + entry_points={ + 'console_scripts': [ + 'pyan3 = pyan.main:main', + ] + }, ) diff --git a/visualize_pyan_architecture.sh b/visualize_pyan_architecture.sh index f7471c4..22c6334 100755 --- a/visualize_pyan_architecture.sh +++ b/visualize_pyan_architecture.sh @@ -1,4 +1,4 @@ #!/bin/bash echo -ne "Pyan architecture: generating architecture.{dot,svg}\n" -./pyan.py pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log +python3 -m pyan pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log dot -Tsvg architecture.dot >architecture.svg From 20798ee699c3150c18d19c533d86f27d7386d278 Mon Sep 17 00:00:00 2001 From: Mantas Date: Fri, 8 May 2020 16:32:13 +0300 Subject: [PATCH 054/117] Check if list of file globs matches at least one file When given glob does not match any files, pyan3 exits without telling anything and generated an empty graph. That might be confusing, when an incorrect file path is given. To avoid confusion, added additional check if glob matches at least one file. Now it works like this: > pyan3 --dot -f /tmp/graph.dot does/not/exist.py Usage: pyan3 FILENAME... 
[--dot|--tgf|--yed] pyan3: error: No files found matching given glob: does/not/exist.py --- pyan/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyan/main.py b/pyan/main.py index e78e17c..7c7e275 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -80,6 +80,8 @@ def main(): filenames = [fn2 for fn in args for fn2 in glob(fn)] if len(args) == 0: parser.error('Need one or more filenames to process') + if len(args) > 0 and len(filenames) == 0: + parser.error('No files found matching given glob: %s' % ' '.join(args)) if options.nested_groups: options.grouped = True From 24442a71d213fbeded73bd141e11b0f525591375 Mon Sep 17 00:00:00 2001 From: Rakan Alanazi Date: Sat, 9 May 2020 02:11:07 -0500 Subject: [PATCH 055/117] resolve relative imports --- pyan/analyzer.py | 54 +++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 44d4adc..d35a10e 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -380,15 +380,13 @@ class CallGraphVisitor(ast.NodeVisitor): from_node = self.get_node_of_current_namespace() # resolve relative imports 'None' such as "from . import foo" if node.module is None: - self.logger.debug("Old ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] tgt_name = current_module_namespace - node.module = tgt_name - node.level = 0 - self.logger.debug("New ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) - if node.module: # import some names from a module + else: # import some names from a module # TODO: This works only for absolute imports. # # Relative imports such as "from .mod import foo" and @@ -397,37 +395,33 @@ class CallGraphVisitor(ast.NodeVisitor): # with node.level). # # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom - # pyan can handel Relative imports such as "from .mod import foo" and "from ..mod import foo" + # pyan can handle Relative imports such as "from .mod import foo" and "from ..mod import foo" if node.level != 0: - self.logger.debug("Old ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] - node.module = current_module_namespace+'.'+node.module - self.logger.debug("New ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) - - tgt_name = node.module - - to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace - self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) - if self.add_uses_edge(from_node, to_node): - 
self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) - - if tgt_name in self.module_names: - mod_name = self.module_names[tgt_name] + tgt_name = current_module_namespace + '.' + node.module + self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) else: - mod_name = tgt_name + tgt_name = node.module - for import_item in node.names: # the names are items inside the module - name = import_item.name - new_name = import_item.asname if import_item.asname is not None else name + to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace + self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) + if self.add_uses_edge(from_node, to_node): + self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + + if tgt_name in self.module_names: + mod_name = self.module_names[tgt_name] + else: + mod_name = tgt_name + + for import_item in node.names: # the names are items inside the module + name = import_item.name + new_name = import_item.asname if import_item.asname is not None else name # we imported the identifier name from the module mod_name - tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) - self.set_value(new_name, tgt_id) - self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) - - #else: # module name missing = "from . import ..." - # for import_item in node.names: # the names are modules - # self.analyze_module_import(import_item, node) + tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) + self.set_value(new_name, tgt_id) + self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) def analyze_module_import(self, import_item, ast_node): """Analyze a names AST node inside an Import or ImportFrom AST node. 
From 550e4e6864fa726ed770dd6a47482607465888f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Eduardo=20Montenegro=20Cavalcanti=20de=20Olive?= =?UTF-8?q?ira?= Date: Mon, 18 May 2020 15:08:13 -0300 Subject: [PATCH 056/117] Renames pyan.py for pyan3 --- visualize_pyan_architecture.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualize_pyan_architecture.sh b/visualize_pyan_architecture.sh index f7471c4..6fa76db 100755 --- a/visualize_pyan_architecture.sh +++ b/visualize_pyan_architecture.sh @@ -1,4 +1,4 @@ #!/bin/bash echo -ne "Pyan architecture: generating architecture.{dot,svg}\n" -./pyan.py pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log +./pyan3 pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log dot -Tsvg architecture.dot >architecture.svg From e32f400871c26f1670d55c2881917b5a6b8b2045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Eduardo=20Montenegro=20Cavalcanti=20de=20Olive?= =?UTF-8?q?ira?= Date: Mon, 18 May 2020 15:14:00 -0300 Subject: [PATCH 057/117] Adds wheel to the setup requirement --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7368d51..34f8123 100644 --- a/setup.py +++ b/setup.py @@ -136,7 +136,7 @@ setup( # See # http://setuptools.readthedocs.io/en/latest/setuptools.html # - setup_requires=[], + setup_requires=["wheel"], install_requires=[], provides=["pyan"], From 4475e767cf51878b30a96ceb15603a0c64be5ed0 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Tue, 15 Sep 2020 12:35:44 +0300 Subject: [PATCH 058/117] Update visualize_pyan_architecture.sh --- visualize_pyan_architecture.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualize_pyan_architecture.sh b/visualize_pyan_architecture.sh index 36ed24f..22c6334 100755 --- a/visualize_pyan_architecture.sh +++ b/visualize_pyan_architecture.sh @@ -1,4 +1,4 @@ #!/bin/bash echo -ne "Pyan architecture: 
generating architecture.{dot,svg}\n" python3 -m pyan pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log -dot -Tsvg architecture.dot >architecture.svg \ No newline at end of file +dot -Tsvg architecture.dot >architecture.svg From bee5eda0d0fe4c28555bbec4e7c65caafe081dc0 Mon Sep 17 00:00:00 2001 From: Ioannis Filippidis Date: Tue, 15 Sep 2020 17:26:51 +0300 Subject: [PATCH 059/117] DOC: rm trailing whitespace --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 647da60..7dda6f8 100644 --- a/README.md +++ b/README.md @@ -211,4 +211,3 @@ This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeron # License [GPL v2](LICENSE.md), as per [comments here](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/). - From 7b8de6e81cbd3bbb80ba3f0bbcb2f048dfcb5692 Mon Sep 17 00:00:00 2001 From: Ioannis Filippidis Date: Tue, 15 Sep 2020 17:27:12 +0300 Subject: [PATCH 060/117] STY: rm trailing whitespace --- pyan/analyzer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 34b2498..cbcef9b 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -474,7 +474,7 @@ class CallGraphVisitor(ast.NodeVisitor): # resolve relative imports 'None' such as "from . import foo" if node.module is None: self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) - tgt_level = node.level + tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] tgt_name = current_module_namespace self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) @@ -491,7 +491,7 @@ class CallGraphVisitor(ast.NodeVisitor): # pyan can handle Relative imports such as "from .mod import foo" and "from ..mod import foo" if node.level != 0: self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) - tgt_level = node.level + tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] tgt_name = current_module_namespace + '.' + node.module self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) @@ -1656,7 +1656,7 @@ class CallGraphVisitor(ast.NodeVisitor): # What about incoming uses edges? E.g. consider a lambda that is saved # in an instance variable, then used elsewhere. How do we want the # graph to look like in that case? - + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): From 315091a39986fbd5674e0164dbc7c9d9ee341bb5 Mon Sep 17 00:00:00 2001 From: Ioannis Filippidis Date: Tue, 15 Sep 2020 17:37:22 +0300 Subject: [PATCH 061/117] PEP8: shorter lines --- setup.py | 63 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/setup.py b/setup.py index 365e127..b070168 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,8 @@ from setuptools import setup # Name of the top-level package of the library. 
# -# This is also the top level of its source tree, relative to the top-level project directory setup.py resides in. +# This is also the top level of its source tree, +# relative to the top-level project directory setup.py resides in. # libname = "pyan" @@ -38,14 +39,18 @@ SHORTDESC = "Offline call graph generator for Python 3" # Long description for package homepage on PyPI # -DESC = """Generate approximate call graphs for Python programs. - -Pyan takes one or more Python source files, performs a (rather superficial) static analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output for rendering by GraphViz or yEd. -""" +DESC = ( + 'Generate approximate call graphs for Python programs.\n' + '\n' + 'Pyan takes one or more Python source files, performs a ' + '(rather superficial) static analysis, and constructs a directed graph of ' + 'the objects in the combined source, and how they define or ' + 'use each other. The graph can be output for rendering by GraphViz or yEd.') # Set up data files for packaging. # -# Directories (relative to the top-level directory where setup.py resides) in which to look for data files. +# Directories (relative to the top-level directory where setup.py resides) in +# which to look for data files. datadirs = () # File extensions to be considered as data files. (Literal, no wildcards.) @@ -53,8 +58,10 @@ dataexts = (".py", ".ipynb", ".sh", ".lyx", ".tex", ".txt", ".pdf") # Standard documentation to detect (and package if it exists). # -standard_docs = ["README", "LICENSE", "TODO", "CHANGELOG", "AUTHORS"] # just the basename without file extension -standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for GitHub projects, but other projects may use .rst or .txt (or even blank). 
+standard_docs = ["README", "LICENSE", "TODO", "CHANGELOG", "AUTHORS"] + # just the basename without file extension +standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for + # GitHub projects, but other projects may use .rst or .txt (or even blank). ######################################################### # Init @@ -62,12 +69,13 @@ standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for GitHub proje # Gather user-defined data files # -# http://stackoverflow.com/questions/13628979/setuptools-how-to-make-package-contain-extra-data-folder-and-all-folders-inside +# https://stackoverflow.com/q/13628979/1959808 # datafiles = [] #getext = lambda filename: os.path.splitext(filename)[1] #for datadir in datadirs: -# datafiles.extend( [(root, [os.path.join(root, f) for f in files if getext(f) in dataexts]) +# datafiles.extend( [(root, [os.path.join(root, f) +# for f in files if getext(f) in dataexts]) # for root, dirs, files in os.walk(datadir)] ) # Add standard documentation (README et al.), if any, to data files @@ -75,15 +83,17 @@ datafiles = [] detected_docs = [] for docname in standard_docs: for ext in standard_doc_exts: - filename = "".join((docname, ext)) # relative to the directory in which setup.py resides + filename = "".join((docname, ext)) # relative to the directory in + # which setup.py resides if os.path.isfile(filename): detected_docs.append(filename) datafiles.append(('.', detected_docs)) # Extract __version__ from the package __init__.py -# (since it's not a good idea to actually run __init__.py during the build process). +# (since it's not a good idea to actually run __init__.py during the +# build process). 
# -# http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package +# https://stackoverflow.com/q/2058802/1959808 # init_py_path = os.path.join('pyan', '__init__.py') version = '0.0.unknown' @@ -94,9 +104,17 @@ try: version = ast.parse(line).body[0].value.s break else: - print("WARNING: Version information not found in '%s', using placeholder '%s'" % (init_py_path, version), file=sys.stderr) + print(( + "WARNING: Version information not found in " + "'{path}', using placeholder '{version}'").format( + path=init_py_path, version=version), + file=sys.stderr) except FileNotFoundError: - print("WARNING: Could not find file '%s', using placeholder version information '%s'" % (init_py_path, version), file=sys.stderr) + print(( + "WARNING: Could not find file '{path}', " + "using placeholder version information '{version}'").format( + path=init_py_path, version=version), + file=sys.stderr) ######################################################### # Call setup() @@ -114,7 +132,8 @@ setup( license="GPL 2.0", - # free-form text field; http://stackoverflow.com/questions/34994130/what-platforms-argument-to-setup-in-setup-py-does + # free-form text field; + # https://stackoverflow.com/q/34994130/1959808 platforms=["Linux"], # See @@ -125,7 +144,8 @@ setup( classifiers=["Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + ("License :: OSI Approved :: " + "GNU General Public License v2 (GPLv2)"), "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3", @@ -142,13 +162,16 @@ setup( # keywords for PyPI (in case you upload your project) # - # e.g. the keywords your project uses as topics on GitHub, minus "python" (if there) + # e.g. 
the keywords your project uses as topics on GitHub, + # minus "python" (if there) # keywords=["call-graph", "static-code-analysis"], - # Declare packages so that python -m setup build will copy .py files (especially __init__.py). + # Declare packages so that python -m setup build will copy .py files + # (especially __init__.py). # - # This **does not** automatically recurse into subpackages, so they must also be declared. + # This **does not** automatically recurse into subpackages, + # so they must also be declared. # packages=["pyan"], From 4ad92e682b9e694d25b9839bb67c004fca90ff96 Mon Sep 17 00:00:00 2001 From: Ioannis Filippidis Date: Wed, 16 Sep 2020 07:21:44 +0300 Subject: [PATCH 062/117] BUG: add missing import from `typing` --- pyan/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyan/__init__.py b/pyan/__init__.py index 8a09cfc..30236fd 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import io from glob import glob +from typing import List, Union from .main import main from .analyzer import CallGraphVisitor From d524f657d2abc939676785f3abb2962cac5cc87a Mon Sep 17 00:00:00 2001 From: Ioannis Filippidis Date: Wed, 16 Sep 2020 07:23:11 +0300 Subject: [PATCH 063/117] MAI: correct help text of argument parser options --- pyan/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyan/main.py b/pyan/main.py index 67415e0..cfda6be 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -32,10 +32,10 @@ def main(): help="output in Trivial Graph Format") parser.add_option("--svg", action="store_true", default=False, - help="output in HTML Format") + help="output in SVG Format") parser.add_option("--html", action="store_true", default=False, - help="output in SVG Format") + help="output in HTML Format") parser.add_option("--yed", action="store_true", default=False, help="output in yEd GraphML Format") From 24cce1ea49255a94a1b15342a46afd68effd9fda Mon Sep 17 00:00:00 2001 From: Jan Malek Date: 
Sun, 4 Oct 2020 13:10:08 +0100 Subject: [PATCH 064/117] Argparse argument-handling for main.py Updates the default CLI argument handling from Optparse to Argparse. Also adds an optional argument to inject the CLI args as a list of strings to facilitate testing/programmatic usage. --- pyan/main.py | 356 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 240 insertions(+), 116 deletions(-) diff --git a/pyan/main.py b/pyan/main.py index 67415e0..137fdda 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- """ pyan.py - Generate approximate call graphs for Python programs. - This program takes one or more Python source files, does a superficial analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output @@ -11,157 +10,282 @@ import logging from glob import glob -from optparse import OptionParser # TODO: migrate to argparse +from argparse import ArgumentParser from .analyzer import CallGraphVisitor from .visgraph import VisualGraph from .writers import TgfWriter, DotWriter, YedWriter, HTMLWriter, SVGWriter +def main(cli_args=None): + usage = """%(prog)s FILENAME... [--dot|--tgf|--yed|--svg|--html]""" + desc = ( + 'Analyse one or more Python source files and generate an' + 'approximate call graph of the modules, classes and functions' + ' within them.' + ) -def main(): - usage = """usage: %prog FILENAME... 
[--dot|--tgf|--yed]""" - desc = ('Analyse one or more Python source files and generate an' - 'approximate call graph of the modules, classes and functions' - ' within them.') - parser = OptionParser(usage=usage, description=desc) - parser.add_option("--dot", - action="store_true", default=False, - help="output in GraphViz dot format") - parser.add_option("--tgf", - action="store_true", default=False, - help="output in Trivial Graph Format") - parser.add_option("--svg", - action="store_true", default=False, - help="output in HTML Format") - parser.add_option("--html", - action="store_true", default=False, - help="output in SVG Format") - parser.add_option("--yed", - action="store_true", default=False, - help="output in yEd GraphML Format") - parser.add_option("--file", dest="filename", - help="write graph to FILE", metavar="FILE", default=None) - parser.add_option("--namespace", dest="namespace", - help="filter for NAMESPACE", metavar="NAMESPACE", default=None) - parser.add_option("--function", dest="function", - help="filter for FUNCTION", metavar="FUNCTION", default=None) - parser.add_option("-l", "--log", dest="logname", - help="write log to LOG", metavar="LOG") - parser.add_option("-v", "--verbose", - action="store_true", default=False, dest="verbose", - help="verbose output") - parser.add_option("-V", "--very-verbose", - action="store_true", default=False, dest="very_verbose", - help="even more verbose output (mainly for debug)") - parser.add_option("-d", "--defines", - action="store_true", default=True, dest="draw_defines", - help="add edges for 'defines' relationships [default]") - parser.add_option("-n", "--no-defines", - action="store_false", default=True, dest="draw_defines", - help="do not add edges for 'defines' relationships") - parser.add_option("-u", "--uses", - action="store_true", default=True, dest="draw_uses", - help="add edges for 'uses' relationships [default]") - parser.add_option("-N", "--no-uses", - action="store_false", default=True, 
dest="draw_uses", - help="do not add edges for 'uses' relationships") - parser.add_option("-c", "--colored", - action="store_true", default=False, dest="colored", - help="color nodes according to namespace [dot only]") - parser.add_option("-G", "--grouped-alt", - action="store_true", default=False, dest="grouped_alt", - help="suggest grouping by adding invisible defines edges [only useful with --no-defines]") - parser.add_option("-g", "--grouped", - action="store_true", default=False, dest="grouped", - help="group nodes (create subgraphs) according to namespace [dot only]") - parser.add_option("-e", "--nested-groups", - action="store_true", default=False, dest="nested_groups", - help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]") - parser.add_option("--dot-rankdir", default="TB", dest="rankdir", - help=( - "specifies the dot graph 'rankdir' property for " - "controlling the direction of the graph. " - "Allowed values: ['TB', 'LR', 'BT', 'RL']. " - "[dot only]")) - parser.add_option("-a", "--annotated", - action="store_true", default=False, dest="annotated", - help="annotate with module and source line number") + parser = ArgumentParser(usage=usage, description=desc) - options, args = parser.parse_args() - filenames = [fn2 for fn in args for fn2 in glob(fn)] - if len(args) == 0: + parser.add_argument( + "--dot", + default=False, + help="output in GraphViz dot format" + ) + + parser.add_argument( + "--tgf", + action="store_true", + default=False, + help="output in Trivial Graph Format" + ) + + parser.add_argument( + "--svg", + default=False, + help="output in SVG Format" + ) + + parser.add_argument( + "--html", + action="store_true", + default=False, + help="output in HTML Format" + ) + + parser.add_argument( + "--yed", + action="store_true", + default=False, + help="output in yEd GraphML Format" + ) + + parser.add_argument( + "--file", + dest="filename", + help="write graph to FILE", + metavar="FILE", + default=None + ) + + 
parser.add_argument( + "--namespace", + dest="namespace", + help="filter for NAMESPACE", + metavar="NAMESPACE", + default=None + ) + + parser.add_argument( + "--function", + dest="function", + help="filter for FUNCTION", + metavar="FUNCTION", + default=None + ) + + parser.add_argument( + "-l", "--log", + dest="logname", + help="write log to LOG", + metavar="LOG" + ) + + parser.add_argument( + "-v", "--verbose", + action="store_true", + default=False, + dest="verbose", + help="verbose output" + ) + + parser.add_argument( + "-V", "--very-verbose", + action="store_true", + default=False, + dest="very_verbose", + help="even more verbose output (mainly for debug)" + ) + + parser.add_argument( + "-d", "--defines", + action="store_true", + dest="draw_defines", + help="add edges for 'defines' relationships [default]" + ) + + parser.add_argument( + "-n", "--no-defines", + action="store_false", + default=True, + dest="draw_defines", + help="do not add edges for 'defines' relationships" + ) + + parser.add_argument( + "-u", "--uses", + action="store_true", + default=True, + dest="draw_uses", + help="add edges for 'uses' relationships [default]" + ) + + parser.add_argument( + "-N", "--no-uses", + action="store_false", + default=True, + dest="draw_uses", + help="do not add edges for 'uses' relationships" + ) + + parser.add_argument( + "-c", "--colored", + action="store_true", + default=False, + dest="colored", + help="color nodes according to namespace [dot only]" + ) + + parser.add_argument( + "-G", "--grouped-alt", + action="store_true", + default=False, + dest="grouped_alt", + help="suggest grouping by adding invisible defines edges [only useful with --no-defines]" + ) + + parser.add_argument( + "-g", "--grouped", + action="store_true", + default=False, + dest="grouped", + help="group nodes (create subgraphs) according to namespace [dot only]" + ) + + parser.add_argument( + "-e", "--nested-groups", + action="store_true", + default=False, + dest="nested_groups", + help="create 
nested groups (subgraphs) for nested namespaces (implies -g) [dot only]" + ) + + parser.add_argument( + "--dot-rankdir", + default="TB", + dest="rankdir", + help=( + "specifies the dot graph 'rankdir' property for " + "controlling the direction of the graph. " + "Allowed values: ['TB', 'LR', 'BT', 'RL']. " + "[dot only]" + ) + ) + + parser.add_argument( + "-a", "--annotated", + action="store_true", + default=False, + dest="annotated", + help="annotate with module and source line number" + ) + + known_args, unknown_args = parser.parse_known_args(cli_args) + + filenames = [fn2 for fn in unknown_args for fn2 in glob(fn)] + + if len(unknown_args) == 0: parser.error('Need one or more filenames to process') - if len(args) > 0 and len(filenames) == 0: - parser.error('No files found matching given glob: %s' % ' '.join(args)) + elif len(filenames) == 0: + parser.error('No files found matching given glob: %s' % ' '.join(unknown_args)) - if options.nested_groups: - options.grouped = True + if known_args.nested_groups: + known_args.grouped = True graph_options = { - 'draw_defines': options.draw_defines, - 'draw_uses': options.draw_uses, - 'colored': options.colored, - 'grouped_alt' : options.grouped_alt, - 'grouped': options.grouped, - 'nested_groups': options.nested_groups, - 'annotated': options.annotated} + 'draw_defines': known_args.draw_defines, + 'draw_uses': known_args.draw_uses, + 'colored': known_args.colored, + 'grouped_alt' : known_args.grouped_alt, + 'grouped': known_args.grouped, + 'nested_groups': known_args.nested_groups, + 'annotated': known_args.annotated + } # TODO: use an int argument for verbosity logger = logging.getLogger(__name__) - if options.very_verbose: + + if known_args.very_verbose: logger.setLevel(logging.DEBUG) - elif options.verbose: + + elif known_args.verbose: logger.setLevel(logging.INFO) + else: logger.setLevel(logging.WARN) + logger.addHandler(logging.StreamHandler()) - if options.logname: - handler = logging.FileHandler(options.logname) + 
+ if known_args.logname: + handler = logging.FileHandler(known_args.logname) logger.addHandler(handler) v = CallGraphVisitor(filenames, logger) - if options.function or options.namespace: - if options.function: - function_name = options.function.split(".")[-1] - namespace = ".".join(options.function.split(".")[:-1]) + + if known_args.function or known_args.namespace: + + if known_args.function: + function_name = known_args.function.split(".")[-1] + namespace = ".".join(known_args.function.split(".")[:-1]) node = v.get_node(namespace, function_name) + else: node = None - v.filter(node=node, namespace=options.namespace) + + v.filter(node=node, namespace=known_args.namespace) + graph = VisualGraph.from_visitor(v, options=graph_options, logger=logger) - if options.dot: + writer = None + + if known_args.dot: writer = DotWriter( - graph, - options=['rankdir='+options.rankdir], - output=options.filename, - logger=logger) - writer.run() + graph, + options=['rankdir='+known_args.rankdir], + output=known_args.filename, + logger=logger + ) - if options.html: + if known_args.html: writer = HTMLWriter( - graph, - options=['rankdir='+options.rankdir], - output=options.filename, - logger=logger) - writer.run() + graph, + options=['rankdir='+known_args.rankdir], + output=known_args.filename, + logger=logger + ) - if options.svg: + if known_args.svg: writer = SVGWriter( - graph, - options=['rankdir='+options.rankdir], - output=options.filename, - logger=logger) - writer.run() + graph, + options=['rankdir='+known_args.rankdir], + output=known_args.filename, + logger=logger + ) - if options.tgf: + if known_args.tgf: writer = TgfWriter( - graph, output=options.filename, logger=logger) - writer.run() + graph, + output=known_args.filename, + logger=logger + ) - if options.yed: + if known_args.yed: writer = YedWriter( - graph, output=options.filename, logger=logger) + graph, + output=known_args.filename, + logger=logger + ) + + if writer: writer.run() From 
f16d2995dcfb2195498fbdbbb83f812946fb7286 Mon Sep 17 00:00:00 2001 From: Jan Malek Date: Sun, 4 Oct 2020 13:11:29 +0100 Subject: [PATCH 065/117] Restores a stray bit of whitespace. --- pyan/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyan/main.py b/pyan/main.py index 137fdda..b931e43 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ pyan.py - Generate approximate call graphs for Python programs. + This program takes one or more Python source files, does a superficial analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output From 6bc63e7fc12ae4d0150a482ba34fa873e64d4ed7 Mon Sep 17 00:00:00 2001 From: Jan Malek Date: Sun, 4 Oct 2020 13:17:36 +0100 Subject: [PATCH 066/117] Restores some store_trues that got eaten. --- pyan/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyan/main.py b/pyan/main.py index b931e43..c5af354 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ pyan.py - Generate approximate call graphs for Python programs. - + This program takes one or more Python source files, does a superficial analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output @@ -29,6 +29,7 @@ def main(cli_args=None): parser.add_argument( "--dot", + action="store_true", default=False, help="output in GraphViz dot format" ) @@ -42,6 +43,7 @@ def main(cli_args=None): parser.add_argument( "--svg", + action="store_true", default=False, help="output in SVG Format" ) From 038757ba7fb99351e3349cae3bc29865953532ae Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 13 Oct 2020 14:49:17 +0100 Subject: [PATCH 067/117] resolve from imports including from . 
import foo --- pyan/__init__.py | 1 + pyan/analyzer.py | 56 +++++++++++------------------------------------- 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 8a09cfc..8745027 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +from typing import Union, List import io from glob import glob diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 34b2498..28b4292 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -46,12 +46,9 @@ class CallGraphVisitor(ast.NodeVisitor): self.logger = logger or logging.getLogger(__name__) # full module names for all given files - self.module_names = {} self.module_to_filename = {} # inverse mapping for recording which file each AST node came from for filename in filenames: mod_name = get_module_name(filename) - short_name = mod_name.rsplit('.', 1)[-1] - self.module_names[short_name] = mod_name self.module_to_filename[mod_name] = filename self.filenames = filenames @@ -471,8 +468,7 @@ class CallGraphVisitor(ast.NodeVisitor): # # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time from_node = self.get_node_of_current_namespace() - # resolve relative imports 'None' such as "from . import foo" - if node.module is None: + if node.module is None: # resolve relative imports 'None' such as "from . import foo" self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] @@ -503,19 +499,6 @@ class CallGraphVisitor(ast.NodeVisitor): if self.add_uses_edge(from_node, to_node): self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) - if tgt_name in self.module_names: - mod_name = self.module_names[tgt_name] - else: - mod_name = tgt_name - - for import_item in node.names: # the names are items inside the module - name = import_item.name - new_name = import_item.asname if import_item.asname is not None else name - # we imported the identifier name from the module mod_name - tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) - self.set_value(new_name, tgt_id) - self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) - def analyze_module_import(self, import_item, ast_node): """Analyze a names AST node inside an Import or ImportFrom AST node. @@ -525,39 +508,26 @@ class CallGraphVisitor(ast.NodeVisitor): ast_node: for recording source location information """ src_name = import_item.name # what is being imported - tgt_name = import_item.asname if import_item.asname is not None else src_name # under which name # mark the use site # # where it is being imported to, i.e. 
the **user** from_node = self.get_node_of_current_namespace() # the thing **being used** (under the asname, if any) - to_node = self.get_node('', tgt_name, ast_node, flavor=Flavor.IMPORTEDITEM) - - is_new_edge = self.add_uses_edge(from_node, to_node) - - # bind asname in the current namespace to the imported module - # - # conversion: possible short name -> fully qualified name - # (when analyzing a set of files in the same directory) - if src_name in self.module_names: - mod_name = self.module_names[src_name] + mod_node = self.get_node('', src_name, ast_node, flavor=Flavor.MODULE) + # if there is alias, add extra edge between alias and node + if import_item.asname is not None: + alias_name = import_item.asname else: - mod_name = src_name - tgt_module = self.get_node('', mod_name, ast_node, flavor=Flavor.MODULE) - # XXX: if there is no asname, it may happen that mod_name == tgt_name, - # in which case these will be the same Node. They are semantically - # distinct (Python name at receiving end, vs. module), but currently - # Pyan has no way of retaining that information. - if to_node is tgt_module: - to_node.flavor = Flavor.MODULE - self.set_value(tgt_name, tgt_module) + alias_name = mod_node.name + self.add_uses_edge(from_node, mod_node) + self.logger.info( + "New edge added for Use import %s in %s" + % (mod_node, from_node) + ) + self.set_value(alias_name, mod_node) # set node to be discoverable in module + self.logger.info("From setting name %s to %s" % (alias_name, mod_node)) - # must do this after possibly munging flavor to avoid confusing - # the user reading the log - self.logger.debug("Use from %s to Import %s" % (from_node, to_node)) - if is_new_edge: - self.logger.info("New edge added for Use from %s to Import %s" % (from_node, to_node)) # Edmund Horner's original post has info on what this fixed in Python 2. 
# https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ From faaee6407c7a42d3009248bfccff8edb23174fd0 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 15 Oct 2020 17:37:26 +0100 Subject: [PATCH 068/117] ensure that modules are always defined and add namespace for constants --- pyan/analyzer.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 28b4292..bfe8ab2 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -268,6 +268,9 @@ class CallGraphVisitor(ast.NodeVisitor): self.name_stack.pop() self.last_value = None + if self.add_defines_edge(module_node, None): + self.logger.info("Def Module %s" % node) + def visit_ClassDef(self, node): self.logger.debug("ClassDef %s, %s:%s" % (node.name, self.filename, node.lineno)) @@ -541,8 +544,9 @@ class CallGraphVisitor(ast.NodeVisitor): def visit_Constant(self, node): self.logger.debug("Constant %s, %s:%s" % (node.value, self.filename, node.lineno)) t = type(node.value) + ns = self.get_node_of_current_namespace().get_name() tn = t.__name__ - self.last_value = self.get_node('', tn, node) + self.last_value = self.get_node(ns, tn, node, flavor=Flavor.ATTRIBUTE) # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): @@ -1410,13 +1414,15 @@ class CallGraphVisitor(ast.NodeVisitor): def add_defines_edge(self, from_node, to_node): """Add a defines edge in the graph between two nodes. N.B. 
This will mark both nodes as defined.""" - + status = False if from_node not in self.defines_edges: self.defines_edges[from_node] = set() - if to_node in self.defines_edges[from_node]: - return False - self.defines_edges[from_node].add(to_node) + status = True from_node.defined = True + if to_node is None or to_node in self.defines_edges[from_node]: + status = status or False + return status + self.defines_edges[from_node].add(to_node) to_node.defined = True return True From 25b014fd2ec2b1f4cd3af1401253b8fbe5841864 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 15 Oct 2020 17:37:40 +0100 Subject: [PATCH 069/117] Add very basic tests --- pytest.ini | 15 +++++++ tests/test_analyzer.py | 55 +++++++++++++++++++++++ tests/test_code/__init__.py | 0 tests/test_code/submodule1.py | 23 ++++++++++ tests/test_code/submodule2.py | 7 +++ tests/test_code/subpackage1/__init__.py | 3 ++ tests/test_code/subpackage1/submodule1.py | 7 +++ 7 files changed, 110 insertions(+) create mode 100644 pytest.ini create mode 100644 tests/test_analyzer.py create mode 100644 tests/test_code/__init__.py create mode 100644 tests/test_code/submodule1.py create mode 100644 tests/test_code/submodule2.py create mode 100644 tests/test_code/subpackage1/__init__.py create mode 100644 tests/test_code/subpackage1/submodule1.py diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..2c6c8ef --- /dev/null +++ b/pytest.ini @@ -0,0 +1,15 @@ +[pytest] +addopts = + -rsxX + -vv + + --cov-config=.coveragerc + --cov=pyan + --cov-report=html + --cov-report=term-missing:skip-covered + --no-cov-on-fail +testpaths = tests/ +log_cli_level = ERROR +log_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +cache_dir = .cache diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py new file mode 100644 index 0000000..88e5373 --- /dev/null +++ b/tests/test_analyzer.py @@ -0,0 +1,55 @@ +import logging +from glob import glob +import os +import pytest + +from 
pyan.analyzer import CallGraphVisitor + +@pytest.fixture +def callgraph(): + filenames = glob(os.path.join(os.path.dirname(__file__), "test_code/**/*.py"), recursive=True) + v = CallGraphVisitor(filenames, logger=logging.getLogger()) + return v + + +def get_node(nodes, name): + filtered_nodes = [node for node in nodes if node.get_name() == name] + assert len(filtered_nodes) == 1, f"Node with name {name} should exist" + return filtered_nodes[0] + +def get_in_dict(node_dict, name): + return node_dict[get_node(node_dict.keys(), name)] + + + +def test_resolve_import_as(callgraph): + imports = get_in_dict(callgraph.uses_edges, "test_code.submodule2") + get_node(imports, "test_code.submodule1") + assert len(imports) == 1, "only one effective import" + + + imports = get_in_dict(callgraph.uses_edges, "test_code.submodule1") + get_node(imports, "test_code.subpackage1.submodule1.A") + get_node(imports, "test_code.subpackage1") + + +def test_import_relative(callgraph): + imports = get_in_dict(callgraph.uses_edges, "test_code.subpackage1.submodule1") + get_node(imports, "test_code.submodule2.test_2") + + +def test_resolve_use_in_class(callgraph): + uses = get_in_dict(callgraph.uses_edges, "test_code.subpackage1.submodule1.A.__init__") + get_node(uses, "test_code.submodule2.test_2") + + +def test_resolve_use_in_function(callgraph): + uses = get_in_dict(callgraph.uses_edges, "test_code.submodule2.test_2") + get_node(uses, "test_code.submodule1.test_func1") + get_node(uses, "test_code.submodule1.test_func2") + + + + + + diff --git a/tests/test_code/__init__.py b/tests/test_code/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_code/submodule1.py b/tests/test_code/submodule1.py new file mode 100644 index 0000000..c7a7d31 --- /dev/null +++ b/tests/test_code/submodule1.py @@ -0,0 +1,23 @@ +from test_code.subpackage1 import A +from test_code import subpackage1 as subpackage + + +def test_func1(a): + return a + +def test_func2(a): + return a + + +class B: 
+ + def __init__(self, k): + self.a = 1 + + + def to_A(self): + return A(self) + + def get_a_via_A(self): + return test_func1(self.to_A().b.a) + diff --git a/tests/test_code/submodule2.py b/tests/test_code/submodule2.py new file mode 100644 index 0000000..d5f66ca --- /dev/null +++ b/tests/test_code/submodule2.py @@ -0,0 +1,7 @@ +from . import submodule1 +import test_code.submodule1 as b + +A = 32 + +def test_2(a): + return submodule1.test_func2(a) + A + b.test_func1(a) \ No newline at end of file diff --git a/tests/test_code/subpackage1/__init__.py b/tests/test_code/subpackage1/__init__.py new file mode 100644 index 0000000..9b81aef --- /dev/null +++ b/tests/test_code/subpackage1/__init__.py @@ -0,0 +1,3 @@ +from test_code.subpackage1.submodule1 import A + +__all__ = ["A"] \ No newline at end of file diff --git a/tests/test_code/subpackage1/submodule1.py b/tests/test_code/subpackage1/submodule1.py new file mode 100644 index 0000000..10204f5 --- /dev/null +++ b/tests/test_code/subpackage1/submodule1.py @@ -0,0 +1,7 @@ + +from ..submodule2 import test_2 + +class A: + + def __init__(self, b): + self.b = test_2(b) \ No newline at end of file From 3539bb6d099c90a1f5125c2efc3b108e572beb32 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 May 2020 19:14:48 +0100 Subject: [PATCH 070/117] resolve `from` imports --- pyan/analyzer.py | 131 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 108 insertions(+), 23 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index bfe8ab2..97ee06a 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -147,6 +147,7 @@ class CallGraphVisitor(ast.NodeVisitor): # then remove any references pointing outside the analyzed file set. 
self.expand_unknowns() + self.resolve_imports() self.contract_nonexistents() self.cull_inherited() self.collapse_inner() @@ -159,6 +160,80 @@ class CallGraphVisitor(ast.NodeVisitor): # Python docs: # https://docs.python.org/3/library/ast.html#abstract-grammar + def resolve_imports(self): + """ + resolve relative imports and remap nodes + """ + # first find all imports and map to themselves. we will then remap those that are currently pointing + # to duplicates or into the void + imports_to_resolve = { + n + for items in self.nodes.values() + for n in items + if n.flavor == Flavor.IMPORTEDITEM + } + # map real definitions + import_mapping = {} + while len(imports_to_resolve) > 0: + from_node = imports_to_resolve.pop() + if from_node in import_mapping: + continue + to_uses = self.uses_edges.get(from_node, set([from_node])) + assert len(to_uses) == 1 + to_node = to_uses.pop() # resolve alias + # resolve namespace and get module + if to_node.namespace == "": + module_node = to_node + else: + assert from_node.name == to_node.name + module_node = self.get_node("", to_node.namespace) + module_uses = self.uses_edges.get(module_node) + if module_uses is not None: + # check if in module item exists and if yes, map to it + for candidate_to_node in module_uses: + if candidate_to_node.name == from_node.name: + to_node = candidate_to_node + import_mapping[from_node] = to_node + if to_node.flavor == Flavor.IMPORTEDITEM and from_node is not to_node: # avoid self-recursion + imports_to_resolve.add(to_node) + break + + # set previously undefined nodes to defined + # go through undefined attributes + attribute_import_mapping = {} + for nodes in self.nodes.values(): + for node in nodes: + if not node.defined and node.flavor == Flavor.ATTRIBUTE: + # try to resolve namespace and find imported item mapping + for from_node, to_node in import_mapping.items(): + if ( + f"{from_node.namespace}.{from_node.name}" == node.namespace + and from_node.flavor == Flavor.IMPORTEDITEM + ): + # use 
define edges as potential candidates + for candidate_to_node in self.defines_edges[to_node]: # + if candidate_to_node.name == node.name: + attribute_import_mapping[node] = candidate_to_node + break + import_mapping.update(attribute_import_mapping) + + # remap nodes based on import mapping + self.nodes = { + name: [import_mapping.get(n, n) for n in items] + for name, items in self.nodes.items() + } + self.uses_edges = { + import_mapping.get(from_node, from_node): { + import_mapping.get(to_node, to_node) for to_node in to_nodes + } + for from_node, to_nodes in self.uses_edges.items() if len(to_nodes) > 0 + } + self.defines_edges = { + import_mapping.get(from_node, from_node): { + import_mapping.get(to_node, to_node) for to_node in to_nodes + } + for from_node, to_nodes in self.defines_edges.items() if len(to_nodes) > 0 + } def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000): """ @@ -469,38 +544,48 @@ class CallGraphVisitor(ast.NodeVisitor): # resolve relative imports correctly. The current "here's a set of files, # analyze them" approach doesn't cut it. # + # As a solution, we register imports here and later, when all files have been parsed, resolve them. + # + # relative imports are currently not supported, i.e. `from ..mod import xy` is not correctly resolved + # # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time + # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom from_node = self.get_node_of_current_namespace() if node.module is None: # resolve relative imports 'None' such as "from . import foo" self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) - tgt_level = node.level + tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] tgt_name = current_module_namespace self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + elif node.level != 0: # resolve from ..module import foo + self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + tgt_level = node.level + current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] + tgt_name = current_module_namespace + '.' + node.module + self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + else: + tgt_name = node.module # normal from module.submodule import foo - else: # import some names from a module - # TODO: This works only for absolute imports. - # - # Relative imports such as "from .mod import foo" and - # "from ..mod import foo" is treated incorrectly, since Pyan has - # no concept of Python packages (and doesn't know what to do - # with node.level). - # - # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom - # pyan can handle Relative imports such as "from .mod import foo" and "from ..mod import foo" - if node.level != 0: - self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) - tgt_level = node.level - current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] - tgt_name = current_module_namespace + '.' 
+ node.module - self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + # link each import separately + for alias in node.names: + # check if import is module + if tgt_name + "." + alias.name in self.module_to_filename: + to_node = self.get_node('', tgt_name + "." + alias.name, node, flavor=Flavor.MODULE) else: - tgt_name = node.module + to_node = self.get_node( + tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM + ) + # if there is alias, add extra edge between alias and node + if alias.asname is not None: + alias_name = alias.asname + else: + alias_name = alias.name + self.set_value(alias_name, to_node) # set node to be discoverable in module + self.logger.info("From setting name %s to %s" % (alias_name, to_node)) - to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace - self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) + if self.add_uses_edge(from_node, to_node): + self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) def analyze_module_import(self, import_item, ast_node): """Analyze a names AST node inside an Import or ImportFrom AST node. @@ -1632,7 +1717,7 @@ class CallGraphVisitor(ast.NodeVisitor): # What about incoming uses edges? E.g. consider a lambda that is saved # in an instance variable, then used elsewhere. How do we want the # graph to look like in that case? 
- + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): From 7237f818521cf9adab5e15c1ca0c44fd735b96a2 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 10 Dec 2020 14:33:51 +0000 Subject: [PATCH 071/117] Ensure that comments reflect support for relative imports --- pyan/analyzer.py | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 97ee06a..4cc23f9 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -530,26 +530,11 @@ class CallGraphVisitor(ast.NodeVisitor): def visit_ImportFrom(self, node): self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) - - # TODO: add support for relative imports (path may be like "....something.something") - - # HACK: support "from . import foo"...ish. This is very difficult - # to get right, so right now we don't even try to do it properly. - # - # We only special-case "from . import foo" so that it doesn't crash Pyan, - # and may even occasionally find the right module. - # - # Pyan would need to know the package structure, and how the program + # Pyan needs to know the package structure, and how the program # being analyzed is actually going to be invoked (!), to be able to - # resolve relative imports correctly. The current "here's a set of files, - # analyze them" approach doesn't cut it. + # resolve relative imports correctly. # # As a solution, we register imports here and later, when all files have been parsed, resolve them. - # - # relative imports are currently not supported, i.e. 
`from ..mod import xy` is not correctly resolved - # - # https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time - # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#ImportFrom from_node = self.get_node_of_current_namespace() if node.module is None: # resolve relative imports 'None' such as "from . import foo" self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' * node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) @@ -1505,7 +1490,6 @@ class CallGraphVisitor(ast.NodeVisitor): status = True from_node.defined = True if to_node is None or to_node in self.defines_edges[from_node]: - status = status or False return status self.defines_edges[from_node].add(to_node) to_node.defined = True @@ -1711,13 +1695,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Lambdas and comprehensions do not define any names in the enclosing # scope, so we only need to treat the uses edges. - - # TODO: currently we handle outgoing uses edges only. - # - # What about incoming uses edges? E.g. consider a lambda that is saved - # in an instance variable, then used elsewhere. How do we want the - # graph to look like in that case? 
- + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): From 78f0bcddc0eed53cde29a9f58006a018ec0089a7 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:10:01 +0200 Subject: [PATCH 072/117] autopep8 and whitespace-cleanup --- pyan/analyzer.py | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 4cc23f9..a88af6e 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -59,17 +59,17 @@ class CallGraphVisitor(ast.NodeVisitor): self.scopes = {} # fully qualified name of namespace: Scope object self.class_base_ast_nodes = {} # pass 1: class Node: list of AST nodes - self.class_base_nodes = {} # pass 2: class Node: list of Node objects (local bases, no recursion) - self.mro = {} # pass 2: class Node: list of Node objects in Python's MRO order + self.class_base_nodes = {} # pass 2: class Node: list of Node objects (local bases, no recursion) + self.mro = {} # pass 2: class Node: list of Node objects in Python's MRO order # current context for analysis self.module_name = None self.filename = None - self.name_stack = [] # for building namespace name, node naming + self.name_stack = [] # for building namespace name, node naming self.scope_stack = [] # the Scope objects currently in scope self.class_stack = [] # Nodes for class definitions currently in scope self.context_stack = [] # for detecting which FunctionDefs are methods - self.last_value = None + self.last_value = None # Analyze. 
self.process() @@ -78,7 +78,7 @@ class CallGraphVisitor(ast.NodeVisitor): """Analyze the set of files, twice so that any forward-references are picked up.""" for pas in range(2): for filename in self.filenames: - self.logger.info("========== pass %d, file '%s' ==========" % (pas+1, filename)) + self.logger.info("========== pass %d, file '%s' ==========" % (pas + 1, filename)) self.process_one(filename) if pas == 0: self.resolve_base_classes() # must be done only after all files seen @@ -114,7 +114,7 @@ class CallGraphVisitor(ast.NodeVisitor): if isinstance(ast_node, ast.Name): baseclass_node = self.get_value(ast_node.id) elif isinstance(ast_node, ast.Attribute): - _,baseclass_node = self.get_attribute(ast_node) # don't care about obj, just grab attr + _, baseclass_node = self.get_attribute(ast_node) # don't care about obj, just grab attr else: # give up baseclass_node = None @@ -266,7 +266,7 @@ class CallGraphVisitor(ast.NodeVisitor): } return self - def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int =1000) -> set: + def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000) -> set: """ get nodes that related to `node` or are in `namespace` @@ -319,8 +319,8 @@ class CallGraphVisitor(ast.NodeVisitor): n for n in self.defines_edges.get(item, []) if n in self.defines_edges - and n not in new_nodes - and namespace in n.namespace + and n not in new_nodes + and namespace in n.namespace ] ) @@ -405,7 +405,7 @@ class CallGraphVisitor(ast.NodeVisitor): # method or a class method. (For a class method, it represents cls, # but Pyan only cares about types, not instances.) # - self_name,flavor = self.analyze_functiondef(node) + self_name, flavor = self.analyze_functiondef(node) # Now we can create the Node. 
# @@ -509,14 +509,14 @@ class CallGraphVisitor(ast.NodeVisitor): n = len(ast_args.defaults) for tgt, val in zip(ast_args.args[-n:], ast_args.defaults): targets = sanitize_exprs(tgt) - values = sanitize_exprs(val) + values = sanitize_exprs(val) self.analyze_binding(targets, values) if ast_args.kw_defaults: n = len(ast_args.kw_defaults) for tgt, val in zip(ast_args.kwonlyargs, ast_args.kw_defaults): if val is not None: targets = sanitize_exprs(tgt) - values = sanitize_exprs(val) + values = sanitize_exprs(val) self.analyze_binding(targets, values) def visit_Import(self, node): @@ -536,7 +536,7 @@ class CallGraphVisitor(ast.NodeVisitor): # # As a solution, we register imports here and later, when all files have been parsed, resolve them. from_node = self.get_node_of_current_namespace() - if node.module is None: # resolve relative imports 'None' such as "from . import foo" + if node.module is None: # resolve relative imports 'None' such as "from . import foo" self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' * node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) tgt_level = node.level current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] @@ -601,7 +601,6 @@ class CallGraphVisitor(ast.NodeVisitor): self.set_value(alias_name, mod_node) # set node to be discoverable in module self.logger.info("From setting name %s to %s" % (alias_name, mod_node)) - # Edmund Horner's original post has info on what this fixed in Python 2. # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ # @@ -639,7 +638,7 @@ class CallGraphVisitor(ast.NodeVisitor): elif isinstance(node.ctx, ast.Load): try: - obj_node,attr_node = self.get_attribute(node) + obj_node, attr_node = self.get_attribute(node) except UnresolvedSuperCallError: # Avoid adding a wildcard if the lookup failed due to an # unresolved super() in the attribute chain. 
@@ -825,7 +824,7 @@ class CallGraphVisitor(ast.NodeVisitor): outermost = gens[0] moregens = gens[1:] if len(gens) > 1 else [] - outermost_iters = sanitize_exprs(outermost.iter) + outermost_iters = sanitize_exprs(outermost.iter) outermost_targets = sanitize_exprs(outermost.target) for expr in outermost_iters: self.visit(expr) # set self.last_value (to something and hope for the best) @@ -840,7 +839,7 @@ class CallGraphVisitor(ast.NodeVisitor): # TODO: there's also an is_async field we might want to use in a future version of Pyan. for gen in moregens: targets = sanitize_exprs(gen.target) - values = sanitize_exprs(gen.iter) + values = sanitize_exprs(gen.iter) self.analyze_binding(targets, values) for expr in gen.ifs: self.visit(expr) @@ -953,7 +952,7 @@ class CallGraphVisitor(ast.NodeVisitor): or None if not applicable; and flavor is a Flavor, specifically one of FUNCTION, METHOD, STATICMETHOD or CLASSMETHOD.""" - if not isinstance(ast_node, (ast.AsyncFunctionDef , ast.FunctionDef)): + if not isinstance(ast_node, (ast.AsyncFunctionDef, ast.FunctionDef)): raise TypeError("Expected ast.FunctionDef; got %s" % (type(ast_node))) # Visit decorators @@ -1035,7 +1034,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(value) # RHS -> set self.last_value captured_values.append(self.last_value) self.last_value = None - for tgt,val in zip(targets,captured_values): + for tgt, val in zip(targets, captured_values): self.last_value = val self.visit(tgt) # LHS, name in a store context self.last_value = None @@ -1137,7 +1136,7 @@ class CallGraphVisitor(ast.NodeVisitor): # ast.Attribute(attr=c, value=ast.Attribute(attr=b, value=a)) # if isinstance(ast_node.value, ast.Attribute): - obj_node,attr_name = self.resolve_attribute(ast_node.value) + obj_node, attr_name = self.resolve_attribute(ast_node.value) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -1320,7 +1319,7 @@ class 
CallGraphVisitor(ast.NodeVisitor): if not isinstance(ast_node.ctx, ast.Load): raise ValueError("Expected a load context, got %s" % (type(ast_node.ctx))) - obj_node,attr_name = self.resolve_attribute(ast_node) + obj_node, attr_name = self.resolve_attribute(ast_node) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -1377,7 +1376,7 @@ class CallGraphVisitor(ast.NodeVisitor): if not isinstance(new_value, Node): return False - obj_node,attr_name = self.resolve_attribute(ast_node) + obj_node, attr_name = self.resolve_attribute(ast_node) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -1453,9 +1452,9 @@ class CallGraphVisitor(ast.NodeVisitor): def get_parent_node(self, graph_node): """Get the parent node of the given Node. (Used in postprocessing.)""" if '.' in graph_node.namespace: - ns,name = graph_node.namespace.rsplit('.', 1) + ns, name = graph_node.namespace.rsplit('.', 1) else: - ns,name = '',graph_node.namespace + ns, name = '', graph_node.namespace return self.get_node(ns, name, None) def associate_node(self, graph_node, ast_node, filename=None): @@ -1679,7 +1678,7 @@ class CallGraphVisitor(ast.NodeVisitor): pn2 = self.get_parent_node(n2) pn3 = self.get_parent_node(n3) if pn2 in self.uses_edges and pn3 in self.uses_edges[pn2]: # remove the first edge W to X.name -# if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) + # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) inherited = True if inherited and n in self.uses_edges: @@ -1695,7 +1694,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Lambdas and comprehensions do not define any names in the enclosing # scope, so we only need to treat the uses edges. 
- + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): From 2bc95cd82813b0567ca29ff8d9ee4dafaf4f11b0 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:11:19 +0200 Subject: [PATCH 073/117] comment placement and indentation (was borked by autopep8) --- pyan/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index a88af6e..df112bf 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -1677,8 +1677,8 @@ class CallGraphVisitor(ast.NodeVisitor): if n3.name == n2.name and n2.namespace is not None and n3.namespace is not None and n3.namespace != n2.namespace: pn2 = self.get_parent_node(n2) pn3 = self.get_parent_node(n3) + # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) if pn2 in self.uses_edges and pn3 in self.uses_edges[pn2]: # remove the first edge W to X.name - # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) inherited = True if inherited and n in self.uses_edges: From b449ddb7ad9b5e66805bd89e9386dd9bc0367ce6 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:14:50 +0200 Subject: [PATCH 074/117] archive some old tests (issues #2 and #3) --- tests/old_tests/issue2/pyan_err.py | 14 ++++++++++++++ tests/old_tests/issue3/testi.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/old_tests/issue2/pyan_err.py create mode 100644 tests/old_tests/issue3/testi.py diff --git a/tests/old_tests/issue2/pyan_err.py b/tests/old_tests/issue2/pyan_err.py new file mode 100644 index 0000000..0b21eaf --- /dev/null +++ b/tests/old_tests/issue2/pyan_err.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8; -*- +# See issue #2 + +""" +This works fine +a = 3 +b = 4 
+print(a + b) +""" + +# But this did not (#2) +a: int = 3 +b = 4 +print(a + b) diff --git a/tests/old_tests/issue3/testi.py b/tests/old_tests/issue3/testi.py new file mode 100644 index 0000000..5798231 --- /dev/null +++ b/tests/old_tests/issue3/testi.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8; -*- +# See issue #3 + +def f(): + return [x for x in range(10)] + +def g(): + return [(x, y) for x in range(10) for y in range(10)] + +def h(): + return [([(name, allargs) for name, _, _, allargs, _ in recs], + {name: inargs for name, inargs, _, _, _ in recs}, + {name: meta for name, _, _, _, meta in recs}) + for recs in (results[key] for key in sorted(results.keys()))] From eb63f543e7d19faa7b23cc7fc2a936dbce1c0e81 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:16:47 +0200 Subject: [PATCH 075/117] archive some old tests (issue #5) --- tests/old_tests/issue5/meas_xrd.py | 33 +++++++++++++++++++++++++++++ tests/old_tests/issue5/plot_xrd.py | 25 ++++++++++++++++++++++ tests/old_tests/issue5/relimport.py | 7 ++++++ 3 files changed, 65 insertions(+) create mode 100644 tests/old_tests/issue5/meas_xrd.py create mode 100644 tests/old_tests/issue5/plot_xrd.py create mode 100644 tests/old_tests/issue5/relimport.py diff --git a/tests/old_tests/issue5/meas_xrd.py b/tests/old_tests/issue5/meas_xrd.py new file mode 100644 index 0000000..1a4587e --- /dev/null +++ b/tests/old_tests/issue5/meas_xrd.py @@ -0,0 +1,33 @@ +import os.path + +import numpy as np +import pandas.io.parsers + +class MeasXRD: + def __init__(self, path: str): + if not os.path.isfile(path): + raise FileNotFoundError("Invalid XRD file path:", path) + + row_ind = 2 + self.params = {} + with open(path, "r") as file: + line = file.readline() + if line != "[Measurement conditions]\n": + raise ValueError("XRD measurement file does not contain a valid header") + + line = file.readline() + while line not in ["[Scan points]\n", ""]: + row_ind += 1 + columns = line.rstrip("\n").split(",", 1) + 
self.params[columns[0]] = columns[1] + line = file.readline() + + self.data = pandas.io.parsers.read_csv( + path, + skiprows=row_ind, + dtype={ + "Angle": np.float_, + "Intensity": np.int_ + }, + engine="c" + ) diff --git a/tests/old_tests/issue5/plot_xrd.py b/tests/old_tests/issue5/plot_xrd.py new file mode 100644 index 0000000..f2526e5 --- /dev/null +++ b/tests/old_tests/issue5/plot_xrd.py @@ -0,0 +1,25 @@ +import plotly.offline as py +import plotly.graph_objs as go + +from . import meas_xrd + +def plot_xrd(meas: meas_xrd.MeasXRD): + trace = go.Scatter( + x=meas.data["Angle"], + y=meas.data["Intensity"] + ) + + layout = go.Layout( + title="XRD data", + xaxis=dict( + title="Angle" + ), + yaxis=dict( + title="Intensity", + type="log" + ) + ) + + data = [trace] + fig = go.Figure(data=data, layout=layout) + return py.plot(fig, output_type="div", include_plotlyjs=False) diff --git a/tests/old_tests/issue5/relimport.py b/tests/old_tests/issue5/relimport.py new file mode 100644 index 0000000..1bf9f9d --- /dev/null +++ b/tests/old_tests/issue5/relimport.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8; -*- +# See issue #5 + +from .mod2 import foo +from ..mod3 import bar +from . import mod1 +from . 
import mod1 as moo From 66cdf1921c8cb931113d8c971b789e32dc06889b Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:19:09 +0200 Subject: [PATCH 076/117] archive runner scripts for old tests (issues #2 and #5) --- tests/old_tests/issue2/run.sh | 2 ++ tests/old_tests/issue5/run.sh | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/old_tests/issue2/run.sh create mode 100644 tests/old_tests/issue5/run.sh diff --git a/tests/old_tests/issue2/run.sh b/tests/old_tests/issue2/run.sh new file mode 100644 index 0000000..9a73808 --- /dev/null +++ b/tests/old_tests/issue2/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pyan pyan_err.py -V >out.dot diff --git a/tests/old_tests/issue5/run.sh b/tests/old_tests/issue5/run.sh new file mode 100644 index 0000000..6829e6d --- /dev/null +++ b/tests/old_tests/issue5/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pyan plot_xrd.py --uses --colored --grouped --annotated --dot > test.dot From 6bf8c52dbd2f0a12b0cea38fb1b71a01233dd588 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 20:23:36 +0200 Subject: [PATCH 077/117] autopep8 and whitespace-cleanup --- tests/test_analyzer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 88e5373..0bc2b2a 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -21,13 +21,11 @@ def get_in_dict(node_dict, name): return node_dict[get_node(node_dict.keys(), name)] - def test_resolve_import_as(callgraph): imports = get_in_dict(callgraph.uses_edges, "test_code.submodule2") get_node(imports, "test_code.submodule1") assert len(imports) == 1, "only one effective import" - imports = get_in_dict(callgraph.uses_edges, "test_code.submodule1") get_node(imports, "test_code.subpackage1.submodule1.A") get_node(imports, "test_code.subpackage1") @@ -47,9 +45,3 @@ def test_resolve_use_in_function(callgraph): uses = get_in_dict(callgraph.uses_edges, "test_code.submodule2.test_2") get_node(uses, 
"test_code.submodule1.test_func1") get_node(uses, "test_code.submodule1.test_func2") - - - - - - From 84a5833d5d0224f9c5136bc0bc33f139a0638335 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:54:54 +0200 Subject: [PATCH 078/117] autopep8 --- pyan/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyan/main.py b/pyan/main.py index c5af354..5c59564 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -208,7 +208,7 @@ def main(cli_args=None): 'draw_defines': known_args.draw_defines, 'draw_uses': known_args.draw_uses, 'colored': known_args.colored, - 'grouped_alt' : known_args.grouped_alt, + 'grouped_alt': known_args.grouped_alt, 'grouped': known_args.grouped, 'nested_groups': known_args.nested_groups, 'annotated': known_args.annotated @@ -253,7 +253,7 @@ def main(cli_args=None): if known_args.dot: writer = DotWriter( graph, - options=['rankdir='+known_args.rankdir], + options=['rankdir=' + known_args.rankdir], output=known_args.filename, logger=logger ) @@ -261,7 +261,7 @@ def main(cli_args=None): if known_args.html: writer = HTMLWriter( graph, - options=['rankdir='+known_args.rankdir], + options=['rankdir=' + known_args.rankdir], output=known_args.filename, logger=logger ) @@ -269,7 +269,7 @@ def main(cli_args=None): if known_args.svg: writer = SVGWriter( graph, - options=['rankdir='+known_args.rankdir], + options=['rankdir=' + known_args.rankdir], output=known_args.filename, logger=logger ) From c48a9f8b26193bbca492d1afc33bf6c992cdca11 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:55:00 +0200 Subject: [PATCH 079/117] remove duplicate import --- pyan/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 7fae25c..8745027 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -3,7 +3,6 @@ from typing import Union, List import io from glob import glob -from typing import List, Union from .main import main from .analyzer import CallGraphVisitor From 
202cc5e11393b37ae125893909492234536010d2 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:55:08 +0200 Subject: [PATCH 080/117] bump version --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 8745027..bd78f16 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -9,7 +9,7 @@ from .analyzer import CallGraphVisitor from .writers import SVGWriter, HTMLWriter, DotWriter from .visgraph import VisualGraph -__version__ = "1.0.5" +__version__ = "1.1.0" def create_callgraph( From 9e926bbd952c12d4ba281001005cd99af5276df5 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:56:22 +0200 Subject: [PATCH 081/117] mark TODO --- pyan/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyan/__init__.py b/pyan/__init__.py index bd78f16..f983604 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -12,6 +12,7 @@ from .visgraph import VisualGraph __version__ = "1.1.0" +# TODO: fix code duplication with main.py, should have just one implementation. def create_callgraph( filenames: Union[List[str], str] = "**/*.py", function: Union[str, None] = None, From a12b97f7a961ce5a3f15a04cdb4bd737c5011111 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:56:28 +0200 Subject: [PATCH 082/117] noqa the export-only import --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index f983604..b4f4021 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -4,7 +4,7 @@ from typing import Union, List import io from glob import glob -from .main import main +from .main import main # noqa: F401, for export only. 
from .analyzer import CallGraphVisitor from .writers import SVGWriter, HTMLWriter, DotWriter from .visgraph import VisualGraph From e6746924d6d9fadede46469863bce4f5ff81e902 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:56:55 +0200 Subject: [PATCH 083/117] add coding prop-line --- pyan/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyan/__main__.py b/pyan/__main__.py index 307d92e..11946f1 100644 --- a/pyan/__main__.py +++ b/pyan/__main__.py @@ -1,5 +1,6 @@ -import pyan +# -*- coding: utf-8 -*- +import pyan if __name__ == "__main__": pyan.main() From bfe9d3aeb45f7e08115fd4aed2b0091865b4b281 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:57:14 +0200 Subject: [PATCH 084/117] add missing blank line --- pyan/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyan/__init__.py b/pyan/__init__.py index b4f4021..77da52e 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- + from typing import Union, List import io from glob import glob From bf420d4ec82c6f8596d89146131ab4c3ab41ccb7 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:58:25 +0200 Subject: [PATCH 085/117] add missing blank line --- pyan/node.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyan/node.py b/pyan/node.py index 276f446..8d657f5 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- + """Abstract node representing data gathered from the analysis.""" from enum import Enum From 69ff5ee28d2a353be30a25dc68518c17f8a3a479 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 10 Dec 2020 23:59:31 +0200 Subject: [PATCH 086/117] pep8 --- pyan/node.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pyan/node.py b/pyan/node.py index 8d657f5..2f2dbf4 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -5,6 +5,7 @@ from enum import Enum + def 
make_safe_label(label): """Avoid name clashes with GraphViz reserved words such as 'graph'.""" unsafe_words = ("digraph", "graph", "cluster", "subgraph", "node") @@ -13,23 +14,24 @@ def make_safe_label(label): out = out.replace(word, "%sX" % word) return out.replace('.', '__').replace('*', '') + class Flavor(Enum): """Flavor describes the kind of object a node represents.""" - UNSPECIFIED = "---" # as it says on the tin - UNKNOWN = "???" # not determined by analysis (wildcard) + UNSPECIFIED = "---" # as it says on the tin + UNKNOWN = "???" # not determined by analysis (wildcard) - NAMESPACE = "namespace" # node representing a namespace - ATTRIBUTE = "attribute" # attr of something, but not known if class or func. + NAMESPACE = "namespace" # node representing a namespace + ATTRIBUTE = "attribute" # attr of something, but not known if class or func. - IMPORTEDITEM = "import" # imported item of unanalyzed type + IMPORTEDITEM = "import" # imported item of unanalyzed type - MODULE = "module" - CLASS = "class" - FUNCTION = "function" - METHOD = "method" # instance method + MODULE = "module" + CLASS = "class" + FUNCTION = "function" + METHOD = "method" # instance method STATICMETHOD = "staticmethod" - CLASSMETHOD = "classmethod" - NAME = "name" # Python name (e.g. "x" in "x = 42") + CLASSMETHOD = "classmethod" + NAME = "name" # Python name (e.g. "x" in "x = 42") # Flavors have a partial ordering in specificness of the information. # @@ -51,6 +53,7 @@ class Flavor(Enum): def __repr__(self): return self.value + class Node: """A node is an object in the call graph. 
From 7c97c2e4574af2aedea12b04894367b0444dc91b Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:00:52 +0200 Subject: [PATCH 087/117] move comment to docstring --- pyan/visgraph.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pyan/visgraph.py b/pyan/visgraph.py index 40cfd79..9360710 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -6,15 +6,18 @@ import re import logging import colorsys -# Set node color by filename. -# -# HSL: hue = top-level namespace, lightness = nesting level, saturation constant. -# -# The "" namespace (for *.py files) gets the first color. Since its -# level is 0, its lightness will be 1.0, i.e. pure white regardless -# of the hue. -# class Colorizer: + """Output graph color manager. + + We set node color by filename. + + HSL: hue = top-level namespace, lightness = nesting level, saturation constant. + + The "" namespace (for *.py files) gets the first color. Since its + level is 0, its lightness will be 1.0, i.e. pure white regardless + of the hue. 
+ """ + def __init__(self, num_colors, colored=True, logger=None): self.logger = logger or logging.getLogger(__name__) self.colored = colored From 54296fd1e5cdf57e04574ea9a9860b925afca9a6 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:02:02 +0200 Subject: [PATCH 088/117] pep8 --- pyan/writers.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyan/writers.py b/pyan/writers.py index f5af1e8..a48e8ba 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- + """Graph markup writers.""" + import os import subprocess import sys @@ -15,7 +17,7 @@ class Writer(object): self.output = output self.logger = logger or logging.getLogger(__name__) self.indent_level = 0 - self.tabstop = tabstop*' ' + self.tabstop = tabstop * ' ' def log(self, msg): self.logger.info(msg) @@ -27,7 +29,7 @@ class Writer(object): self.indent_level -= level def write(self, line): - self.outstream.write(self.tabstop*self.indent_level+line+'\n') + self.outstream.write(self.tabstop * self.indent_level + line + '\n') def run(self): self.log('%s running' % type(self)) @@ -159,13 +161,13 @@ class DotWriter(Writer): def write_edge(self, edge): source = edge.source target = edge.target - color = edge.color + color = edge.color if edge.flavor == 'defines': self.write( ' %s -> %s [style="dashed",' ' color="%s"];' % (source.id, target.id, color)) - else: # edge.flavor == 'uses': + else: # edge.flavor == 'uses': self.write( ' %s -> %s [style="solid",' ' color="%s"];' @@ -300,7 +302,7 @@ class YedWriter(Writer): def write_node(self, node): self.log('Write node %s' % node.label) - width = 20 + 10*len(node.label) + width = 20 + 10 * len(node.label) self.write('' % node.id) self.indent() self.write('') From 5893bd5055d89f25675577a9b2c69741a8a6c7f4 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:03:13 +0200 Subject: [PATCH 089/117] pep8 --- setup.py | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index b070168..4764d6a 100644 --- a/setup.py +++ b/setup.py @@ -58,10 +58,10 @@ dataexts = (".py", ".ipynb", ".sh", ".lyx", ".tex", ".txt", ".pdf") # Standard documentation to detect (and package if it exists). # +# just the basename without file extension standard_docs = ["README", "LICENSE", "TODO", "CHANGELOG", "AUTHORS"] - # just the basename without file extension -standard_doc_exts = [".md", ".rst", ".txt", ""] # commonly .md for - # GitHub projects, but other projects may use .rst or .txt (or even blank). +# commonly .md for GitHub projects, but other projects may use .rst or .txt (or even blank). +standard_doc_exts = [".md", ".rst", ".txt", ""] ######################################################### # Init @@ -83,8 +83,8 @@ datafiles = [] detected_docs = [] for docname in standard_docs: for ext in standard_doc_exts: - filename = "".join((docname, ext)) # relative to the directory in - # which setup.py resides + # relative to the directory in which setup.py resides + filename = "".join((docname, ext)) if os.path.isfile(filename): detected_docs.append(filename) datafiles.append(('.', detected_docs)) From 344d7d6f6a4225b7f9d134a56cdda5b7965891be Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:07:21 +0200 Subject: [PATCH 090/117] add requirements.txt for development-time dependencies --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..42e6fb3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +coverage>=5.3 +pytest>=6.1.2 +pytest-cov>=2.10.1 From 348ae2665f1d650b6d8836e6df9282d45b6ab626 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:07:38 +0200 Subject: [PATCH 091/117] add Python 3.7 as supported --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 4764d6a..d420987 100644 --- a/setup.py 
+++ b/setup.py @@ -150,6 +150,7 @@ setup( "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", "Topic :: Software Development" ], From 59aa11d76da653295bd92d1e7ba80ce58ce61118 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:07:49 +0200 Subject: [PATCH 092/117] clean up setup script --- setup.py | 68 ++++++++++---------------------------------------------- 1 file changed, 12 insertions(+), 56 deletions(-) diff --git a/setup.py b/setup.py index d420987..717a929 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,6 @@ or import os import ast -import sys from setuptools import setup ######################################################### @@ -47,48 +46,10 @@ DESC = ( 'the objects in the combined source, and how they define or ' 'use each other. The graph can be output for rendering by GraphViz or yEd.') -# Set up data files for packaging. -# -# Directories (relative to the top-level directory where setup.py resides) in -# which to look for data files. -datadirs = () - -# File extensions to be considered as data files. (Literal, no wildcards.) -dataexts = (".py", ".ipynb", ".sh", ".lyx", ".tex", ".txt", ".pdf") - -# Standard documentation to detect (and package if it exists). -# -# just the basename without file extension -standard_docs = ["README", "LICENSE", "TODO", "CHANGELOG", "AUTHORS"] -# commonly .md for GitHub projects, but other projects may use .rst or .txt (or even blank). 
-standard_doc_exts = [".md", ".rst", ".txt", ""] - ######################################################### # Init ######################################################### -# Gather user-defined data files -# -# https://stackoverflow.com/q/13628979/1959808 -# -datafiles = [] -#getext = lambda filename: os.path.splitext(filename)[1] -#for datadir in datadirs: -# datafiles.extend( [(root, [os.path.join(root, f) -# for f in files if getext(f) in dataexts]) -# for root, dirs, files in os.walk(datadir)] ) - -# Add standard documentation (README et al.), if any, to data files -# -detected_docs = [] -for docname in standard_docs: - for ext in standard_doc_exts: - # relative to the directory in which setup.py resides - filename = "".join((docname, ext)) - if os.path.isfile(filename): - detected_docs.append(filename) -datafiles.append(('.', detected_docs)) - # Extract __version__ from the package __init__.py # (since it's not a good idea to actually run __init__.py during the # build process). 
@@ -96,25 +57,23 @@ datafiles.append(('.', detected_docs)) # https://stackoverflow.com/q/2058802/1959808 # init_py_path = os.path.join('pyan', '__init__.py') -version = '0.0.unknown' +version = None try: with open(init_py_path) as f: for line in f: - if line.startswith('__version__'): - version = ast.parse(line).body[0].value.s + if line.startswith("__version__"): + module = ast.parse(line) + expr = module.body[0] + v = expr.value + if type(v) is ast.Constant: + version = v.value + elif type(v) is ast.Str: # TODO: Python 3.8: remove ast.Str + version = v.s break - else: - print(( - "WARNING: Version information not found in " - "'{path}', using placeholder '{version}'").format( - path=init_py_path, version=version), - file=sys.stderr) except FileNotFoundError: - print(( - "WARNING: Could not find file '{path}', " - "using placeholder version information '{version}'").format( - path=init_py_path, version=version), - file=sys.stderr) + pass +if not version: + raise RuntimeError(f"Version information not found in {init_py_path}") ######################################################### # Call setup() @@ -180,9 +139,6 @@ setup( package_data={'pyan': ["callgraph.html"]}, include_package_data=True, - # Custom data files not inside a Python package - data_files=datafiles, - entry_points={ 'console_scripts': [ 'pyan3 = pyan.main:main', From ffe3c97740025dfc0745b4907a41aa72402c73b0 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:12:24 +0200 Subject: [PATCH 093/117] remove unused variable in setup.py --- setup.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/setup.py b/setup.py index 717a929..87bd386 100644 --- a/setup.py +++ b/setup.py @@ -25,13 +25,6 @@ from setuptools import setup # General config ######################################################### -# Name of the top-level package of the library. -# -# This is also the top level of its source tree, -# relative to the top-level project directory setup.py resides in. 
-# -libname = "pyan" - # Short description for package list on PyPI # SHORTDESC = "Offline call graph generator for Python 3" From 5b8cf01e37ec9d43b04b9eb190863853681e5eb0 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:12:57 +0200 Subject: [PATCH 094/117] keeping old name "pyan3" for PyPI for now We should take some time to deprecate it in favor of just "pyan" now that Python 2 has been end-of-lifed. The name "pyan" is available on PyPI. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 87bd386..81f7f93 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ if not version: ######################################################### setup( - name="pyan", + name="pyan3", version=version, author="Juha Jeronen", author_email="juha.m.jeronen@gmail.com", From 873d55849fd60806ac05eb4121ad6f24c774bd1b Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:14:07 +0200 Subject: [PATCH 095/117] fix metadata --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 77da52e..f0900ef 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -10,7 +10,7 @@ from .analyzer import CallGraphVisitor from .writers import SVGWriter, HTMLWriter, DotWriter from .visgraph import VisualGraph -__version__ = "1.1.0" +__version__ = "1.1.1" # TODO: fix code duplication with main.py, should have just one implementation. 
From e16badca035f166b14c46af1dfe5268597589450 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:47:06 +0200 Subject: [PATCH 096/117] update author list and separate it into a new file --- AUTHORS.md | 19 +++++++++++++++++++ README.md | 14 +------------- 2 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 AUTHORS.md diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..d4ba589 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,19 @@ +Original [pyan.py](https://github.com/ejrh/ejrh/blob/master/utils/pyan.py) for Python 2 by Edmund Horner, 2012. [Original blog post with explanation](http://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/). + +[Coloring and grouping](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/) for GraphViz output by Juha Jeronen. + +[Git repository cleanup](https://github.com/davidfraser/pyan/) and maintenance by David Fraser. + +[yEd GraphML output, and framework for easily adding new output formats](https://github.com/davidfraser/pyan/pull/1) by Patrick Massot. + +A bugfix [[2]](https://github.com/davidfraser/pyan/pull/2) and the option `--dot-rankdir` [[3]](https://github.com/davidfraser/pyan/pull/3) contributed by GitHub user ch41rmn. + +A bug in `.tgf` output [[4]](https://github.com/davidfraser/pyan/pull/4) pointed out and fix suggested by Adam Eijdenberg. + +This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeronen. + +HTML and SVG export by Jan Beitner. + +Support for relative imports by Jan Beitner and Rakan Alanazi. + +Further contributions by Ioannis Filippidis, Jan Malek, José Eduardo Montenegro Cavalcanti de Oliveira, and Mantas Zimnickas. diff --git a/README.md b/README.md index 7dda6f8..61db4d7 100644 --- a/README.md +++ b/README.md @@ -194,19 +194,7 @@ When a binding statement is encountered, the current namespace determines in whi # Authors -Original [pyan.py](https://github.com/ejrh/ejrh/blob/master/utils/pyan.py) by Edmund Horner. 
[Original post with explanation](http://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/). - -[Coloring and grouping](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/) for GraphViz output by Juha Jeronen. - -[Git repository cleanup](https://github.com/davidfraser/pyan/) and maintenance by David Fraser. - -[yEd GraphML output, and framework for easily adding new output formats](https://github.com/davidfraser/pyan/pull/1) by Patrick Massot. - -A bugfix [[2]](https://github.com/davidfraser/pyan/pull/2) and the option `--dot-rankdir` [[3]](https://github.com/davidfraser/pyan/pull/3) contributed by GitHub user ch41rmn. - -A bug in `.tgf` output [[4]](https://github.com/davidfraser/pyan/pull/4) pointed out and fix suggested by Adam Eijdenberg. - -This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeronen. +See [AUTHORS.md](AUTHORS.md). # License From 94427893e13ead690e7a1664c1fad835628fb8a8 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 00:47:22 +0200 Subject: [PATCH 097/117] pre-emptive version bump --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index f0900ef..a4d50e6 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -10,7 +10,7 @@ from .analyzer import CallGraphVisitor from .writers import SVGWriter, HTMLWriter, DotWriter from .visgraph import VisualGraph -__version__ = "1.1.1" +__version__ = "1.1.2" # TODO: fix code duplication with main.py, should have just one implementation. From c74302974fea841039858cb25d4c8058c5fc38b5 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Fri, 11 Dec 2020 15:50:13 +0200 Subject: [PATCH 098/117] add authors of alternative PRs submitted to davidfraser's repo The code from these PRs wasn't used, but they arrived either earlier or near the same time as the ones to my development repo that I noticed. 
Two were small but important one-line fixes, whereas one was a port from argparse to optparse, consisting of about 40 lines. So for the sake of fairness, credit their authors, too. --- AUTHORS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index d4ba589..921d71f 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -16,4 +16,4 @@ HTML and SVG export by Jan Beitner. Support for relative imports by Jan Beitner and Rakan Alanazi. -Further contributions by Ioannis Filippidis, Jan Malek, José Eduardo Montenegro Cavalcanti de Oliveira, and Mantas Zimnickas. +Further contributions by Ioannis Filippidis, Jan Malek, José Eduardo Montenegro Cavalcanti de Oliveira, Mantas Zimnickas, Sam Basak, Brady Deetz, and GitHub user dmfreemon. From 67c57fa109a68f44af89f0ddabf811dac68e24b9 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 Jan 2021 15:36:31 +0000 Subject: [PATCH 099/117] Add sphinx extension --- README.md | 187 ++++++++++++++++++++++++++++++------------------- pyan/sphinx.py | 169 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 74 deletions(-) create mode 100644 pyan/sphinx.py diff --git a/README.md b/README.md index 61db4d7..b1c5dde 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Offline call graph generator for Python 3 Pyan takes one or more Python source files, performs a (rather superficial) static analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output for rendering by GraphViz or yEd. This project has 2 official repositories: + - The original stable [davidfraser/pyan](https://github.com/davidfraser/pyan). 
- The development repository [Technologicat/pyan](https://github.com/Technologicat/pyan) @@ -19,17 +20,17 @@ This project has 2 official repositories: [![Example output](graph0.png "Example: GraphViz rendering of Pyan output (click for .svg)")](graph0.svg) -**Defines** relations are drawn with *dotted gray arrows*. +**Defines** relations are drawn with _dotted gray arrows_. -**Uses** relations are drawn with *black solid arrows*. Recursion is indicated by an arrow from a node to itself. [Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion#Basic_examples) between nodes X and Y is indicated by a pair of arrows, one pointing from X to Y, and the other from Y to X. +**Uses** relations are drawn with _black solid arrows_. Recursion is indicated by an arrow from a node to itself. [Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion#Basic_examples) between nodes X and Y is indicated by a pair of arrows, one pointing from X to Y, and the other from Y to X. **Nodes** are always filled, and made translucent to clearly show any arrows passing underneath them. This is especially useful for large graphs with GraphViz's `fdp` filter. If colored output is not enabled, the fill is white. -In **node coloring**, the [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV) color model is used. The **hue** is determined by the *filename* the node comes from. The **lightness** is determined by *depth of namespace nesting*, with darker meaning more deeply nested. Saturation is constant. The spacing between different hues depends on the number of files analyzed; better results are obtained for fewer files. +In **node coloring**, the [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV) color model is used. The **hue** is determined by the _filename_ the node comes from. The **lightness** is determined by _depth of namespace nesting_, with darker meaning more deeply nested. Saturation is constant. 
The spacing between different hues depends on the number of files analyzed; better results are obtained for fewer files. **Groups** are filled with translucent gray to avoid clashes with any node color. -The nodes can be **annotated** by *filename and source line number* information. +The nodes can be **annotated** by _filename and source line number_ information. ## Note @@ -37,12 +38,10 @@ The static analysis approach Pyan takes is different from running the code and s In Pyan3, the analyzer was ported from `compiler` ([good riddance](https://stackoverflow.com/a/909172)) to a combination of `ast` and `symtable`, and slightly extended. - # Install pip install pyan3 - # Usage See `pyan3 --help`. @@ -71,9 +70,50 @@ from IPython.display import HTML HTML(pyan.create_callgraph(filenames="**/*.py", format="html")) ``` +#### Sphinx integration + +You can integrate callgraphs into Sphinx. +Install graphviz (e.g. via `sudo apt-get install graphviz`) and modify `source/conf.py` so that + +``` +# modify extensions +extensions = [ + ... 
+ "sphinx.ext.graphviz", + "pyan.sphinx", +] + +# add graphviz options +graphviz_output_format = "svg" +``` + +Now, there is a callgraph directive which has all the options of the [graphviz directive](https://www.sphinx-doc.org/en/master/usage/extensions/graphviz.html) +and in addition: + +- **:no-groups:** (boolean flag): do not group +- **:no-defines:** (boolean flag): do not draw edges that show which functions, methods and classes are defined by a class or module +- **:no-uses:** (boolean flag): do not draw edges that show how a function uses other functions +- **:no-colors:** (boolean flag): do not color the callgraph (default is coloring) +- **:nested-groups:** (boolean flag): group by modules and submodules +- **:annotated:** (boolean flag): annotate callgraph with file names +- **:direction:** (string): "horizontal" or "vertical" callgraph +- **:toctree:** (string): path to toctree (as used with autosummary) to link elements of callgraph to documentation (makes all nodes clickable) +- **:zoomable:** (boolean flag): enables users to zoom and pan callgraph + +Example to create a callgraph for the function `pyan.create_callgraph` that is +zoomable, is defined from left to right and links each node to the API documentation that +was created at the toctree path `api`. + +``` +.. callgraph:: pyan.create_callgraph + :toctree: api + :zoomable: + :direction: horizontal +``` + #### Troubleshooting -If GraphViz says *trouble in init_rank*, try adding `-Gnewrank=true`, as in: +If GraphViz says _trouble in init_rank_, try adding `-Gnewrank=true`, as in: `dot -Gnewrank=true -Tsvg myuses.dot >myuses.svg` @@ -85,86 +125,85 @@ If the graph is visually unreadable due to too much detail, consider visualizing Currently Pyan always operates at the level of individual functions and methods; an option to visualize only relations between namespaces may (or may not) be added in a future version. 
- # Features -*Items tagged with ☆ are new in Pyan3.* +_Items tagged with ☆ are new in Pyan3._ **Graph creation**: - - Nodes for functions and classes - - Edges for defines - - Edges for uses - - This includes recursive calls ☆ - - Grouping to represent defines, with or without nesting - - Coloring of nodes by filename - - Unlimited number of hues ☆ +- Nodes for functions and classes +- Edges for defines +- Edges for uses + - This includes recursive calls ☆ +- Grouping to represent defines, with or without nesting +- Coloring of nodes by filename + - Unlimited number of hues ☆ **Analysis**: - - Name lookup across the given set of files - - Nested function definitions - - Nested class definitions ☆ - - Nested attribute accesses like `self.a.b` ☆ - - Inherited attributes ☆ - - Pyan3 looks up also in base classes when resolving attributes. In the old Pyan, calls to inherited methods used to be picked up by `contract_nonexistents()` followed by `expand_unknowns()`, but that often generated spurious uses edges (because the wildcard to `*.name` expands to `X.name` *for all* `X` that have an attribute called `name`.). - - Resolution of `super()` based on the static type at the call site ☆ - - MRO is (statically) respected in looking up inherited attributes and `super()` ☆ - - Assignment tracking with lexical scoping - - E.g. if `self.a = MyFancyClass()`, the analyzer knows that any references to `self.a` point to `MyFancyClass` - - All binding forms are supported (assign, augassign, for, comprehensions, generator expressions, with) ☆ - - Name clashes between `for` loop counter variables and functions or classes defined elsewhere no longer confuse Pyan. - - `self` is defined by capturing the name of the first argument of a method definition, like Python does. 
☆ - - Simple item-by-item tuple assignments like `x,y,z = a,b,c` ☆ - - Chained assignments `a = b = c` ☆ - - Local scope for lambda, listcomp, setcomp, dictcomp, genexpr ☆ - - Keep in mind that list comprehensions gained a local scope (being treated like a function) only in Python 3. Thus, Pyan3, when applied to legacy Python 2 code, will give subtly wrong results if the code uses list comprehensions. - - Source filename and line number annotation ☆ - - The annotation is appended to the node label. If grouping is off, namespace is included in the annotation. If grouping is on, only source filename and line number information is included, because the group title already shows the namespace. +- Name lookup across the given set of files +- Nested function definitions +- Nested class definitions ☆ +- Nested attribute accesses like `self.a.b` ☆ +- Inherited attributes ☆ + - Pyan3 looks up also in base classes when resolving attributes. In the old Pyan, calls to inherited methods used to be picked up by `contract_nonexistents()` followed by `expand_unknowns()`, but that often generated spurious uses edges (because the wildcard to `*.name` expands to `X.name` _for all_ `X` that have an attribute called `name`.). +- Resolution of `super()` based on the static type at the call site ☆ +- MRO is (statically) respected in looking up inherited attributes and `super()` ☆ +- Assignment tracking with lexical scoping + - E.g. if `self.a = MyFancyClass()`, the analyzer knows that any references to `self.a` point to `MyFancyClass` + - All binding forms are supported (assign, augassign, for, comprehensions, generator expressions, with) ☆ + - Name clashes between `for` loop counter variables and functions or classes defined elsewhere no longer confuse Pyan. +- `self` is defined by capturing the name of the first argument of a method definition, like Python does. 
☆ +- Simple item-by-item tuple assignments like `x,y,z = a,b,c` ☆ +- Chained assignments `a = b = c` ☆ +- Local scope for lambda, listcomp, setcomp, dictcomp, genexpr ☆ + - Keep in mind that list comprehensions gained a local scope (being treated like a function) only in Python 3. Thus, Pyan3, when applied to legacy Python 2 code, will give subtly wrong results if the code uses list comprehensions. +- Source filename and line number annotation ☆ + - The annotation is appended to the node label. If grouping is off, namespace is included in the annotation. If grouping is on, only source filename and line number information is included, because the group title already shows the namespace. ## TODO - - Determine confidence of detected edges (probability that the edge is correct). Start with a binary system, with only values 1.0 and 0.0. - - A fully resolved reference to a name, based on lexical scoping, has confidence 1.0. - - A reference to an unknown name has confidence 0.0. - - Attributes: - - A fully resolved reference to a known attribute of a known object has confidence 1.0. - - A reference to an unknown attribute of a known object has confidence 1.0. These are mainly generated by imports, when the imported file is not in the analyzed set. (Does this need a third value, such as 0.5?) - - A reference to an attribute of an unknown object has confidence 0.0. - - A wildcard and its expansions have confidence 0.0. - - Effects of binding analysis? The system should not claim full confidence in a bound value, unless it fully understands both the binding syntax and the value. (Note that this is very restrictive. A function call or a list in the expression for the value will currently spoil the full analysis.) - - Confidence values may need updating in pass 2. - - Make the analyzer understand `del name` (probably seen as `isinstance(node.ctx, ast.Del)` in `visit_Name()`, `visit_Attribute()`) - - Prefix methods by class name in the graph; create a legend for annotations. 
See the discussion [here](https://github.com/johnyf/pyan/issues/4). - - Improve the wildcard resolution mechanism, see discussion [here](https://github.com/johnyf/pyan/issues/5). - - Could record the namespace of the use site upon creating the wildcard, and check any possible resolutions against that (requiring that the resolved name is in scope at the use site)? - - Add an option to visualize relations only between namespaces, useful for large projects. - - Scan the nodes and edges, basically generate a new graph and visualize that. - - Publish test cases. - - Get rid of `self.last_value`? - - Consider each specific kind of expression or statement being handled; get the relevant info directly (or by a more controlled kind of recursion) instead of `self.visit()`. - - At some point, may need a second visitor class that is just a catch-all that extracts names, which is then applied to only relevant branches of the AST. - - On the other hand, maybe `self.last_value` is the simplest implementation that extracts a value from an expression, and it only needs to be used in a controlled manner (as `analyze_binding()` currently does); i.e. reset before visiting, and reset immediately when done. +- Determine confidence of detected edges (probability that the edge is correct). Start with a binary system, with only values 1.0 and 0.0. + - A fully resolved reference to a name, based on lexical scoping, has confidence 1.0. + - A reference to an unknown name has confidence 0.0. + - Attributes: + - A fully resolved reference to a known attribute of a known object has confidence 1.0. + - A reference to an unknown attribute of a known object has confidence 1.0. These are mainly generated by imports, when the imported file is not in the analyzed set. (Does this need a third value, such as 0.5?) + - A reference to an attribute of an unknown object has confidence 0.0. + - A wildcard and its expansions have confidence 0.0. + - Effects of binding analysis? 
The system should not claim full confidence in a bound value, unless it fully understands both the binding syntax and the value. (Note that this is very restrictive. A function call or a list in the expression for the value will currently spoil the full analysis.) + - Confidence values may need updating in pass 2. +- Make the analyzer understand `del name` (probably seen as `isinstance(node.ctx, ast.Del)` in `visit_Name()`, `visit_Attribute()`) +- Prefix methods by class name in the graph; create a legend for annotations. See the discussion [here](https://github.com/johnyf/pyan/issues/4). +- Improve the wildcard resolution mechanism, see discussion [here](https://github.com/johnyf/pyan/issues/5). + - Could record the namespace of the use site upon creating the wildcard, and check any possible resolutions against that (requiring that the resolved name is in scope at the use site)? +- Add an option to visualize relations only between namespaces, useful for large projects. + - Scan the nodes and edges, basically generate a new graph and visualize that. +- Publish test cases. +- Get rid of `self.last_value`? + - Consider each specific kind of expression or statement being handled; get the relevant info directly (or by a more controlled kind of recursion) instead of `self.visit()`. + - At some point, may need a second visitor class that is just a catch-all that extracts names, which is then applied to only relevant branches of the AST. + - On the other hand, maybe `self.last_value` is the simplest implementation that extracts a value from an expression, and it only needs to be used in a controlled manner (as `analyze_binding()` currently does); i.e. reset before visiting, and reset immediately when done. The analyzer **does not currently support**: - - Tuples/lists as first-class values (currently ignores any assignment of a tuple/list to a single name). - - Support empty lists, too (for resolving method calls to `.append()` and similar). 
- - Starred assignment `a,*b,c = d,e,f,g,h` - - Slicing and indexing in assignment (`ast.Subscript`) - - Additional unpacking generalizations ([PEP 448](https://www.python.org/dev/peps/pep-0448/), Python 3.5+). - - Any **uses** on the RHS *at the binding site* in all of the above are already detected by the name and attribute analyzers, but the binding information from assignments of these forms will not be recorded (at least not correctly). - - Enums; need to mark the use of any of their attributes as use of the Enum. Need to detect `Enum` in `bases` during analysis of ClassDef; then tag the class as an enum and handle differently. - - Resolving results of function calls, except for a very limited special case for `super()`. - - Any binding of a name to a result of a function (or method) call - provided that the binding itself is understood by Pyan - will instead show in the output as binding the name to that function (or method). (This may generate some unintuitive uses edges in the graph.) - - Distinguishing between different Lambdas in the same namespace (to report uses of a particular `lambda` that has been stored in `self.something`). - - Type hints ([PEP 484](https://www.python.org/dev/peps/pep-0484/), Python 3.5+). - - Type inference for function arguments - - Either of these two could be used to bind function argument names to the appropriate object types, avoiding the need for wildcard references (especially for attribute accesses on objects passed in as function arguments). - - Type inference could run as pass 3, using additional information from the state of the graph after pass 2 to connect call sites to function definitions. Alternatively, no additional pass; store the AST nodes in the earlier pass. Type inference would allow resolving some wildcards by finding the method of the actual object instance passed in. - - Must understand, at the call site, whether the first positional argument in the function def is handled implicitly or not. 
This is found by looking at the flavor of the Node representing the call target. - - Async definitions are detected, but passed through to the corresponding non-async analyzers; could be annotated. - - Cython; could strip or comment out Cython-specific code as a preprocess step, then treat as Python (will need to be careful to get line numbers right). +- Tuples/lists as first-class values (currently ignores any assignment of a tuple/list to a single name). + - Support empty lists, too (for resolving method calls to `.append()` and similar). +- Starred assignment `a,*b,c = d,e,f,g,h` +- Slicing and indexing in assignment (`ast.Subscript`) +- Additional unpacking generalizations ([PEP 448](https://www.python.org/dev/peps/pep-0448/), Python 3.5+). + - Any **uses** on the RHS _at the binding site_ in all of the above are already detected by the name and attribute analyzers, but the binding information from assignments of these forms will not be recorded (at least not correctly). +- Enums; need to mark the use of any of their attributes as use of the Enum. Need to detect `Enum` in `bases` during analysis of ClassDef; then tag the class as an enum and handle differently. +- Resolving results of function calls, except for a very limited special case for `super()`. + - Any binding of a name to a result of a function (or method) call - provided that the binding itself is understood by Pyan - will instead show in the output as binding the name to that function (or method). (This may generate some unintuitive uses edges in the graph.) +- Distinguishing between different Lambdas in the same namespace (to report uses of a particular `lambda` that has been stored in `self.something`). +- Type hints ([PEP 484](https://www.python.org/dev/peps/pep-0484/), Python 3.5+). 
+- Type inference for function arguments + - Either of these two could be used to bind function argument names to the appropriate object types, avoiding the need for wildcard references (especially for attribute accesses on objects passed in as function arguments). + - Type inference could run as pass 3, using additional information from the state of the graph after pass 2 to connect call sites to function definitions. Alternatively, no additional pass; store the AST nodes in the earlier pass. Type inference would allow resolving some wildcards by finding the method of the actual object instance passed in. + - Must understand, at the call site, whether the first positional argument in the function def is handled implicitly or not. This is found by looking at the flavor of the Node representing the call target. +- Async definitions are detected, but passed through to the corresponding non-async analyzers; could be annotated. +- Cython; could strip or comment out Cython-specific code as a preprocess step, then treat as Python (will need to be careful to get line numbers right). # How it works @@ -182,7 +221,7 @@ class MyClass: def dostuff(self) self.f() -``` +```` By tracking the name `self.f`, the analyzer will see that `MyClass.dostuff()` uses `some_func()`. diff --git a/pyan/sphinx.py b/pyan/sphinx.py new file mode 100644 index 0000000..ddc31d4 --- /dev/null +++ b/pyan/sphinx.py @@ -0,0 +1,169 @@ +""" +Simple sphinx extension that allows including callgraphs in documentation. + +Example usage: + +``` +.. 
callgraph:: + + +Options are + +- **:no-groups:** (boolean flag): do not group +- **:no-defines:** (boolean flag): if to not draw edges that show which + functions, methods and classes are defined by a class or module +- **:no-uses:** (boolean flag): if to not draw edges that show how a function + uses other functions +- **:no-colors:** (boolean flag): if to not color in callgraph (default is + coloring) +- **:nested-groups:** (boolean flag): if to group by modules and submodules +- **:annotated:** (boolean flag): annotate callgraph with file names +- **:direction:** (string): "horizontal" or "vertical" callgraph +- **:toctree:** (string): path to toctree (as used with autosummary) to link + elements of callgraph to documentation (makes all nodes clickable) +- **:zoomable:** (boolean flag): enables users to zoom and pan callgraph +``` +""" +import re +from typing import Any + +from docutils.parsers.rst import directives +from pyan import create_callgraph +from sphinx.ext.graphviz import align_spec, figure_wrapper, graphviz +from sphinx.util.docutils import SphinxDirective + + +def direction_spec(argument: Any) -> str: + return directives.choice(argument, ("vertical", "horizontal")) + + +class CallgraphDirective(SphinxDirective): + + # this enables content in the directive + has_content = True + + option_spec = { + # graphviz + "alt": directives.unchanged, + "align": align_spec, + "caption": directives.unchanged, + "name": directives.unchanged, + "class": directives.class_option, + # pyan + "no-groups": directives.unchanged, + "no-defines": directives.unchanged, + "no-uses": directives.unchanged, + "no-colors": directives.unchanged, + "nested-groups": directives.unchanged, + "annotated": directives.unchanged, + "direction": direction_spec, + "toctree": directives.unchanged, + "zoomable": directives.unchanged, + } + + def run(self): + func_name = self.content[0] + base_name = func_name.split(".")[0] + if len(func_name.split(".")) == 1: + func_name = None + base_path = 
__import__(base_name).__path__[0] + + direction = "vertical" + if "direction" in self.options: + direction = self.options["direction"] + dotcode = create_callgraph( + filenames=f"{base_path}/**/*.py", + function=func_name, + namespace=base_name, + format="dot", + grouped="no-groups" not in self.options, + draw_uses="no-uses" not in self.options, + draw_defines="no-defines" not in self.options, + nested_groups="nested-groups" in self.options, + colored="no-colors" not in self.options, + annotated="annotated" in self.options, + rankdir={"horizontal": "LR", "vertical": "TB"}[direction], + ) + node = graphviz() + + # insert link targets into groups: first insert link, then reformat link + if "toctree" in self.options: + path = self.options["toctree"].strip("/") + # create raw link + dotcode = re.sub( + r'([\w\d]+)(\s.+), (style="filled")', + r'\1\2, href="../' + path + r'/\1.html", target="_blank", \3', + dotcode, + ) + + def create_link(dot_name): + raw_link = re.sub(r"__(\w)", r".\1", dot_name) + # determine if this name is a class by checking if its first letter is capital + # (heuristic but should work almost always) + splits = raw_link.rsplit(".", 2) + if len(splits) > 1 and splits[-2][0].capitalize() == splits[-2][0]: + # is class + link = ".".join(splits[:-1]) + ".html#" + raw_link + '"' + else: + link = raw_link + '.html"' + return link + + dotcode = re.sub( + r'(href="../' + path + r'/)(\w+)(\.html")', + lambda m: m.groups()[0] + create_link(m.groups()[1]), + dotcode, + ) + + node["code"] = dotcode + node["options"] = {"docname": self.env.docname} + if "graphviz_dot" in self.options: + node["options"]["graphviz_dot"] = self.options["graphviz_dot"] + if "layout" in self.options: + node["options"]["graphviz_dot"] = self.options["layout"] + if "alt" in self.options: + node["alt"] = self.options["alt"] + if "align" in self.options: + node["align"] = self.options["align"] + + if "class" in self.options: + classes = self.options["class"] + else: + classes = [] + if 
"zoomable" in self.options: + if len(classes) == 0: + classes = ["zoomable-callgraph"] + else: + classes.append("zoomable-callgraph") + if len(classes) > 0: + node["classes"] = classes + + if "caption" not in self.options: + self.add_name(node) + return [node] + else: + figure = figure_wrapper(self, node, self.options["caption"]) + self.add_name(figure) + return [figure] + + +def setup(app): + + app.add_directive("callgraph", CallgraphDirective) + app.add_js_file("https://cdn.jsdelivr.net/npm/svg-pan-zoom@3.6.1/dist/svg-pan-zoom.min.js") + + # script to find zoomable svgs + script = """ + window.addEventListener('load', () => { + Array.from(document.getElementsByClassName('zoomable-callgraph')).forEach(function(element) { + svgPanZoom(element); + }); + }) + """ + + app.add_js_file(None, body=script) + + return { + "version": "0.1", + "parallel_read_safe": True, + "parallel_write_safe": True, + } From b8f63daed2a4bf09c483dd56ef426bbba12d7939 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 Jan 2021 15:37:15 +0000 Subject: [PATCH 100/117] Add git-ignore --- .gitignore | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2dc9904 --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +*.csv +*.pkl +*.joblib +*.msgpack +.DS_Store +.ipynb_checkpoints +.venv/ +Endpoint_test/ +run_simulator.py +__pycache__/ + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/source/api + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + + +# others +VERSION +coverage.xml +junit.xml +htmlcov + +# editors +.idea/ +.history/ +.vscode/ + From fdde88ec3a0a642f3352d05d3b5a18db21a62525 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Tue, 5 Jan 2021 15:53:16 +0000 Subject: [PATCH 101/117] Use recursive=True for glob --- modvis.py | 192 ++++++++++++++++++++++++++----------------- pyan/__init__.py | 4 +- pyan/main.py | 206 ++++++++++++++++------------------------------- 3 files changed, 189 insertions(+), 213 deletions(-) diff --git a/modvis.py b/modvis.py index e98fbba..d9300b1 100644 --- a/modvis.py +++ b/modvis.py @@ -11,24 +11,28 @@ from optparse import OptionParser # TODO: migrate to argparse import pyan.node import pyan.visgraph import pyan.writers + # from pyan.anutils import get_module_name + def filename_to_module_name(fullpath): # we need to see __init__, hence we don't use anutils.get_module_name. 
"""'some/path/module.py' -> 'some.path.module'""" if not fullpath.endswith(".py"): raise ValueError("Expected a .py filename, got '{}'".format(fullpath)) rel = ".{}".format(os.path.sep) # ./ if fullpath.startswith(rel): - fullpath = fullpath[len(rel):] + fullpath = fullpath[len(rel) :] fullpath = fullpath[:-3] # remove .py - return fullpath.replace(os.path.sep, '.') + return fullpath.replace(os.path.sep, ".") + def split_module_name(m): """'fully.qualified.name' -> ('fully.qualified', 'name')""" - k = m.rfind('.') + k = m.rfind(".") if k == -1: return ("", m) - return (m[:k], m[(k + 1):]) + return (m[:k], m[(k + 1) :]) + # blacklist = (".git", "build", "dist", "test") # def find_py_files(basedir): @@ -43,6 +47,7 @@ def split_module_name(m): # py_files.append(fullpath) # return py_files + def resolve(current_module, target_module, level): """Return fully qualified name of the target_module in an import. @@ -72,11 +77,12 @@ def resolve(current_module, target_module, level): base = "" break base = base[:k] - return '.'.join((base, target_module)) + return ".".join((base, target_module)) + class ImportVisitor(ast.NodeVisitor): def __init__(self, filenames, logger): - self.modules = {} # modname: {dep0, dep1, ...} + self.modules = {} # modname: {dep0, dep1, ...} self.fullpaths = {} # modname: fullpath self.logger = logger self.analyze(filenames) @@ -115,14 +121,20 @@ class ImportVisitor(ast.NodeVisitor): self.logger.debug(" added possible implicit use of '{}'".format(possible_init)) def visit_Import(self, node): - self.logger.debug("{}:{}: Import {}".format(self.current_module, node.lineno, [alias.name for alias in node.names])) + self.logger.debug( + "{}:{}: Import {}".format(self.current_module, node.lineno, [alias.name for alias in node.names]) + ) for alias in node.names: self.add_dependency(alias.name) # alias.asname not relevant for our purposes def visit_ImportFrom(self, node): # from foo import some_symbol if node.module: - self.logger.debug("{}:{}: ImportFrom 
'{}', relative import level {}".format(self.current_module, node.lineno, node.module, node.level)) + self.logger.debug( + "{}:{}: ImportFrom '{}', relative import level {}".format( + self.current_module, node.lineno, node.module, node.level + ) + ) absname = resolve(self.current_module, node.module, node.level) if node.level > 0: self.logger.debug(" resolved relative import to '{}'".format(absname)) @@ -131,7 +143,11 @@ class ImportVisitor(ast.NodeVisitor): # from . import foo --> module = None; now the **names** refer to modules else: for alias in node.names: - self.logger.debug("{}:{}: ImportFrom '{}', target module '{}', relative import level {}".format(self.current_module, node.lineno, '.' * node.level, alias.name, node.level)) + self.logger.debug( + "{}:{}: ImportFrom '{}', target module '{}', relative import level {}".format( + self.current_module, node.lineno, "." * node.level, alias.name, node.level + ) + ) absname = resolve(self.current_module, alias.name, node.level) if node.level > 0: self.logger.debug(" resolved relative import to '{}'".format(absname)) @@ -147,6 +163,7 @@ class ImportVisitor(ast.NodeVisitor): the cyclic part (where the first and last elements are the same). """ cycles = [] + def walk(m, seen=None, trace=None): trace = (trace or []) + [m] seen = seen or set() @@ -158,6 +175,7 @@ class ImportVisitor(ast.NodeVisitor): for d in sorted(deps): if d in self.modules: walk(d, seen, trace) + for root in sorted(self.modules): walk(root) @@ -171,7 +189,7 @@ class ImportVisitor(ast.NodeVisitor): def prepare_graph(self): # same format as in pyan.analyzer """Postprocessing. Prepare data for pyan.visgraph for graph file generation.""" - self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) self.uses_edges = {} # we have no defines_edges, which doesn't matter as long as we don't enable that option in visgraph. 
@@ -188,11 +206,7 @@ class ImportVisitor(ast.NodeVisitor): # TODO: it could be useful to decide the hue by the top-level directory name # TODO: (after the './' if any), and lightness by the depth in each tree. # TODO: This would be most similar to how Pyan does it for functions/classes. - n = pyan.node.Node(namespace=ns, - name=mod, - ast_node=None, - filename=package, - flavor=pyan.node.Flavor.MODULE) + n = pyan.node.Node(namespace=ns, name=mod, ast_node=None, filename=package, flavor=pyan.node.Flavor.MODULE) n.defined = True # Pyan's analyzer.py allows several nodes to share the same short name, # which is used as the key to self.nodes; but we use the fully qualified @@ -218,68 +232,89 @@ class ImportVisitor(ast.NodeVisitor): for d in deps: assert d.get_name() in self.nodes + def main(): usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" - desc = ('Analyse one or more Python source files and generate an' - 'approximate module dependency graph.') + desc = "Analyse one or more Python source files and generate an" "approximate module dependency graph." 
parser = OptionParser(usage=usage, description=desc) - parser.add_option("--dot", - action="store_true", default=False, - help="output in GraphViz dot format") - parser.add_option("--tgf", - action="store_true", default=False, - help="output in Trivial Graph Format") - parser.add_option("--yed", - action="store_true", default=False, - help="output in yEd GraphML Format") - parser.add_option("-f", "--file", dest="filename", - help="write graph to FILE", metavar="FILE", default=None) - parser.add_option("-l", "--log", dest="logname", - help="write log to LOG", metavar="LOG") - parser.add_option("-v", "--verbose", - action="store_true", default=False, dest="verbose", - help="verbose output") - parser.add_option("-V", "--very-verbose", - action="store_true", default=False, dest="very_verbose", - help="even more verbose output (mainly for debug)") - parser.add_option("-c", "--colored", - action="store_true", default=False, dest="colored", - help="color nodes according to namespace [dot only]") - parser.add_option("-g", "--grouped", - action="store_true", default=False, dest="grouped", - help="group nodes (create subgraphs) according to namespace [dot only]") - parser.add_option("-e", "--nested-groups", - action="store_true", default=False, dest="nested_groups", - help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]") - parser.add_option("-C", "--cycles", - action="store_true", default=False, dest="cycles", - help="detect import cycles and print report to stdout") - parser.add_option("--dot-rankdir", default="TB", dest="rankdir", - help=( - "specifies the dot graph 'rankdir' property for " - "controlling the direction of the graph. " - "Allowed values: ['TB', 'LR', 'BT', 'RL']. 
" - "[dot only]")) - parser.add_option("-a", "--annotated", - action="store_true", default=False, dest="annotated", - help="annotate with module location") + parser.add_option("--dot", action="store_true", default=False, help="output in GraphViz dot format") + parser.add_option("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") + parser.add_option("--yed", action="store_true", default=False, help="output in yEd GraphML Format") + parser.add_option("-f", "--file", dest="filename", help="write graph to FILE", metavar="FILE", default=None) + parser.add_option("-l", "--log", dest="logname", help="write log to LOG", metavar="LOG") + parser.add_option("-v", "--verbose", action="store_true", default=False, dest="verbose", help="verbose output") + parser.add_option( + "-V", + "--very-verbose", + action="store_true", + default=False, + dest="very_verbose", + help="even more verbose output (mainly for debug)", + ) + parser.add_option( + "-c", + "--colored", + action="store_true", + default=False, + dest="colored", + help="color nodes according to namespace [dot only]", + ) + parser.add_option( + "-g", + "--grouped", + action="store_true", + default=False, + dest="grouped", + help="group nodes (create subgraphs) according to namespace [dot only]", + ) + parser.add_option( + "-e", + "--nested-groups", + action="store_true", + default=False, + dest="nested_groups", + help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]", + ) + parser.add_option( + "-C", + "--cycles", + action="store_true", + default=False, + dest="cycles", + help="detect import cycles and print report to stdout", + ) + parser.add_option( + "--dot-rankdir", + default="TB", + dest="rankdir", + help=( + "specifies the dot graph 'rankdir' property for " + "controlling the direction of the graph. " + "Allowed values: ['TB', 'LR', 'BT', 'RL']. 
" + "[dot only]" + ), + ) + parser.add_option( + "-a", "--annotated", action="store_true", default=False, dest="annotated", help="annotate with module location" + ) options, args = parser.parse_args() - filenames = [fn2 for fn in args for fn2 in glob(fn)] + filenames = [fn2 for fn in args for fn2 in glob(fn, recursive=True)] if len(args) == 0: - parser.error('Need one or more filenames to process') + parser.error("Need one or more filenames to process") if options.nested_groups: options.grouped = True graph_options = { - 'draw_defines': False, # we have no defines edges - 'draw_uses': True, - 'colored': options.colored, - 'grouped_alt': False, - 'grouped': options.grouped, - 'nested_groups': options.nested_groups, - 'annotated': options.annotated} + "draw_defines": False, # we have no defines edges + "draw_uses": True, + "colored": options.colored, + "grouped_alt": False, + "grouped": options.grouped, + "nested_groups": options.nested_groups, + "annotated": options.annotated, + } # TODO: use an int argument for verbosity logger = logging.getLogger(__name__) @@ -333,10 +368,13 @@ def main(): for prefix, cycle in cycles: unique_cycles.add(tuple(cycle)) print("Detected the following import cycles (n_results={}).".format(len(unique_cycles))) + def stats(): lengths = [len(x) - 1 for x in unique_cycles] # number of modules in the cycle + def mean(lst): return sum(lst) / len(lst) + def median(lst): tmp = list(sorted(lst)) n = len(lst) @@ -344,8 +382,12 @@ def main(): return tmp[n // 2] # e.g. tmp[5] if n = 11 else: return (tmp[n // 2 - 1] + tmp[n // 2]) / 2 # e.g. 
avg of tmp[4] and tmp[5] if n = 10 + return min(lengths), mean(lengths), median(lengths), max(lengths) - print("Number of modules in a cycle: min = {}, average = {:0.2g}, median = {:0.2g}, max = {}".format(*stats())) + + print( + "Number of modules in a cycle: min = {}, average = {:0.2g}, median = {:0.2g}, max = {}".format(*stats()) + ) for c in sorted(unique_cycles): print(" {}".format(c)) @@ -364,18 +406,16 @@ def main(): graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) if options.dot: - writer = pyan.writers.DotWriter(graph, - options=['rankdir=' + options.rankdir], - output=options.filename, - logger=logger) + writer = pyan.writers.DotWriter( + graph, options=["rankdir=" + options.rankdir], output=options.filename, logger=logger + ) if options.tgf: - writer = pyan.writers.TgfWriter( - graph, output=options.filename, logger=logger) + writer = pyan.writers.TgfWriter(graph, output=options.filename, logger=logger) if options.yed: - writer = pyan.writers.YedWriter( - graph, output=options.filename, logger=logger) + writer = pyan.writers.YedWriter(graph, output=options.filename, logger=logger) if make_graph: writer.run() -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/pyan/__init__.py b/pyan/__init__.py index a4d50e6..faba830 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -27,6 +27,7 @@ def create_callgraph( grouped_alt: bool = False, annotated: bool = False, grouped: bool = True, + max_iter: int = 1000, ) -> str: """ create callgraph based on static code analysis @@ -49,6 +50,7 @@ def create_callgraph( grouped_alt: if to use alternative grouping annotated: if to annotate graph with filenames grouped: if to group by modules + max_iter: maximum number of iterations for filtering. Defaults to 1000. 
Returns: str: callgraph @@ -77,7 +79,7 @@ def create_callgraph( node = v.get_node(function_namespace, function_name) else: node = None - v.filter(node=node, namespace=namespace) + v.filter(node=node, namespace=namespace, max_iter=max_iter) graph = VisualGraph.from_visitor(v, options=graph_options) stream = io.StringIO() diff --git a/pyan/main.py b/pyan/main.py index 5c59564..8dc54d8 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -17,159 +17,115 @@ from .analyzer import CallGraphVisitor from .visgraph import VisualGraph from .writers import TgfWriter, DotWriter, YedWriter, HTMLWriter, SVGWriter + def main(cli_args=None): usage = """%(prog)s FILENAME... [--dot|--tgf|--yed|--svg|--html]""" desc = ( - 'Analyse one or more Python source files and generate an' - 'approximate call graph of the modules, classes and functions' - ' within them.' + "Analyse one or more Python source files and generate an" + "approximate call graph of the modules, classes and functions" + " within them." ) parser = ArgumentParser(usage=usage, description=desc) - parser.add_argument( - "--dot", - action="store_true", - default=False, - help="output in GraphViz dot format" - ) + parser.add_argument("--dot", action="store_true", default=False, help="output in GraphViz dot format") + + parser.add_argument("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") + + parser.add_argument("--svg", action="store_true", default=False, help="output in SVG Format") + + parser.add_argument("--html", action="store_true", default=False, help="output in HTML Format") + + parser.add_argument("--yed", action="store_true", default=False, help="output in yEd GraphML Format") + + parser.add_argument("--file", dest="filename", help="write graph to FILE", metavar="FILE", default=None) + + parser.add_argument("--namespace", dest="namespace", help="filter for NAMESPACE", metavar="NAMESPACE", default=None) + + parser.add_argument("--function", dest="function", help="filter for FUNCTION", 
metavar="FUNCTION", default=None) + + parser.add_argument("-l", "--log", dest="logname", help="write log to LOG", metavar="LOG") + + parser.add_argument("-v", "--verbose", action="store_true", default=False, dest="verbose", help="verbose output") parser.add_argument( - "--tgf", - action="store_true", - default=False, - help="output in Trivial Graph Format" - ) - - parser.add_argument( - "--svg", - action="store_true", - default=False, - help="output in SVG Format" - ) - - parser.add_argument( - "--html", - action="store_true", - default=False, - help="output in HTML Format" - ) - - parser.add_argument( - "--yed", - action="store_true", - default=False, - help="output in yEd GraphML Format" - ) - - parser.add_argument( - "--file", - dest="filename", - help="write graph to FILE", - metavar="FILE", - default=None - ) - - parser.add_argument( - "--namespace", - dest="namespace", - help="filter for NAMESPACE", - metavar="NAMESPACE", - default=None - ) - - parser.add_argument( - "--function", - dest="function", - help="filter for FUNCTION", - metavar="FUNCTION", - default=None - ) - - parser.add_argument( - "-l", "--log", - dest="logname", - help="write log to LOG", - metavar="LOG" - ) - - parser.add_argument( - "-v", "--verbose", - action="store_true", - default=False, - dest="verbose", - help="verbose output" - ) - - parser.add_argument( - "-V", "--very-verbose", + "-V", + "--very-verbose", action="store_true", default=False, dest="very_verbose", - help="even more verbose output (mainly for debug)" + help="even more verbose output (mainly for debug)", ) parser.add_argument( - "-d", "--defines", + "-d", + "--defines", action="store_true", dest="draw_defines", - help="add edges for 'defines' relationships [default]" + help="add edges for 'defines' relationships [default]", ) parser.add_argument( - "-n", "--no-defines", + "-n", + "--no-defines", action="store_false", default=True, dest="draw_defines", - help="do not add edges for 'defines' relationships" + help="do not 
add edges for 'defines' relationships", ) parser.add_argument( - "-u", "--uses", + "-u", + "--uses", action="store_true", default=True, dest="draw_uses", - help="add edges for 'uses' relationships [default]" + help="add edges for 'uses' relationships [default]", ) parser.add_argument( - "-N", "--no-uses", + "-N", + "--no-uses", action="store_false", default=True, dest="draw_uses", - help="do not add edges for 'uses' relationships" + help="do not add edges for 'uses' relationships", ) parser.add_argument( - "-c", "--colored", + "-c", + "--colored", action="store_true", default=False, dest="colored", - help="color nodes according to namespace [dot only]" + help="color nodes according to namespace [dot only]", ) parser.add_argument( - "-G", "--grouped-alt", + "-G", + "--grouped-alt", action="store_true", default=False, dest="grouped_alt", - help="suggest grouping by adding invisible defines edges [only useful with --no-defines]" + help="suggest grouping by adding invisible defines edges [only useful with --no-defines]", ) parser.add_argument( - "-g", "--grouped", + "-g", + "--grouped", action="store_true", default=False, dest="grouped", - help="group nodes (create subgraphs) according to namespace [dot only]" + help="group nodes (create subgraphs) according to namespace [dot only]", ) parser.add_argument( - "-e", "--nested-groups", + "-e", + "--nested-groups", action="store_true", default=False, dest="nested_groups", - help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]" + help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]", ) parser.add_argument( @@ -181,37 +137,38 @@ def main(cli_args=None): "controlling the direction of the graph. " "Allowed values: ['TB', 'LR', 'BT', 'RL']. 
" "[dot only]" - ) + ), ) parser.add_argument( - "-a", "--annotated", + "-a", + "--annotated", action="store_true", default=False, dest="annotated", - help="annotate with module and source line number" + help="annotate with module and source line number", ) known_args, unknown_args = parser.parse_known_args(cli_args) - filenames = [fn2 for fn in unknown_args for fn2 in glob(fn)] + filenames = [fn2 for fn in unknown_args for fn2 in glob(fn, recursive=True)] if len(unknown_args) == 0: - parser.error('Need one or more filenames to process') + parser.error("Need one or more filenames to process") elif len(filenames) == 0: - parser.error('No files found matching given glob: %s' % ' '.join(unknown_args)) + parser.error("No files found matching given glob: %s" % " ".join(unknown_args)) if known_args.nested_groups: known_args.grouped = True graph_options = { - 'draw_defines': known_args.draw_defines, - 'draw_uses': known_args.draw_uses, - 'colored': known_args.colored, - 'grouped_alt': known_args.grouped_alt, - 'grouped': known_args.grouped, - 'nested_groups': known_args.nested_groups, - 'annotated': known_args.annotated + "draw_defines": known_args.draw_defines, + "draw_uses": known_args.draw_uses, + "colored": known_args.colored, + "grouped_alt": known_args.grouped_alt, + "grouped": known_args.grouped, + "nested_groups": known_args.nested_groups, + "annotated": known_args.annotated, } # TODO: use an int argument for verbosity @@ -251,46 +208,23 @@ def main(cli_args=None): writer = None if known_args.dot: - writer = DotWriter( - graph, - options=['rankdir=' + known_args.rankdir], - output=known_args.filename, - logger=logger - ) + writer = DotWriter(graph, options=["rankdir=" + known_args.rankdir], output=known_args.filename, logger=logger) if known_args.html: - writer = HTMLWriter( - graph, - options=['rankdir=' + known_args.rankdir], - output=known_args.filename, - logger=logger - ) + writer = HTMLWriter(graph, options=["rankdir=" + known_args.rankdir], 
output=known_args.filename, logger=logger) if known_args.svg: - writer = SVGWriter( - graph, - options=['rankdir=' + known_args.rankdir], - output=known_args.filename, - logger=logger - ) + writer = SVGWriter(graph, options=["rankdir=" + known_args.rankdir], output=known_args.filename, logger=logger) if known_args.tgf: - writer = TgfWriter( - graph, - output=known_args.filename, - logger=logger - ) + writer = TgfWriter(graph, output=known_args.filename, logger=logger) if known_args.yed: - writer = YedWriter( - graph, - output=known_args.filename, - logger=logger - ) + writer = YedWriter(graph, output=known_args.filename, logger=logger) if writer: writer.run() -if __name__ == '__main__': +if __name__ == "__main__": main() From 498b01606a6f94e42755c010e8e4a6e49b41ce8f Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 13:46:40 +0000 Subject: [PATCH 102/117] Add link to gitignore origin --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2dc9904..ba62d7b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +# based on https://github.com/github/gitignore/blob/master/Python.gitignore *.csv *.pkl *.joblib From 6ba85d9a1a6a0b02795c9e571ce31bcd37398d93 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 13:57:11 +0000 Subject: [PATCH 103/117] fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b1c5dde..098f204 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ Install graphviz (e.g. via `sudo apt-get install graphviz`) and modify `source/c # modify extensions extensions = [ ... 
- "sphinx.ext.graphviz + "sphinx.ext.graphviz" "pyan.sphinx", ] From 2902f5b79b44b7525e7842bc5d2b104132fbc05e Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 13:59:40 +0000 Subject: [PATCH 104/117] Reformat code with black and isort --- .pre-commit-config.yaml | 20 ++ Untitled.ipynb | 231 ++++++++++++++++++++ modvis.py | 4 +- pyan/__init__.py | 8 +- pyan/analyzer.py | 253 +++++++++++++--------- pyan/anutils.py | 39 +++- pyan/main.py | 6 +- pyan/node.py | 29 ++- pyan/sphinx.py | 3 +- pyan/visgraph.py | 107 ++++----- pyan/writers.py | 190 +++++++--------- pyproject.toml | 26 +++ setup.cfg | 24 ++ setup.py | 57 +++-- tests/old_tests/issue3/testi.py | 17 +- tests/old_tests/issue5/meas_xrd.py | 9 +- tests/old_tests/issue5/plot_xrd.py | 21 +- tests/old_tests/issue5/relimport.py | 5 +- tests/test_analyzer.py | 5 +- tests/test_code/submodule1.py | 6 +- tests/test_code/submodule2.py | 6 +- tests/test_code/subpackage1/__init__.py | 2 +- tests/test_code/subpackage1/submodule1.py | 5 +- tests/test_code/subpackage2/submodule1.py | 2 + 24 files changed, 697 insertions(+), 378 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 Untitled.ipynb create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 tests/test_code/subpackage2/submodule1.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..42391e5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - repo: https://gitlab.com/pycqa/flake8 + rev: "" + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black diff --git a/Untitled.ipynb 
b/Untitled.ipynb new file mode 100644 index 0000000..5941ec3 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../qvc/pricing/pricing/data/assets.py']\n" + ] + } + ], + "source": [ + "import pyan as p\n", + "from glob import glob\n", + "import importlib\n", + "importlib.reload(p)\n", + "\n", + "filenames = glob(f\"../qvc/pricing/pricing/data/assets.py\", recursive=True)\n", + "print(filenames)\n", + "import logging\n", + "logging.basicConfig(level=logging.ERROR)\n", + "visitor = p.analyzer.CallGraphVisitor(filenames, logging.getLogger())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_related_nodes(visitor, node, namespace=\"pricing\", i=10):\n", + " new_nodes = [node]\n", + " if i < 0:\n", + " return new_nodes\n", + "\n", + " for n in visitor.uses_edges.get(node, []):\n", + " if n in visitor.uses_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", + " new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", + "\n", + " for n in visitor.defines_edges.get(node, []):\n", + " if n in visitor.defines_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", + " new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", + " return new_nodes\n", + "\n", + "node = [\n", + " n\n", + " for n in visitor.uses_edges.keys()\n", + " if repr(n.flavor) == \"function\" and n.namespace.startswith(\"pricing.data.assets\")\n", + " ][1]\n", + "node\n", + "get_related_nodes(visitor, node)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ 
+ "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{: None,\n", + " : None,\n", + " : None,\n", + " : None,\n", + " : None,\n", + " : None}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{n: n.namespace for n in visitor.uses_edges[node]}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m{\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamespace\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefines_edges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m: " + ] + } + ], + "source": [ + "{n: n.namespace for n in visitor.defines_edges[node]}" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'func': <_ast.Attribute object at 0x7fc0e18d7048>, 'args': [<_ast.Name object at 0x7fc0e18d70b8>], 'keywords': [], 'lineno': 285, 'col_offset': 8}\n" + ] + } + ], + "source": [ + "def print_func(f):\n", + " if isinstance(f, list):\n", + " for s in f:\n", + " print_func(s)\n", + " else:\n", + " print(f.__dict__)\n", + 
"print_func(node.ast_node.body[2].value.func.value)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['../qvc/pricing/pricing/data/assets.py']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node.ast_node" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_node_of_current_namespace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Documents/Github/pyan/pyan/analyzer.py\u001b[0m in \u001b[0;36mget_node_of_current_namespace\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1105\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mno\u001b[0m \u001b[0massociated\u001b[0m \u001b[0mAST\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1106\u001b[0m \"\"\"\n\u001b[0;32m-> 1107\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# name_stack should never be empty (always at least module name)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1109\u001b[0m \u001b[0mnamespace\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m'.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "visitor.get_node_of_current_namespace()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/modvis.py b/modvis.py index d9300b1..e048b7f 100644 --- a/modvis.py +++ b/modvis.py @@ -3,10 +3,10 @@ """A simple import analyzer. Visualize dependencies between modules.""" import ast -import os -import logging from glob import glob +import logging from optparse import OptionParser # TODO: migrate to argparse +import os import pyan.node import pyan.visgraph diff --git a/pyan/__init__.py b/pyan/__init__.py index faba830..2f76619 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from typing import Union, List -import io from glob import glob +import io +from typing import List, Union -from .main import main # noqa: F401, for export only. from .analyzer import CallGraphVisitor -from .writers import SVGWriter, HTMLWriter, DotWriter +from .main import main # noqa: F401, for export only. 
from .visgraph import VisualGraph +from .writers import DotWriter, HTMLWriter, SVGWriter __version__ = "1.1.2" diff --git a/pyan/analyzer.py b/pyan/analyzer.py index df112bf..fc83874 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -2,16 +2,23 @@ # -*- coding: utf-8 -*- """The AST visitor.""" -import logging import ast +import logging import symtable from typing import Union -from .node import Node, Flavor -from .anutils import tail, get_module_name, format_alias, \ - get_ast_node_name, sanitize_exprs, \ - resolve_method_resolution_order, \ - Scope, ExecuteInInnerScope, UnresolvedSuperCallError +from .anutils import ( + ExecuteInInnerScope, + Scope, + UnresolvedSuperCallError, + format_alias, + get_ast_node_name, + get_module_name, + resolve_method_resolution_order, + sanitize_exprs, + tail, +) +from .node import Flavor, Node # TODO: add Cython support (strip type annotations in a preprocess step, then treat as Python) # TODO: built-in functions (range(), enumerate(), zip(), iter(), ...): @@ -33,6 +40,8 @@ from .anutils import tail, get_module_name, format_alias, \ # https://docs.python.org/2/library/compiler.html#module-compiler.ast # https://docs.python.org/3/library/ast.html#abstract-grammar # + + class CallGraphVisitor(ast.NodeVisitor): """A visitor that can be walked over a Python AST, and will derive information about the objects in the AST and how they use each other. 
@@ -55,7 +64,7 @@ class CallGraphVisitor(ast.NodeVisitor): # data gathered from analysis self.defines_edges = {} self.uses_edges = {} - self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) self.scopes = {} # fully qualified name of namespace: Scope object self.class_base_ast_nodes = {} # pass 1: class Node: list of AST nodes @@ -87,7 +96,10 @@ class CallGraphVisitor(ast.NodeVisitor): def process_one(self, filename): """Analyze the specified Python source file.""" if filename not in self.filenames: - raise ValueError("Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" % (filename, self.filenames)) + raise ValueError( + "Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" + % (filename, self.filenames) + ) with open(filename, "rt", encoding="utf-8") as f: content = f.read() self.filename = filename @@ -166,12 +178,7 @@ class CallGraphVisitor(ast.NodeVisitor): """ # first find all imports and map to themselves. 
we will then remap those that are currently pointing # to duplicates or into the void - imports_to_resolve = { - n - for items in self.nodes.values() - for n in items - if n.flavor == Flavor.IMPORTEDITEM - } + imports_to_resolve = {n for items in self.nodes.values() for n in items if n.flavor == Flavor.IMPORTEDITEM} # map real definitions import_mapping = {} while len(imports_to_resolve) > 0: @@ -218,21 +225,16 @@ class CallGraphVisitor(ast.NodeVisitor): import_mapping.update(attribute_import_mapping) # remap nodes based on import mapping - self.nodes = { - name: [import_mapping.get(n, n) for n in items] - for name, items in self.nodes.items() - } + self.nodes = {name: [import_mapping.get(n, n) for n in items] for name, items in self.nodes.items()} self.uses_edges = { - import_mapping.get(from_node, from_node): { - import_mapping.get(to_node, to_node) for to_node in to_nodes - } - for from_node, to_nodes in self.uses_edges.items() if len(to_nodes) > 0 + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.uses_edges.items() + if len(to_nodes) > 0 } self.defines_edges = { - import_mapping.get(from_node, from_node): { - import_mapping.get(to_node, to_node) for to_node in to_nodes - } - for from_node, to_nodes in self.defines_edges.items() if len(to_nodes) > 0 + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.defines_edges.items() + if len(to_nodes) > 0 } def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000): @@ -251,9 +253,7 @@ class CallGraphVisitor(ast.NodeVisitor): # filter the nodes to avoid cluttering the callgraph with irrelevant information filtered_nodes = self.get_related_nodes(node, namespace=namespace, max_iter=max_iter) - self.nodes = { - name: [node for node in nodes if node in filtered_nodes] for name, nodes in 
self.nodes.items() - } + self.nodes = {name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items()} self.uses_edges = { node: {n for n in nodes if n in filtered_nodes} for node, nodes in self.uses_edges.items() @@ -266,7 +266,9 @@ class CallGraphVisitor(ast.NodeVisitor): } return self - def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000) -> set: + def get_related_nodes( + self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000 + ) -> set: """ get nodes that related to `node` or are in `namespace` @@ -286,7 +288,9 @@ class CallGraphVisitor(ast.NodeVisitor): new_nodes = {n for items in self.nodes.values() for n in items} else: new_nodes = { - n for items in self.nodes.values() for n in items + n + for items in self.nodes.values() + for n in items if n.namespace is not None and namespace in n.namespace } @@ -318,9 +322,7 @@ class CallGraphVisitor(ast.NodeVisitor): [ n for n in self.defines_edges.get(item, []) - if n in self.defines_edges - and n not in new_nodes - and namespace in n.namespace + if n in self.defines_edges and n not in new_nodes and namespace in n.namespace ] ) @@ -330,7 +332,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.logger.debug("Module %s, %s" % (self.module_name, self.filename)) # Modules live in the top-level namespace, ''. - module_node = self.get_node('', self.module_name, node, flavor=Flavor.MODULE) + module_node = self.get_node("", self.module_name, node, flavor=Flavor.MODULE) self.associate_node(module_node, node, filename=self.filename) ns = self.module_name @@ -487,13 +489,13 @@ class CallGraphVisitor(ast.NodeVisitor): # is not a valid Python identifier. # # It has no sensible flavor, so we leave its flavor unspecified. 
- nonsense_node = self.get_node(inner_ns, '^^^argument^^^', None) + nonsense_node = self.get_node(inner_ns, "^^^argument^^^", None) # args, vararg (*args), kwonlyargs, kwarg (**kwargs) for a in ast_args.args: # positional sc.defs[a.arg] = nonsense_node if ast_args.vararg is not None: # *args if present sc.defs[ast_args.vararg] = nonsense_node - for a in ast_args.kwonlyargs: # any after *args or * + for a in ast_args.kwonlyargs: # any after *args or * sc.defs[a.arg] = nonsense_node if ast_args.kwarg is not None: # **kwargs if present sc.defs[ast_args.kwarg] = nonsense_node @@ -529,7 +531,10 @@ class CallGraphVisitor(ast.NodeVisitor): self.analyze_module_import(import_item, node) def visit_ImportFrom(self, node): - self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom: from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) # Pyan needs to know the package structure, and how the program # being analyzed is actually going to be invoked (!), to be able to # resolve relative imports correctly. @@ -537,17 +542,29 @@ class CallGraphVisitor(ast.NodeVisitor): # As a solution, we register imports here and later, when all files have been parsed, resolve them. from_node = self.get_node_of_current_namespace() if node.module is None: # resolve relative imports 'None' such as "from . import foo" - self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' * node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (original) from %s import %s, %s:%s" + % ("." 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) tgt_level = node.level - current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] tgt_name = current_module_namespace - self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) elif node.level != 0: # resolve from ..module import foo - self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (original): from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) tgt_level = node.level - current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] - tgt_name = current_module_namespace + '.' + node.module - self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] + tgt_name = current_module_namespace + "." + node.module + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) else: tgt_name = node.module # normal from module.submodule import foo @@ -555,11 +572,9 @@ class CallGraphVisitor(ast.NodeVisitor): for alias in node.names: # check if import is module if tgt_name + "." + alias.name in self.module_to_filename: - to_node = self.get_node('', tgt_name + "." + alias.name, node, flavor=Flavor.MODULE) + to_node = self.get_node("", tgt_name + "." 
+ alias.name, node, flavor=Flavor.MODULE) else: - to_node = self.get_node( - tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM - ) + to_node = self.get_node(tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM) # if there is alias, add extra edge between alias and node if alias.asname is not None: alias_name = alias.asname @@ -587,17 +602,14 @@ class CallGraphVisitor(ast.NodeVisitor): # where it is being imported to, i.e. the **user** from_node = self.get_node_of_current_namespace() # the thing **being used** (under the asname, if any) - mod_node = self.get_node('', src_name, ast_node, flavor=Flavor.MODULE) + mod_node = self.get_node("", src_name, ast_node, flavor=Flavor.MODULE) # if there is alias, add extra edge between alias and node if import_item.asname is not None: alias_name = import_item.asname else: alias_name = mod_node.name self.add_uses_edge(from_node, mod_node) - self.logger.info( - "New edge added for Use import %s in %s" - % (mod_node, from_node) - ) + self.logger.info("New edge added for Use import %s in %s" % (mod_node, from_node)) self.set_value(alias_name, mod_node) # set node to be discoverable in module self.logger.info("From setting name %s to %s" % (alias_name, mod_node)) @@ -620,7 +632,9 @@ class CallGraphVisitor(ast.NodeVisitor): # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): objname = get_ast_node_name(node.value) - self.logger.debug("Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno)) + self.logger.debug( + "Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno) + ) # TODO: self.last_value is a hack. 
Handle names in store context (LHS) # in analyze_binding(), so that visit_Attribute() only needs to handle @@ -630,7 +644,7 @@ class CallGraphVisitor(ast.NodeVisitor): new_value = self.last_value try: if self.set_attribute(node, new_value): - self.logger.info('setattr %s on %s to %s' % (node.attr, objname, new_value)) + self.logger.info("setattr %s on %s to %s" % (node.attr, objname, new_value)) except UnresolvedSuperCallError: # Trying to set something belonging to an unresolved super() # of something; just ignore this attempt to setattr. @@ -646,7 +660,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Both object and attr known. if isinstance(attr_node, Node): - self.logger.info('getattr %s on %s returns %s' % (node.attr, objname, attr_node)) + self.logger.info("getattr %s on %s returns %s" % (node.attr, objname, attr_node)) # add uses edge from_node = self.get_node_of_current_namespace() @@ -680,9 +694,15 @@ class CallGraphVisitor(ast.NodeVisitor): from_node = self.get_node_of_current_namespace() ns = obj_node.get_name() # fully qualified namespace **of attr** to_node = self.get_node(ns, tgt_name, node, flavor=Flavor.ATTRIBUTE) - self.logger.debug("Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.debug( + f"Use from {from_node} to {to_node} (target obj {obj_node} known but target attr " + f"{node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.info( + "New edge added for Use from {from_node} to {to_node} (target obj {obj_node} known but " + f"target attr {node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) # remove resolved wildcard from current site to 
self.remove_wild(from_node, obj_node, node.attr) @@ -700,7 +720,6 @@ class CallGraphVisitor(ast.NodeVisitor): # TODO: self.last_value is a hack. Handle names in store context (LHS) # in analyze_binding(), so that visit_Name() only needs to handle # the load context (i.e. detect uses of the name). - # if isinstance(node.ctx, ast.Store): # when we get here, self.last_value has been set by visit_Assign() self.set_value(node.id, self.last_value) @@ -711,8 +730,8 @@ class CallGraphVisitor(ast.NodeVisitor): to_node = self.get_value(tgt_name) # resolves "self" if needed current_class = self.get_current_class() if current_class is None or to_node is not current_class: # add uses edge only if not pointing to "self" - ###TODO if the name is a local variable (i.e. in the innermost scope), and - ###has no known value, then don't try to create a Node for it. + # TODO if the name is a local variable (i.e. in the innermost scope), and + # has no known value, then don't try to create a Node for it. if not isinstance(to_node, Node): # namespace=None means we don't know the namespace yet to_node = self.get_node(None, tgt_name, node, flavor=Flavor.UNKNOWN) @@ -738,9 +757,15 @@ class CallGraphVisitor(ast.NodeVisitor): values = sanitize_exprs(node.value) # values is the same for each set of targets for targets in node.targets: targets = sanitize_exprs(targets) - self.logger.debug("Assign %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], - [get_ast_node_name(x) for x in values], - self.filename, node.lineno)) + self.logger.debug( + "Assign %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) self.analyze_binding(targets, values) def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ @@ -748,13 +773,15 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = None if node.value is not None: value = sanitize_exprs(node.value) - self.logger.debug("AnnAssign %s %s, %s:%s" % 
(get_ast_node_name(target[0]), - get_ast_node_name(value[0]), - self.filename, node.lineno)) + self.logger.debug( + "AnnAssign %s %s, %s:%s" + % (get_ast_node_name(target[0]), get_ast_node_name(value[0]), self.filename, node.lineno) + ) self.analyze_binding(target, value) else: # just a type declaration - self.logger.debug("AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), - self.filename, node.lineno)) + self.logger.debug( + "AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), self.filename, node.lineno) + ) self.last_value = None self.visit(target[0]) # TODO: use the type annotation from node.annotation? @@ -764,10 +791,16 @@ class CallGraphVisitor(ast.NodeVisitor): targets = sanitize_exprs(node.target) values = sanitize_exprs(node.value) # values is the same for each set of targets - self.logger.debug("AugAssign %s %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], - type(node.op), - [get_ast_node_name(x) for x in values], - self.filename, node.lineno)) + self.logger.debug( + "AugAssign %s %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + type(node.op), + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) # TODO: maybe no need to handle tuple unpacking in AugAssign? 
(but simpler to use the same implementation) self.analyze_binding(targets, values) @@ -849,8 +882,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(getattr(node, field2)) def visit_Call(self, node): - self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), - self.filename, node.lineno)) + self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), self.filename, node.lineno)) # visit args to detect uses for arg in node.args: @@ -871,7 +903,9 @@ class CallGraphVisitor(ast.NodeVisitor): to_node = result_node self.logger.debug("Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node) + ) else: # generic function call # Visit the function name part last, so that inside a binding form, @@ -891,10 +925,12 @@ class CallGraphVisitor(ast.NodeVisitor): if self.last_value in self.class_base_ast_nodes: from_node = self.get_node_of_current_namespace() class_node = self.last_value - to_node = self.get_node(class_node.get_name(), '__init__', None, flavor=Flavor.METHOD) + to_node = self.get_node(class_node.get_name(), "__init__", None, flavor=Flavor.METHOD) self.logger.debug("Use from %s to %s (call creates an instance)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node) + ) def visit_With(self, node): self.logger.debug("With (context manager), %s:%s" % (self.filename, node.lineno)) @@ -906,7 +942,7 @@ class CallGraphVisitor(ast.NodeVisitor): withed_obj_node = graph_node self.logger.debug("Use from %s to With %s" % (from_node, 
withed_obj_node)) - for methodname in ('__enter__', '__exit__'): + for methodname in ("__enter__", "__exit__"): to_node = self.get_node(withed_obj_node.get_name(), methodname, None, flavor=Flavor.METHOD) if self.add_uses_edge(from_node, to_node): self.logger.info("New edge added for Use from %s to %s" % (from_node, to_node)) @@ -1036,13 +1072,13 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = None for tgt, val in zip(targets, captured_values): self.last_value = val - self.visit(tgt) # LHS, name in a store context + self.visit(tgt) # LHS, name in a store context self.last_value = None else: # FIXME: for now, do the wrong thing in the non-trivial case # old code, no tuple unpacking support for value in values: self.visit(value) # set self.last_value to **something** on the RHS and hope for the best - for tgt in targets: # LHS, name in a store context + for tgt in targets: # LHS, name in a store context self.visit(tgt) self.last_value = None @@ -1107,7 +1143,10 @@ class CallGraphVisitor(ast.NodeVisitor): # build a temporary ast.Attribute AST node so that we can use get_attribute() tmp_astnode = ast.Attribute(value=obj_astnode, attr=attrname, ctx=obj_astnode.ctx) obj_node, attr_node = self.get_attribute(tmp_astnode) - self.logger.debug("Resolve %s() of %s: returning attr node %s" % (funcname, get_ast_node_name(obj_astnode), attr_node)) + self.logger.debug( + "Resolve %s() of %s: returning attr node %s" + % (funcname, get_ast_node_name(obj_astnode), attr_node) + ) return attr_node # add implementations for other built-in funcnames here if needed @@ -1127,8 +1166,9 @@ class CallGraphVisitor(ast.NodeVisitor): if not isinstance(ast_node, ast.Attribute): raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node))) - self.logger.debug("Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), - ast_node.attr, type(ast_node.ctx))) + self.logger.debug( + "Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), ast_node.attr, 
type(ast_node.ctx)) + ) # Resolve nested attributes # @@ -1173,7 +1213,7 @@ class CallGraphVisitor(ast.NodeVisitor): # The CLASS flavor is the best match, as these constants # are object types. # - obj_node = self.get_node('', tn, None, flavor=Flavor.CLASS) + obj_node = self.get_node("", tn, None, flavor=Flavor.CLASS) # attribute of a function call. Detect cases like super().dostuff() elif isinstance(ast_node.value, ast.Call): @@ -1210,12 +1250,14 @@ class CallGraphVisitor(ast.NodeVisitor): # in different scopes, as we should). # scopes = {} + def process(parent_ns, table): sc = Scope(table) ns = "%s.%s" % (parent_ns, sc.name) if len(sc.name) else parent_ns scopes[ns] = sc for t in table.get_children(): process(ns, t) + process(self.module_name, symtable.symtable(code, filename, compile_type="exec")) # add to existing scopes (while not overwriting any existing definitions with None) @@ -1247,7 +1289,7 @@ class CallGraphVisitor(ast.NodeVisitor): """ assert len(self.name_stack) # name_stack should never be empty (always at least module name) - namespace = '.'.join(self.name_stack[0:-1]) + namespace = ".".join(self.name_stack[0:-1]) name = self.name_stack[-1] return self.get_node(namespace, name, None, flavor=Flavor.NAMESPACE) @@ -1268,13 +1310,15 @@ class CallGraphVisitor(ast.NodeVisitor): if sc is not None: value = sc.defs[name] if isinstance(value, Node): - self.logger.info('Get %s in %s, found in %s, value %s' % (name, self.scope_stack[-1], sc, value)) + self.logger.info("Get %s in %s, found in %s, value %s" % (name, self.scope_stack[-1], sc, value)) return value else: # TODO: should always be a Node or None - self.logger.debug('Get %s in %s, found in %s: value %s is not a Node' % (name, self.scope_stack[-1], sc, value)) + self.logger.debug( + "Get %s in %s, found in %s: value %s is not a Node" % (name, self.scope_stack[-1], sc, value) + ) else: - self.logger.debug('Get %s in %s: no Node value (or name not in scope)' % (name, self.scope_stack[-1])) + 
self.logger.debug("Get %s in %s: no Node value (or name not in scope)" % (name, self.scope_stack[-1])) def set_value(self, name, value): """Set the value of name in the current scope. Value must be a Node.""" @@ -1289,12 +1333,12 @@ class CallGraphVisitor(ast.NodeVisitor): if sc is not None: if isinstance(value, Node): sc.defs[name] = value - self.logger.info('Set %s in %s to %s' % (name, sc, value)) + self.logger.info("Set %s in %s to %s" % (name, sc, value)) else: # TODO: should always be a Node or None - self.logger.debug('Set %s in %s: value %s is not a Node' % (name, sc, value)) + self.logger.debug("Set %s in %s: value %s is not a Node" % (name, sc, value)) else: - self.logger.debug('Set: name %s not in scope' % (name)) + self.logger.debug("Set: name %s not in scope" % (name)) ########################################################################### # Attribute getter and setter @@ -1451,10 +1495,10 @@ class CallGraphVisitor(ast.NodeVisitor): def get_parent_node(self, graph_node): """Get the parent node of the given Node. (Used in postprocessing.)""" - if '.' in graph_node.namespace: - ns, name = graph_node.namespace.rsplit('.', 1) + if "." in graph_node.namespace: + ns, name = graph_node.namespace.rsplit(".", 1) else: - ns, name = '', graph_node.namespace + ns, name = "", graph_node.namespace return self.get_node(ns, name, None) def associate_node(self, graph_node, ast_node, filename=None): @@ -1667,17 +1711,26 @@ class CallGraphVisitor(ast.NodeVisitor): n.defined = False def cull_inherited(self): - """For each use edge from W to X.name, if it also has an edge to W to Y.name where Y is used by X, then remove the first edge.""" + """ + For each use edge from W to X.name, if it also has an edge to W to Y.name where + Y is used by X, then remove the first edge. 
+ """ removed_uses_edges = [] for n in self.uses_edges: for n2 in self.uses_edges[n]: inherited = False for n3 in self.uses_edges[n]: - if n3.name == n2.name and n2.namespace is not None and n3.namespace is not None and n3.namespace != n2.namespace: + if ( + n3.name == n2.name + and n2.namespace is not None + and n3.namespace is not None + and n3.namespace != n2.namespace + ): pn2 = self.get_parent_node(n2) pn3 = self.get_parent_node(n3) - # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) + # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: + # remove the second edge W to Y.name (TODO: add an option to choose this) if pn2 in self.uses_edges and pn3 in self.uses_edges[pn2]: # remove the first edge W to X.name inherited = True @@ -1698,7 +1751,7 @@ class CallGraphVisitor(ast.NodeVisitor): # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): - if name in ('lambda', 'listcomp', 'setcomp', 'dictcomp', 'genexpr'): + if name in ("lambda", "listcomp", "setcomp", "dictcomp", "genexpr"): for n in self.nodes[name]: pn = self.get_parent_node(n) if n in self.uses_edges: diff --git a/pyan/anutils.py b/pyan/anutils.py index 9cd1bb4..3edd08a 100644 --- a/pyan/anutils.py +++ b/pyan/anutils.py @@ -2,29 +2,33 @@ # -*- coding: utf-8 -*- """Utilities for analyzer.""" -import os.path import ast +import os.path + from .node import Flavor + def head(lst): if len(lst): return lst[0] + def tail(lst): if len(lst) > 1: return lst[1:] else: return [] + def get_module_name(filename): """Try to determine the full module name of a source file, by figuring out if its directory looks like a package (i.e. 
has an __init__.py file).""" - if os.path.basename(filename) == '__init__.py': + if os.path.basename(filename) == "__init__.py": return get_module_name(os.path.dirname(filename)) - init_path = os.path.join(os.path.dirname(filename), '__init__.py') - mod_name = os.path.basename(filename).replace('.py', '') + init_path = os.path.join(os.path.dirname(filename), "__init__.py") + mod_name = os.path.basename(filename).replace(".py", "") if not os.path.exists(init_path): return mod_name @@ -36,7 +40,8 @@ def get_module_name(filename): if not os.path.dirname(filename): return mod_name - return get_module_name(os.path.dirname(filename)) + '.' + mod_name + return get_module_name(os.path.dirname(filename)) + "." + mod_name + def format_alias(x): """Return human-readable description of an ast.alias (used in Import and ImportFrom nodes).""" @@ -48,6 +53,7 @@ def format_alias(x): else: return "%s" % (x.name) + def get_ast_node_name(x): """Return human-readable name of ast.Attribute or ast.Name. Pass through anything else.""" if isinstance(x, ast.Attribute): @@ -58,19 +64,23 @@ def get_ast_node_name(x): else: return x + # Helper for handling binding forms. def sanitize_exprs(exprs): """Convert ast.Tuples in exprs to Python tuples; wrap result in a Python tuple.""" + def process(expr): if isinstance(expr, (ast.Tuple, ast.List)): return expr.elts # .elts is a Python tuple else: return [expr] + if isinstance(exprs, (tuple, list)): return [process(expr) for expr in exprs] else: return process(exprs) + def resolve_method_resolution_order(class_base_nodes, logger): """Compute the method resolution order (MRO) for each of the analyzed classes. 
@@ -85,17 +95,21 @@ def resolve_method_resolution_order(class_base_nodes, logger): from functools import reduce from operator import add + def C3_find_good_head(heads, tails): # find an element of heads which is not in any of the tails flat_tails = reduce(add, tails, []) # flatten the outer level for hd in heads: if hd not in flat_tails: break else: # no break only if there are cyclic dependencies. - raise LinearizationImpossible("MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails)) + raise LinearizationImpossible( + "MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails) + ) return hd def remove_all(elt, lst): # remove all occurrences of elt from lst, return a copy return [x for x in lst if x != elt] + def remove_all_in(elt, lists): # remove elt from all lists, return a copy return [remove_all(elt, lst) for lst in lists] @@ -117,6 +131,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): mro = {} # result try: memo = {} # caching/memoization + def C3_linearize(node): logger.debug("MRO: C3 linearizing %s" % (node)) seen.add(node) @@ -137,6 +152,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): memo[node] = [node] + C3_merge(lists) logger.debug("MRO: C3 linearized %s, result %s" % (node, memo[node])) return memo[node] + for node in class_base_nodes: logger.debug("MRO: analyzing class %s" % (node)) seen = set() # break cycles (separately for each class we start from) @@ -150,6 +166,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): # analyzed is so badly formed that the MRO algorithm fails) memo = {} # caching/memoization + def lookup_bases_recursive(node): seen.add(node) if node not in memo: @@ -170,10 +187,13 @@ def resolve_method_resolution_order(class_base_nodes, logger): return mro + class UnresolvedSuperCallError(Exception): """For specifically signaling an unresolved super().""" + pass + class Scope: """Adaptor that makes 
scopes look somewhat like those from the Python 2 compiler module, as far as Pyan's CallGraphVisitor is concerned.""" @@ -181,15 +201,16 @@ class Scope: def __init__(self, table): """table: SymTable instance from symtable.symtable()""" name = table.get_name() - if name == 'top': - name = '' # Pyan defines the top level as anonymous + if name == "top": + name = "" # Pyan defines the top level as anonymous self.name = name self.type = table.get_type() # useful for __repr__() - self.defs = {iden:None for iden in table.get_identifiers()} # name:assigned_value + self.defs = {iden: None for iden in table.get_identifiers()} # name:assigned_value def __repr__(self): return "" % (self.type, self.name) + # A context manager, sort of a friend of CallGraphVisitor (depends on implementation details) class ExecuteInInnerScope: """Execute a code block with the scope stack augmented with an inner scope. diff --git a/pyan/main.py b/pyan/main.py index 8dc54d8..c13ed83 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -9,13 +9,13 @@ for rendering by e.g. GraphViz or yEd. """ -import logging -from glob import glob from argparse import ArgumentParser +from glob import glob +import logging from .analyzer import CallGraphVisitor from .visgraph import VisualGraph -from .writers import TgfWriter, DotWriter, YedWriter, HTMLWriter, SVGWriter +from .writers import DotWriter, HTMLWriter, SVGWriter, TgfWriter, YedWriter def main(cli_args=None): diff --git a/pyan/node.py b/pyan/node.py index 2f2dbf4..49c8c5b 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -12,11 +12,12 @@ def make_safe_label(label): out = label for word in unsafe_words: out = out.replace(word, "%sX" % word) - return out.replace('.', '__').replace('*', '') + return out.replace(".", "__").replace("*", "") class Flavor(Enum): """Flavor describes the kind of object a node represents.""" + UNSPECIFIED = "---" # as it says on the tin UNKNOWN = "???" 
# not determined by analysis (wildcard) @@ -100,7 +101,7 @@ class Node: Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: return self.name @@ -108,7 +109,7 @@ class Node: """Return the short name, plus module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1 and self.ast_node is not None: return "%s\\n(%s:%d)" % (self.name, self.filename, self.ast_node.lineno) @@ -119,11 +120,17 @@ class Node: """Return the short name, plus namespace, and module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1: if self.ast_node is not None: - return "%s\\n\\n(%s:%d,\\n%s in %s)" % (self.name, self.filename, self.ast_node.lineno, repr(self.flavor), self.namespace) + return "%s\\n\\n(%s:%d,\\n%s in %s)" % ( + self.name, + self.filename, + self.ast_node.lineno, + repr(self.flavor), + self.namespace, + ) else: return "%s\\n\\n(%s in %s)" % (self.name, repr(self.flavor), self.namespace) else: @@ -132,12 +139,12 @@ class Node: def get_name(self): """Return the full name of this node.""" - if self.namespace == '': + if self.namespace == "": return self.name elif self.namespace is None: - return '*.' + self.name + return "*." + self.name else: - return self.namespace + '.' + self.name + return self.namespace + "." + self.name def get_level(self): """Return the level of this node (in terms of nested namespaces). 
@@ -149,7 +156,7 @@ class Node: if self.namespace == "": return 0 else: - return 1 + self.namespace.count('.') + return 1 + self.namespace.count(".") def get_toplevel_namespace(self): """Return the name of the top-level namespace of this node, or "" if none.""" @@ -158,7 +165,7 @@ class Node: if self.namespace is None: # group all unknowns in one namespace, "*" return "*" - idx = self.namespace.find('.') + idx = self.namespace.find(".") if idx > -1: return self.namespace[0:idx] else: @@ -179,4 +186,4 @@ class Node: return make_safe_label(self.namespace) def __repr__(self): - return '' % (repr(self.flavor), self.get_name()) + return "" % (repr(self.flavor), self.get_name()) diff --git a/pyan/sphinx.py b/pyan/sphinx.py index ddc31d4..717c07c 100644 --- a/pyan/sphinx.py +++ b/pyan/sphinx.py @@ -28,10 +28,11 @@ import re from typing import Any from docutils.parsers.rst import directives -from pyan import create_callgraph from sphinx.ext.graphviz import align_spec, figure_wrapper, graphviz from sphinx.util.docutils import SphinxDirective +from pyan import create_callgraph + def direction_spec(argument: Any) -> str: return directives.choice(argument, ("vertical", "horizontal")) diff --git a/pyan/visgraph.py b/pyan/visgraph.py index 9360710..748e67a 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- """Format-agnostic representation of the output graph.""" -import re -import logging import colorsys +import logging +import re + class Colorizer: """Output graph color manager. @@ -75,10 +76,9 @@ class VisualNode(object): """ A node in the output graph: colors, internal ID, human-readable label, ... 
""" - def __init__( - self, id, label='', flavor='', - fill_color='', text_color='', group=''): - self.id = id # graphing software friendly label (no special chars) + + def __init__(self, id, label="", flavor="", fill_color="", text_color="", group=""): + self.id = id # graphing software friendly label (no special chars) self.label = label # human-friendly label self.flavor = flavor self.fill_color = fill_color @@ -86,15 +86,11 @@ class VisualNode(object): self.group = group def __repr__(self): - optionals = [ - repr(s) for s in [ - self.label, self.flavor, - self.fill_color, self.text_color, self.group] if s] + optionals = [repr(s) for s in [self.label, self.flavor, self.fill_color, self.text_color, self.group] if s] if optionals: - return ('VisualNode(' + repr(self.id) + - ', ' + ', '.join(optionals) + ')') + return "VisualNode(" + repr(self.id) + ", " + ", ".join(optionals) + ")" else: - return 'VisualNode(' + repr(self.id) + ')' + return "VisualNode(" + repr(self.id) + ")" class VisualEdge(object): @@ -103,6 +99,7 @@ class VisualEdge(object): flavor is meant to be 'uses' or 'defines' """ + def __init__(self, source, target, flavor, color): self.source = source self.target = target @@ -110,15 +107,11 @@ class VisualEdge(object): self.color = color def __repr__(self): - return ( - 'Edge(' + self.source.label + ' ' + self.flavor + ' ' + - self.target.label + ')') + return "Edge(" + self.source.label + " " + self.flavor + " " + self.target.label + ")" class VisualGraph(object): - def __init__( - self, id, label, nodes=None, edges=None, subgraphs=None, - grouped=False): + def __init__(self, id, label, nodes=None, edges=None, subgraphs=None, grouped=False): self.id = id self.label = label self.nodes = nodes or [] @@ -128,13 +121,13 @@ class VisualGraph(object): @classmethod def from_visitor(cls, visitor, options=None, logger=None): - colored = options.get('colored', False) - nested = options.get('nested_groups', False) - grouped_alt = options.get('grouped_alt', 
False) - grouped = nested or options.get('grouped', False) # nested -> grouped - annotated = options.get('annotated', False) - draw_defines = options.get('draw_defines', False) - draw_uses = options.get('draw_uses', False) + colored = options.get("colored", False) + nested = options.get("nested_groups", False) + grouped_alt = options.get("grouped_alt", False) + grouped = nested or options.get("grouped", False) # nested -> grouped + annotated = options.get("annotated", False) + draw_defines = options.get("draw_defines", False) + draw_uses = options.get("draw_uses", False) # Terminology: # - what Node calls "label" is a computer-friendly unique identifier @@ -146,12 +139,18 @@ class VisualGraph(object): if annotated: if grouped: # group label includes namespace already - def labeler(n): return n.get_annotated_name() + def labeler(n): + return n.get_annotated_name() + else: # the node label is the only place to put the namespace info - def labeler(n): return n.get_long_annotated_name() + def labeler(n): + return n.get_long_annotated_name() + else: - def labeler(n): return n.get_short_name() + + def labeler(n): + return n.get_short_name() logger = logger or logging.getLogger(__name__) @@ -168,34 +167,35 @@ class VisualGraph(object): for node in visited_nodes: filenames.add(node.filename) return filenames - colorizer = Colorizer(num_colors=len(find_filenames()) + 1, - colored=colored, logger=logger) + + colorizer = Colorizer(num_colors=len(find_filenames()) + 1, colored=colored, logger=logger) nodes_dict = dict() - root_graph = cls('G', label='', grouped=grouped) + root_graph = cls("G", label="", grouped=grouped) subgraph = root_graph namespace_stack = [] - prev_namespace = '' # The namespace '' is first in visited_nodes. + prev_namespace = "" # The namespace '' is first in visited_nodes. 
for node in visited_nodes: - logger.info('Looking at %s' % node.name) + logger.info("Looking at %s" % node.name) # Create the node itself and add it to nodes_dict idx, fill_RGBA, text_RGB = colorizer.make_colors(node) visual_node = VisualNode( - id=node.get_label(), - label=labeler(node), - flavor=repr(node.flavor), - fill_color=fill_RGBA, - text_color=text_RGB, - group=idx) + id=node.get_label(), + label=labeler(node), + flavor=repr(node.flavor), + fill_color=fill_RGBA, + text_color=text_RGB, + group=idx, + ) nodes_dict[node] = visual_node # next namespace? if grouped and node.namespace != prev_namespace: if not prev_namespace: - logger.info('New namespace %s' % (node.namespace)) + logger.info("New namespace %s" % (node.namespace)) else: - logger.info('New namespace %s, old was %s' % (node.namespace, prev_namespace)) + logger.info("New namespace %s, old was %s" % (node.namespace, prev_namespace)) prev_namespace = node.namespace label = node.get_namespace_label() @@ -209,14 +209,11 @@ class VisualGraph(object): m = re.match(namespace_stack[-1].label, node.namespace) # The '.' check catches siblings in cases like # MeshGenerator vs. Mesh. - while (m is None or - m.end() == len(node.namespace) or - node.namespace[m.end()] != '.'): + while m is None or m.end() == len(node.namespace) or node.namespace[m.end()] != ".": namespace_stack.pop() if not len(namespace_stack): break - m = re.match( - namespace_stack[-1].label, node.namespace) + m = re.match(namespace_stack[-1].label, node.namespace) parentgraph = namespace_stack[-1] if len(namespace_stack) else root_graph parentgraph.subgraphs.append(subgraph) @@ -236,17 +233,12 @@ class VisualGraph(object): # place closer together those nodes that are linked by a # defines relationship. 
# - color = "#838b8b" if draw_defines else '#ffffff00' + color = "#838b8b" if draw_defines else "#ffffff00" for n in visitor.defines_edges: if n.defined: for n2 in visitor.defines_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'defines', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "defines", color)) if draw_uses: color = "#000000" @@ -254,11 +246,6 @@ class VisualGraph(object): if n.defined: for n2 in visitor.uses_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'uses', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "uses", color)) return root_graph diff --git a/pyan/writers.py b/pyan/writers.py index a48e8ba..5b3335d 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -3,11 +3,12 @@ """Graph markup writers.""" +import io +import logging import os import subprocess import sys -import logging -import io + from jinja2 import Template @@ -17,7 +18,7 @@ class Writer(object): self.output = output self.logger = logger or logging.getLogger(__name__) self.indent_level = 0 - self.tabstop = tabstop * ' ' + self.tabstop = tabstop * " " def log(self, msg): self.logger.info(msg) @@ -29,15 +30,15 @@ class Writer(object): self.indent_level -= level def write(self, line): - self.outstream.write(self.tabstop * self.indent_level + line + '\n') + self.outstream.write(self.tabstop * self.indent_level + line + "\n") def run(self): - self.log('%s running' % type(self)) + self.log("%s running" % type(self)) try: if isinstance(self.output, io.StringIO): # write to stream self.outstream = self.output else: - self.outstream = open(self.output, 'w') # write to file + self.outstream = open(self.output, "w") # write to file except TypeError: self.outstream = sys.stdout self.start_graph() @@ -88,100 +89,77 @@ class Writer(object): class TgfWriter(Writer): def __init__(self, graph, output=None, logger=None): - 
Writer.__init__( - self, graph, - output=output, - logger=logger) + Writer.__init__(self, graph, output=output, logger=logger) self.i = 1 self.id_map = {} def write_node(self, node): - self.write('%d %s' % (self.i, node.label)) + self.write("%d %s" % (self.i, node.label)) self.id_map[node] = self.i self.i += 1 def start_edges(self): - self.write('#') + self.write("#") def write_edge(self, edge): - flavor = 'U' if edge.flavor == 'uses' else 'D' - self.write( - '%s %s %s' % - (self.id_map[edge.source], self.id_map[edge.target], flavor)) + flavor = "U" if edge.flavor == "uses" else "D" + self.write("%s %s %s" % (self.id_map[edge.source], self.id_map[edge.target], flavor)) class DotWriter(Writer): - def __init__(self, graph, - options=None, output=None, logger=None, tabstop=4): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + def __init__(self, graph, options=None, output=None, logger=None, tabstop=4): + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) options = options or [] if graph.grouped: options += ['clusterrank="local"'] - self.options = ', '.join(options) + self.options = ", ".join(options) self.grouped = graph.grouped def start_graph(self): - self.write('digraph G {') - self.write(' graph [' + self.options + '];') + self.write("digraph G {") + self.write(" graph [" + self.options + "];") self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) # Name must begin with "cluster" to be recognized as a cluster by GraphViz. 
- self.write( - "subgraph cluster_%s {\n" % graph.id) + self.write("subgraph cluster_%s {\n" % graph.id) self.indent() # translucent gray (no hue to avoid visual confusion with any # group of colored nodes) - self.write( - 'graph [style="filled,rounded",' - 'fillcolor="#80808018", label="%s"];' - % graph.label) + self.write('graph [style="filled,rounded",' 'fillcolor="#80808018", label="%s"];' % graph.label) def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) # terminate previous subgraph self.dedent() - self.write('}') + self.write("}") def write_node(self, node): - self.log('Write node %s' % node.label) + self.log("Write node %s" % node.label) self.write( '%s [label="%s", style="filled", fillcolor="%s",' - ' fontcolor="%s", group="%s"];' - % ( - node.id, node.label, - node.fill_color, node.text_color, node.group)) + ' fontcolor="%s", group="%s"];' % (node.id, node.label, node.fill_color, node.text_color, node.group) + ) def write_edge(self, edge): source = edge.source target = edge.target color = edge.color - if edge.flavor == 'defines': - self.write( - ' %s -> %s [style="dashed",' - ' color="%s"];' - % (source.id, target.id, color)) + if edge.flavor == "defines": + self.write(' %s -> %s [style="dashed",' ' color="%s"];' % (source.id, target.id, color)) else: # edge.flavor == 'uses': - self.write( - ' %s -> %s [style="solid",' - ' color="%s"];' - % (source.id, target.id, color)) + self.write(' %s -> %s [style="solid",' ' color="%s"];' % (source.id, target.id, color)) def finish_graph(self): - self.write('}') # terminate "digraph G {" + self.write("}") # terminate "digraph G {" class SVGWriter(DotWriter): - def run(self): # write dot file - self.log('%s running' % type(self)) + self.log("%s running" % type(self)) self.outstream = io.StringIO() self.start_graph() self.write_subgraph(self.graph) @@ -190,10 +168,7 @@ class SVGWriter(DotWriter): # convert to svg svg = subprocess.run( - f"dot 
-Tsvg", - shell=True, - stdout=subprocess.PIPE, - input=self.outstream.getvalue().encode() + "dot -Tsvg", shell=True, stdout=subprocess.PIPE, input=self.outstream.getvalue().encode() ).stdout.decode() if self.output: @@ -207,7 +182,6 @@ class SVGWriter(DotWriter): class HTMLWriter(SVGWriter): - def run(self): with io.StringIO() as svg_stream: # run SVGWriter with stream as output @@ -234,11 +208,7 @@ class HTMLWriter(SVGWriter): class YedWriter(Writer): def __init__(self, graph, output=None, logger=None, tabstop=2): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) self.grouped = graph.grouped self.indent_level = 0 self.edge_id = 0 @@ -246,18 +216,19 @@ class YedWriter(Writer): def start_graph(self): self.write('') self.write( - '') + '' + ) self.indent() self.write('') self.write('') @@ -265,96 +236,87 @@ class YedWriter(Writer): self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) self.write('' % graph.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') - self.write('%s' - % graph.label) + self.write( + '%s' % graph.label + ) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.write('' % graph.id) self.indent() def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_node(self, node): - self.log('Write node %s' % node.label) + self.log("Write node %s" % node.label) width = 20 + 10 * len(node.label) 
self.write('' % node.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('' % ("30", width)) - self.write('' - % node.fill_color) - self.write('') - self.write('%s' - % node.label) + self.write('' % node.fill_color) + self.write('') + self.write("%s" % node.label) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_edge(self, edge): self.edge_id += 1 source = edge.source target = edge.target - self.write( - '' - % (self.edge_id, source.id, target.id)) + self.write('' % (self.edge_id, source.id, target.id)) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() - if edge.flavor == 'defines': - self.write('' - % edge.color) + if edge.flavor == "defines": + self.write('' % edge.color) else: - self.write('' - % edge.color) + self.write('' % edge.color) self.write('') self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def finish_graph(self): self.dedent(2) - self.write(' ') + self.write(" ") self.dedent() - self.write('') + self.write("") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5c2e34e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.isort] +profile = "black" +honor_noqa = true +line_length = 120 +combine_as_imports = true +force_sort_within_sections = true +known_first_party = "pyan" + +[tool.black] +line-length = 120 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | egg-info + | buck-out + | build + | dist + | env +)/ +''' diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3392002 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[flake8] +max-line-length = 120 +show-source = true +ignore = + E203, # space before : (needed for how 
black formats slicing) + W503, # line break before binary operator + W504, # line break after binary operator + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # ignore not easy to read variables like i l I etc. + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. + S001, # found modulo formatter (incorrect picks up mod operations) + F401 # unused imports + W605 # invalid escape sequence (e.g. for LaTeX) +exclude = docs/build/*.py, + node_modules/*.py, + .eggs/*.py, + versioneer.py, + venv/*, + .venv/*, + .git/* + .history/* diff --git a/setup.py b/setup.py index 81f7f93..f6b2aca 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,9 @@ or python3 setup.py --help bdist_wheel # or any command """ -import os import ast +import os + from setuptools import setup ######################################################### @@ -32,12 +33,13 @@ SHORTDESC = "Offline call graph generator for Python 3" # Long description for package homepage on PyPI # DESC = ( - 'Generate approximate call graphs for Python programs.\n' - '\n' - 'Pyan takes one or more Python source files, performs a ' - '(rather superficial) static analysis, and constructs a directed graph of ' - 'the objects in the combined source, and how they define or ' - 'use each other. The graph can be output for rendering by GraphViz or yEd.') + "Generate approximate call graphs for Python programs.\n" + "\n" + "Pyan takes one or more Python source files, performs a " + "(rather superficial) static analysis, and constructs a directed graph of " + "the objects in the combined source, and how they define or " + "use each other. The graph can be output for rendering by GraphViz or yEd." 
+) ######################################################### # Init @@ -49,7 +51,7 @@ DESC = ( # # https://stackoverflow.com/q/2058802/1959808 # -init_py_path = os.path.join('pyan', '__init__.py') +init_py_path = os.path.join("pyan", "__init__.py") version = None try: with open(init_py_path) as f: @@ -78,48 +80,41 @@ setup( author="Juha Jeronen", author_email="juha.m.jeronen@gmail.com", url="https://github.com/Technologicat/pyan", - description=SHORTDESC, long_description=DESC, - license="GPL 2.0", - # free-form text field; # https://stackoverflow.com/q/34994130/1959808 platforms=["Linux"], - # See # https://pypi.python.org/pypi?%3Aaction=list_classifiers # # for the standard classifiers. # - classifiers=["Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - ("License :: OSI Approved :: " - "GNU General Public License v2 (GPLv2)"), - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Topic :: Software Development" - ], - + classifiers=[ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + ("License :: OSI Approved :: " "GNU General Public License v2 (GPLv2)"), + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Topic :: Software Development", + ], # See # http://setuptools.readthedocs.io/en/latest/setuptools.html # setup_requires=["wheel"], install_requires=["jinja2"], provides=["pyan"], - # keywords for PyPI (in case you upload your project) # # e.g. 
the keywords your project uses as topics on GitHub, # minus "python" (if there) # keywords=["call-graph", "static-code-analysis"], - # Declare packages so that python -m setup build will copy .py files # (especially __init__.py). # @@ -127,14 +122,12 @@ setup( # so they must also be declared. # packages=["pyan"], - zip_safe=True, - package_data={'pyan': ["callgraph.html"]}, + package_data={"pyan": ["callgraph.html"]}, include_package_data=True, - entry_points={ - 'console_scripts': [ - 'pyan3 = pyan.main:main', + "console_scripts": [ + "pyan3 = pyan.main:main", ] }, ) diff --git a/tests/old_tests/issue3/testi.py b/tests/old_tests/issue3/testi.py index 5798231..4da7858 100644 --- a/tests/old_tests/issue3/testi.py +++ b/tests/old_tests/issue3/testi.py @@ -1,14 +1,21 @@ # -*- coding: utf-8; -*- # See issue #3 + def f(): return [x for x in range(10)] + def g(): return [(x, y) for x in range(10) for y in range(10)] -def h(): - return [([(name, allargs) for name, _, _, allargs, _ in recs], - {name: inargs for name, inargs, _, _, _ in recs}, - {name: meta for name, _, _, _, meta in recs}) - for recs in (results[key] for key in sorted(results.keys()))] + +# def h(): +# return [ +# ( +# [(name, allargs) for name, _, _, allargs, _ in recs], +# {name: inargs for name, inargs, _, _, _ in recs}, +# {name: meta for name, _, _, _, meta in recs}, +# ) +# for recs in (results[key] for key in sorted(results.keys())) +# ] diff --git a/tests/old_tests/issue5/meas_xrd.py b/tests/old_tests/issue5/meas_xrd.py index 1a4587e..072bcb8 100644 --- a/tests/old_tests/issue5/meas_xrd.py +++ b/tests/old_tests/issue5/meas_xrd.py @@ -3,6 +3,7 @@ import os.path import numpy as np import pandas.io.parsers + class MeasXRD: def __init__(self, path: str): if not os.path.isfile(path): @@ -23,11 +24,5 @@ class MeasXRD: line = file.readline() self.data = pandas.io.parsers.read_csv( - path, - skiprows=row_ind, - dtype={ - "Angle": np.float_, - "Intensity": np.int_ - }, - engine="c" + path, skiprows=row_ind, 
dtype={"Angle": np.float_, "Intensity": np.int_}, engine="c" ) diff --git a/tests/old_tests/issue5/plot_xrd.py b/tests/old_tests/issue5/plot_xrd.py index f2526e5..fc2d408 100644 --- a/tests/old_tests/issue5/plot_xrd.py +++ b/tests/old_tests/issue5/plot_xrd.py @@ -1,24 +1,13 @@ -import plotly.offline as py import plotly.graph_objs as go +import plotly.offline as py from . import meas_xrd -def plot_xrd(meas: meas_xrd.MeasXRD): - trace = go.Scatter( - x=meas.data["Angle"], - y=meas.data["Intensity"] - ) - layout = go.Layout( - title="XRD data", - xaxis=dict( - title="Angle" - ), - yaxis=dict( - title="Intensity", - type="log" - ) - ) +def plot_xrd(meas: meas_xrd.MeasXRD): + trace = go.Scatter(x=meas.data["Angle"], y=meas.data["Intensity"]) + + layout = go.Layout(title="XRD data", xaxis=dict(title="Angle"), yaxis=dict(title="Intensity", type="log")) data = [trace] fig = go.Figure(data=data, layout=layout) diff --git a/tests/old_tests/issue5/relimport.py b/tests/old_tests/issue5/relimport.py index 1bf9f9d..9abb7fa 100644 --- a/tests/old_tests/issue5/relimport.py +++ b/tests/old_tests/issue5/relimport.py @@ -1,7 +1,6 @@ # -*- coding: utf-8; -*- # See issue #5 -from .mod2 import foo +from . import mod1, mod1 as moo from ..mod3 import bar -from . import mod1 -from . 
import mod1 as moo +from .mod2 import foo diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 0bc2b2a..8198445 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -1,10 +1,12 @@ -import logging from glob import glob +import logging import os + import pytest from pyan.analyzer import CallGraphVisitor + @pytest.fixture def callgraph(): filenames = glob(os.path.join(os.path.dirname(__file__), "test_code/**/*.py"), recursive=True) @@ -17,6 +19,7 @@ def get_node(nodes, name): assert len(filtered_nodes) == 1, f"Node with name {name} should exist" return filtered_nodes[0] + def get_in_dict(node_dict, name): return node_dict[get_node(node_dict.keys(), name)] diff --git a/tests/test_code/submodule1.py b/tests/test_code/submodule1.py index c7a7d31..d6893a4 100644 --- a/tests/test_code/submodule1.py +++ b/tests/test_code/submodule1.py @@ -1,23 +1,21 @@ -from test_code.subpackage1 import A from test_code import subpackage1 as subpackage +from test_code.subpackage1 import A def test_func1(a): return a + def test_func2(a): return a class B: - def __init__(self, k): self.a = 1 - def to_A(self): return A(self) def get_a_via_A(self): return test_func1(self.to_A().b.a) - diff --git a/tests/test_code/submodule2.py b/tests/test_code/submodule2.py index d5f66ca..76706b5 100644 --- a/tests/test_code/submodule2.py +++ b/tests/test_code/submodule2.py @@ -1,7 +1,9 @@ -from . import submodule1 import test_code.submodule1 as b +from . 
import submodule1 + A = 32 + def test_2(a): - return submodule1.test_func2(a) + A + b.test_func1(a) \ No newline at end of file + return submodule1.test_func2(a) + A + b.test_func1(a) diff --git a/tests/test_code/subpackage1/__init__.py b/tests/test_code/subpackage1/__init__.py index 9b81aef..d213d49 100644 --- a/tests/test_code/subpackage1/__init__.py +++ b/tests/test_code/subpackage1/__init__.py @@ -1,3 +1,3 @@ from test_code.subpackage1.submodule1 import A -__all__ = ["A"] \ No newline at end of file +__all__ = ["A"] diff --git a/tests/test_code/subpackage1/submodule1.py b/tests/test_code/subpackage1/submodule1.py index 10204f5..7798ee2 100644 --- a/tests/test_code/subpackage1/submodule1.py +++ b/tests/test_code/subpackage1/submodule1.py @@ -1,7 +1,6 @@ - from ..submodule2 import test_2 -class A: +class A: def __init__(self, b): - self.b = test_2(b) \ No newline at end of file + self.b = test_2(b) diff --git a/tests/test_code/subpackage2/submodule1.py b/tests/test_code/subpackage2/submodule1.py new file mode 100644 index 0000000..5d2722d --- /dev/null +++ b/tests/test_code/subpackage2/submodule1.py @@ -0,0 +1,2 @@ +def test_func1(): + pass From b1fb96033a42fc60fac9df3e996a3bc65602622b Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:00:40 +0000 Subject: [PATCH 105/117] Add missing end of file blank lines --- .gitignore | 1 - AUTHORS.md | 2 +- LICENSE.md | 20 ++++++++++---------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index ba62d7b..990fdc0 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,3 @@ htmlcov .idea/ .history/ .vscode/ - diff --git a/AUTHORS.md b/AUTHORS.md index 921d71f..b7af3fb 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -14,6 +14,6 @@ This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeron HTML and SVG export by Jan Beitner. -Support for relative imports by Jan Beitner and Rakan Alanazi. +Support for relative imports by Jan Beitner and Rakan Alanazi. 
Further contributions by Ioannis Filippidis, Jan Malek, José Eduardo Montenegro Cavalcanti de Oliveira, Mantas Zimnickas, Sam Basak, Brady Deetz, and GitHub user dmfreemon. diff --git a/LICENSE.md b/LICENSE.md index af5153d..00c115f 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,7 +2,7 @@ Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc. + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies @@ -96,17 +96,17 @@ portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: - + **a)** You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. - + **b)** You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. - + **c)** If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement @@ -143,12 +143,12 @@ the scope of this License. 
under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: - + **a)** Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, - + **b)** Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable @@ -156,7 +156,7 @@ copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, - + **c)** Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the @@ -331,7 +331,7 @@ when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome - to redistribute it under certain conditions; type `show c' + to redistribute it under certain conditions; type `show c' for details. The hypothetical commands \`show w' and \`show c' should show the @@ -346,7 +346,7 @@ if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' - (which makes passes at compilers) written + (which makes passes at compilers) written by James Hacker. signature of Ty Coon, 1 April 1989 @@ -358,4 +358,4 @@ you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the [GNU Lesser General Public License](http://www.gnu.org/licenses/lgpl.html) instead of this -License. \ No newline at end of file +License. 
From c851d384708973afe08c66dcf7420b38aab76f00 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:03:43 +0000 Subject: [PATCH 106/117] Remove notebook --- Untitled.ipynb | 231 ------------------------------------------------- 1 file changed, 231 deletions(-) delete mode 100644 Untitled.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index 5941ec3..0000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,231 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['../qvc/pricing/pricing/data/assets.py']\n" - ] - } - ], - "source": [ - "import pyan as p\n", - "from glob import glob\n", - "import importlib\n", - "importlib.reload(p)\n", - "\n", - "filenames = glob(f\"../qvc/pricing/pricing/data/assets.py\", recursive=True)\n", - "print(filenames)\n", - "import logging\n", - "logging.basicConfig(level=logging.ERROR)\n", - "visitor = p.analyzer.CallGraphVisitor(filenames, logging.getLogger())" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def get_related_nodes(visitor, node, namespace=\"pricing\", i=10):\n", - " new_nodes = [node]\n", - " if i < 0:\n", - " return new_nodes\n", - "\n", - " for n in visitor.uses_edges.get(node, []):\n", - " if n in visitor.uses_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", - " new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", - "\n", - " for n in visitor.defines_edges.get(node, []):\n", - " if n in visitor.defines_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", - " new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", - " return new_nodes\n", - "\n", - "node = 
[\n", - " n\n", - " for n in visitor.uses_edges.keys()\n", - " if repr(n.flavor) == \"function\" and n.namespace.startswith(\"pricing.data.assets\")\n", - " ][1]\n", - "node\n", - "get_related_nodes(visitor, node)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "node" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{: None,\n", - " : None,\n", - " : None,\n", - " : None,\n", - " : None,\n", - " : None}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "{n: n.namespace for n in visitor.uses_edges[node]}" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m{\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamespace\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefines_edges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m: " - ] - } - ], - "source": [ - "{n: n.namespace for n in visitor.defines_edges[node]}" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "{'func': <_ast.Attribute object at 0x7fc0e18d7048>, 'args': [<_ast.Name object at 0x7fc0e18d70b8>], 'keywords': [], 'lineno': 285, 'col_offset': 8}\n" - ] - } - ], - "source": [ - "def print_func(f):\n", - " if isinstance(f, list):\n", - " for s in f:\n", - " print_func(s)\n", - " else:\n", - " print(f.__dict__)\n", - "print_func(node.ast_node.body[2].value.func.value)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['../qvc/pricing/pricing/data/assets.py']" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "node.ast_node" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_node_of_current_namespace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/Documents/Github/pyan/pyan/analyzer.py\u001b[0m in \u001b[0;36mget_node_of_current_namespace\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1105\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mno\u001b[0m \u001b[0massociated\u001b[0m \u001b[0mAST\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1106\u001b[0m \"\"\"\n\u001b[0;32m-> 1107\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# name_stack should never be empty (always at 
least module name)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1109\u001b[0m \u001b[0mnamespace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAssertionError\u001b[0m: " - ] - } - ], - "source": [ - "visitor.get_node_of_current_namespace()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 758b59c50e0a2a87c3a62926b0ce01aec24949a8 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:07:43 +0000 Subject: [PATCH 107/117] Combine strings where possible --- modvis.py | 2 +- pyan/writers.py | 14 +++++++------- setup.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modvis.py b/modvis.py index e048b7f..2431014 100644 --- a/modvis.py +++ b/modvis.py @@ -235,7 +235,7 @@ class ImportVisitor(ast.NodeVisitor): def main(): usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" - desc = "Analyse one or more Python source files and generate an" "approximate module dependency graph." + desc = "Analyse one or more Python source files and generate an approximate module dependency graph." 
parser = OptionParser(usage=usage, description=desc) parser.add_option("--dot", action="store_true", default=False, help="output in GraphViz dot format") parser.add_option("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") diff --git a/pyan/writers.py b/pyan/writers.py index 5b3335d..9531cc9 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -128,7 +128,7 @@ class DotWriter(Writer): # translucent gray (no hue to avoid visual confusion with any # group of colored nodes) - self.write('graph [style="filled,rounded",' 'fillcolor="#80808018", label="%s"];' % graph.label) + self.write('graph [style="filled,rounded", fillcolor="#80808018", label="%s"];' % graph.label) def finish_subgraph(self, graph): self.log("Finish subgraph %s" % graph.label) @@ -148,9 +148,9 @@ class DotWriter(Writer): target = edge.target color = edge.color if edge.flavor == "defines": - self.write(' %s -> %s [style="dashed",' ' color="%s"];' % (source.id, target.id, color)) + self.write(' %s -> %s [style="dashed", color="%s"];' % (source.id, target.id, color)) else: # edge.flavor == 'uses': - self.write(' %s -> %s [style="solid",' ' color="%s"];' % (source.id, target.id, color)) + self.write(' %s -> %s [style="solid", color="%s"];' % (source.id, target.id, color)) def finish_graph(self): self.write("}") # terminate "digraph G {" @@ -250,7 +250,7 @@ class YedWriter(Writer): self.indent() self.write('') self.write( - '%s' % graph.label + '%s' % graph.label ) self.write('') self.dedent() @@ -282,7 +282,7 @@ class YedWriter(Writer): self.indent() self.write('' % ("30", width)) self.write('' % node.fill_color) - self.write('') + self.write('') self.write("%s" % node.label) self.write('') self.dedent() @@ -303,9 +303,9 @@ class YedWriter(Writer): self.write("") self.indent() if edge.flavor == "defines": - self.write('' % edge.color) + self.write('' % edge.color) else: - self.write('' % edge.color) + self.write('' % edge.color) self.write('') self.write('') self.dedent() 
diff --git a/setup.py b/setup.py index f6b2aca..9dfae43 100644 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ setup( "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", - ("License :: OSI Approved :: " "GNU General Public License v2 (GPLv2)"), + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3", From b9746aec1626694a5ce5f45436179b6cdae1f950 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:10:53 +0000 Subject: [PATCH 108/117] Separate back into 2 import statements --- tests/old_tests/issue5/relimport.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/old_tests/issue5/relimport.py b/tests/old_tests/issue5/relimport.py index 9abb7fa..1145c25 100644 --- a/tests/old_tests/issue5/relimport.py +++ b/tests/old_tests/issue5/relimport.py @@ -1,6 +1,7 @@ # -*- coding: utf-8; -*- # See issue #5 -from . import mod1, mod1 as moo +from . import mod1 # noqa +from . 
import mod1 as moo # noqa from ..mod3 import bar from .mod2 import foo From 21417f04e69f76793dd1a015d8da9a1afde4b589 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:12:56 +0000 Subject: [PATCH 109/117] make valid function h in old tests --- tests/old_tests/issue3/testi.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/old_tests/issue3/testi.py b/tests/old_tests/issue3/testi.py index 4da7858..f4bfd4d 100644 --- a/tests/old_tests/issue3/testi.py +++ b/tests/old_tests/issue3/testi.py @@ -10,12 +10,12 @@ def g(): return [(x, y) for x in range(10) for y in range(10)] -# def h(): -# return [ -# ( -# [(name, allargs) for name, _, _, allargs, _ in recs], -# {name: inargs for name, inargs, _, _, _ in recs}, -# {name: meta for name, _, _, _, meta in recs}, -# ) -# for recs in (results[key] for key in sorted(results.keys())) -# ] +def h(results): + return [ + ( + [(name, allargs) for name, _, _, allargs, _ in recs], + {name: inargs for name, inargs, _, _, _ in recs}, + {name: meta for name, _, _, _, meta in recs}, + ) + for recs in (results[key] for key in sorted(results.keys())) + ] From 617d543f269e8726d90392adda7126a545d01871 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 14:15:00 +0000 Subject: [PATCH 110/117] Resolve modules even if packages do not contain inits - allow direct specification of root --- pyan/__init__.py | 4 +- pyan/analyzer.py | 5 ++- pyan/anutils.py | 38 ++++++++++++------- pyan/main.py | 16 +++++++- pyan/sphinx.py | 1 + tests/test_analyzer.py | 14 +++++++ .../{submodule1.py => submodule_hidden1.py} | 0 7 files changed, 60 insertions(+), 18 deletions(-) rename tests/test_code/subpackage2/{submodule1.py => submodule_hidden1.py} (100%) diff --git a/pyan/__init__.py b/pyan/__init__.py index 2f76619..a62cfae 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -16,6 +16,7 @@ __version__ = "1.1.2" # TODO: fix code duplication with main.py, should have just one 
implementation. def create_callgraph( filenames: Union[List[str], str] = "**/*.py", + root: str = None, function: Union[str, None] = None, namespace: Union[str, None] = None, format: str = "dot", @@ -36,6 +37,7 @@ def create_callgraph( filenames: glob pattern or list of glob patterns to identify filenames to parse (`**` for multiple directories) example: **/*.py for all python files + root: path to known root directory at which package root sits. Defaults to None, i.e. it will be infered. function: if defined, function name to filter for, e.g. "my_module.my_function" to only include calls that are related to `my_function` namespace: if defined, namespace to filter for, e.g. "my_module", it is highly @@ -71,7 +73,7 @@ def create_callgraph( "annotated": annotated, } - v = CallGraphVisitor(filenames) + v = CallGraphVisitor(filenames, root=root) if function or namespace: if function: function_name = function.split(".")[-1] diff --git a/pyan/analyzer.py b/pyan/analyzer.py index fc83874..1b01687 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -51,7 +51,7 @@ class CallGraphVisitor(ast.NodeVisitor): all files. 
This way use information between objects in different files can be gathered.""" - def __init__(self, filenames, logger=None): + def __init__(self, filenames, root: str = None, logger=None): self.logger = logger or logging.getLogger(__name__) # full module names for all given files @@ -60,6 +60,7 @@ class CallGraphVisitor(ast.NodeVisitor): mod_name = get_module_name(filename) self.module_to_filename[mod_name] = filename self.filenames = filenames + self.root = root # data gathered from analysis self.defines_edges = {} @@ -103,7 +104,7 @@ class CallGraphVisitor(ast.NodeVisitor): with open(filename, "rt", encoding="utf-8") as f: content = f.read() self.filename = filename - self.module_name = get_module_name(filename) + self.module_name = get_module_name(filename, root=self.root) self.analyze_scopes(content, filename) # add to the currently known scopes self.visit(ast.parse(content, filename)) self.module_name = None diff --git a/pyan/anutils.py b/pyan/anutils.py index 3edd08a..8063e61 100644 --- a/pyan/anutils.py +++ b/pyan/anutils.py @@ -20,27 +20,37 @@ def tail(lst): return [] -def get_module_name(filename): +def get_module_name(filename, root: str = None): """Try to determine the full module name of a source file, by figuring out - if its directory looks like a package (i.e. has an __init__.py file).""" + if its directory looks like a package (i.e. has an __init__.py file or + there is a .py file in it ).""" if os.path.basename(filename) == "__init__.py": - return get_module_name(os.path.dirname(filename)) + # init file means module name is directory name + module_path = os.path.dirname(filename) + else: + # otherwise it is the filename without extension + module_path = filename.replace(".py", "") - init_path = os.path.join(os.path.dirname(filename), "__init__.py") - mod_name = os.path.basename(filename).replace(".py", "") + # find the module root - walk up the tree and check if it contains .py files - if yes. 
it is the new root + directories = [(module_path, True)] + if root is None: + while directories[0][0] != os.path.dirname(directories[0][0]): + potential_root = os.path.dirname(directories[0][0]) + is_root = any([f == "__init__.py" for f in os.listdir(potential_root)]) + directories.insert(0, (potential_root, is_root)) - if not os.path.exists(init_path): - return mod_name + # keep directories where itself of parent is root + while not directories[0][1]: + directories.pop(0) - # blank path means we're looking at __init__.py, in cwd, so its module name is "__init__" - if not filename: - return "__init__" + else: # root is already known - just walk up until it is matched + while directories[0][0] != root: + potential_root = os.path.dirname(directories[0][0]) + directories.insert(0, (potential_root, True)) - if not os.path.dirname(filename): - return mod_name - - return get_module_name(os.path.dirname(filename)) + "." + mod_name + mod_name = ".".join([os.path.basename(f[0]) for f in directories]) + return mod_name def format_alias(x): diff --git a/pyan/main.py b/pyan/main.py index c13ed83..18821a1 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -12,6 +12,7 @@ from argparse import ArgumentParser from glob import glob import logging +import os from .analyzer import CallGraphVisitor from .visgraph import VisualGraph @@ -149,10 +150,23 @@ def main(cli_args=None): help="annotate with module and source line number", ) + parser.add_argument( + "--root", + default=None, + dest="root", + help="Package root directory. 
Is inferred by default.", + ) + known_args, unknown_args = parser.parse_known_args(cli_args) filenames = [fn2 for fn in unknown_args for fn2 in glob(fn, recursive=True)] + # determine root + if known_args.root is not None: + root = os.path.abspath(known_args.root) + else: + root = None + if len(unknown_args) == 0: parser.error("Need one or more filenames to process") elif len(filenames) == 0: @@ -189,7 +203,7 @@ def main(cli_args=None): handler = logging.FileHandler(known_args.logname) logger.addHandler(handler) - v = CallGraphVisitor(filenames, logger) + v = CallGraphVisitor(filenames, logger, root=root) if known_args.function or known_args.namespace: diff --git a/pyan/sphinx.py b/pyan/sphinx.py index 717c07c..72c45ff 100644 --- a/pyan/sphinx.py +++ b/pyan/sphinx.py @@ -74,6 +74,7 @@ class CallgraphDirective(SphinxDirective): direction = self.options["direction"] dotcode = create_callgraph( filenames=f"{base_path}/**/*.py", + root=base_path, function=func_name, namespace=base_name, format="dot", diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 8198445..f1e1d57 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -48,3 +48,17 @@ def test_resolve_use_in_function(callgraph): uses = get_in_dict(callgraph.uses_edges, "test_code.submodule2.test_2") get_node(uses, "test_code.submodule1.test_func1") get_node(uses, "test_code.submodule1.test_func2") + + +def test_resolve_package_without___init__(callgraph): + defines = get_in_dict(callgraph.defines_edges, "test_code.subpackage2.submodule_hidden1") + get_node(defines, "test_code.subpackage2.submodule_hidden1.test_func1") + + +def test_resolve_package_with_known_root(): + dirname = os.path.dirname(__file__) + filenames = glob(os.path.join(dirname, "test_code/**/*.py"), recursive=True) + callgraph = CallGraphVisitor(filenames, logger=logging.getLogger(), root=dirname) + dirname_base = os.path.basename(dirname) + defines = get_in_dict(callgraph.defines_edges, 
f"{dirname_base}.test_code.subpackage2.submodule_hidden1") + get_node(defines, f"{dirname_base}.test_code.subpackage2.submodule_hidden1.test_func1") diff --git a/tests/test_code/subpackage2/submodule1.py b/tests/test_code/subpackage2/submodule_hidden1.py similarity index 100% rename from tests/test_code/subpackage2/submodule1.py rename to tests/test_code/subpackage2/submodule_hidden1.py From 8eb3cd55ddedfb660c6fa44c4b09b9fd5c65d8c8 Mon Sep 17 00:00:00 2001 From: Jan Beitner Date: Thu, 14 Jan 2021 15:02:38 +0000 Subject: [PATCH 111/117] fix typo --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index a62cfae..be23b8d 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -37,7 +37,7 @@ def create_callgraph( filenames: glob pattern or list of glob patterns to identify filenames to parse (`**` for multiple directories) example: **/*.py for all python files - root: path to known root directory at which package root sits. Defaults to None, i.e. it will be infered. + root: path to known root directory at which package root sits. Defaults to None, i.e. it will be inferred. function: if defined, function name to filter for, e.g. "my_module.my_function" to only include calls that are related to `my_function` namespace: if defined, namespace to filter for, e.g. "my_module", it is highly From ce3c5bf24823d261ad6310e626221ee439e76736 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 14:28:14 +0200 Subject: [PATCH 112/117] version bump, since new functionality --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index be23b8d..5cdb2d5 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -10,7 +10,7 @@ from .main import main # noqa: F401, for export only. 
from .visgraph import VisualGraph from .writers import DotWriter, HTMLWriter, SVGWriter -__version__ = "1.1.2" +__version__ = "1.2.0" # TODO: fix code duplication with main.py, should have just one implementation. From d321df8b182796ddd9328dd0f754aac84dc7e523 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 15:17:57 +0200 Subject: [PATCH 113/117] fix #62 --- pyan/analyzer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 1b01687..75e9cb1 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -774,9 +774,11 @@ class CallGraphVisitor(ast.NodeVisitor): self.last_value = None if node.value is not None: value = sanitize_exprs(node.value) + # issue #62: value may be an empty list, so it doesn't always have any elements + # even after `sanitize_exprs`. self.logger.debug( "AnnAssign %s %s, %s:%s" - % (get_ast_node_name(target[0]), get_ast_node_name(value[0]), self.filename, node.lineno) + % (get_ast_node_name(target[0]), get_ast_node_name(value), self.filename, node.lineno) ) self.analyze_binding(target, value) else: # just a type declaration From 1b56e564971ca620f89807fe5c44b762346dd5de Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 15:22:05 +0200 Subject: [PATCH 114/117] fix stray whitespace in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9dfae43..e741070 100644 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ setup( "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3", From cf13cc7e2981511733a4d9dd4851f4f9299552c4 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 15:30:07 +0200 Subject: [PATCH 
115/117] pre-emptive version bump --- pyan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyan/__init__.py b/pyan/__init__.py index 5cdb2d5..f23ab45 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -10,7 +10,7 @@ from .main import main # noqa: F401, for export only. from .visgraph import VisualGraph from .writers import DotWriter, HTMLWriter, SVGWriter -__version__ = "1.2.0" +__version__ = "1.2.1" # TODO: fix code duplication with main.py, should have just one implementation. From bc1df0602e8824311ac16a3c9ada5b14eb2dcc14 Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 15:30:28 +0200 Subject: [PATCH 116/117] fix stray backtick in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 098f204..81c3115 100644 --- a/README.md +++ b/README.md @@ -221,7 +221,7 @@ class MyClass: def dostuff(self) self.f() -```` +``` By tracking the name `self.f`, the analyzer will see that `MyClass.dostuff()` uses `some_func()`. From d1dac4c06a63781fd2ce724d4d5c5166e49095de Mon Sep 17 00:00:00 2001 From: Juha Jeronen Date: Thu, 11 Feb 2021 15:31:08 +0200 Subject: [PATCH 117/117] fix more markdown typoage in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 81c3115..d1f19dc 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ Example to create a callgraph for the function `pyan.create_callgraph` that is zoomable, is defined from left to right and links each node to the API documentation that was created at the toctree path `api`. -```` +``` .. callgraph:: pyan.create_callgraph :toctree: api :zoomable: @@ -212,7 +212,7 @@ From the viewpoint of graphing the defines and uses relations, the interesting p Bindings are tracked, with lexical scoping, to determine which type of object, or which function, each name points to at any given point in the source code being analyzed. 
This allows tracking things like: ```python -def some_func() +def some_func(): pass class MyClass: