diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..990fdc0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# based on https://github.com/github/gitignore/blob/master/Python.gitignore +*.csv +*.pkl +*.joblib +*.msgpack +.DS_Store +.ipynb_checkpoints +.venv/ +Endpoint_test/ +run_simulator.py +__pycache__/ + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/source/api + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + + +# others +VERSION +coverage.xml +junit.xml +htmlcov + +# editors +.idea/ +.history/ +.vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..42391e5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - repo: https://gitlab.com/pycqa/flake8 + rev: "" + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..b7af3fb --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,19 @@ +Original [pyan.py](https://github.com/ejrh/ejrh/blob/master/utils/pyan.py) for Python 2 by Edmund Horner, 2012. 
[Original blog post with explanation](http://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/). + +[Coloring and grouping](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/) for GraphViz output by Juha Jeronen. + +[Git repository cleanup](https://github.com/davidfraser/pyan/) and maintenance by David Fraser. + +[yEd GraphML output, and framework for easily adding new output formats](https://github.com/davidfraser/pyan/pull/1) by Patrick Massot. + +A bugfix [[2]](https://github.com/davidfraser/pyan/pull/2) and the option `--dot-rankdir` [[3]](https://github.com/davidfraser/pyan/pull/3) contributed by GitHub user ch41rmn. + +A bug in `.tgf` output [[4]](https://github.com/davidfraser/pyan/pull/4) pointed out and fix suggested by Adam Eijdenberg. + +This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeronen. + +HTML and SVG export by Jan Beitner. + +Support for relative imports by Jan Beitner and Rakan Alanazi. + +Further contributions by Ioannis Filippidis, Jan Malek, José Eduardo Montenegro Cavalcanti de Oliveira, Mantas Zimnickas, Sam Basak, Brady Deetz, and GitHub user dmfreemon. diff --git a/LICENSE.md b/LICENSE.md index af5153d..00c115f 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,7 +2,7 @@ Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc. + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies @@ -96,17 +96,17 @@ portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: - + **a)** You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. - + **b)** You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. - + **c)** If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement @@ -143,12 +143,12 @@ the scope of this License. under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: - + **a)** Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, - + **b)** Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable @@ -156,7 +156,7 @@ copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, - + **c)** Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the @@ -331,7 +331,7 @@ when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
This is free software, and you are welcome - to redistribute it under certain conditions; type `show c' + to redistribute it under certain conditions; type `show c' for details. The hypothetical commands \`show w' and \`show c' should show the @@ -346,7 +346,7 @@ if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' - (which makes passes at compilers) written + (which makes passes at compilers) written by James Hacker. signature of Ty Coon, 1 April 1989 @@ -358,4 +358,4 @@ you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the [GNU Lesser General Public License](http://www.gnu.org/licenses/lgpl.html) instead of this -License. \ No newline at end of file +License. diff --git a/README.md b/README.md index 6cfc9d7..d1f19dc 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,36 @@ -# Pyan3: Offline call graph generator for Python 3 +# Pyan3 -Generate approximate call graphs for Python programs. +Offline call graph generator for Python 3 + +[![Build Status](https://travis-ci.com/edumco/pyan.svg?branch=master)](https://travis-ci.com/edumco/pyan) +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fedumco%2Fpyan.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fedumco%2Fpyan?ref=badge_shield) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/7cba5ba5d3694a42a1252243e3634b5e)](https://www.codacy.com/manual/edumco/pyan?utm_source=github.com&utm_medium=referral&utm_content=edumco/pyan&utm_campaign=Badge_Grade) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyan3) Pyan takes one or more Python source files, performs a (rather superficial) static analysis, and constructs a directed graph of the objects in the combined source, and how they define or use each other. The graph can be output for rendering by GraphViz or yEd. -*And now it is available for Python 3!* +This project has 2 official repositories: -Note: The previous Python 2-compatible version is tagged as `pre-python3` +- The original stable [davidfraser/pyan](https://github.com/davidfraser/pyan). +- The development repository [Technologicat/pyan](https://github.com/Technologicat/pyan) + +> The PyPI package [pyan3](https://pypi.org/project/pyan3/) is built from development + +## About [![Example output](graph0.png "Example: GraphViz rendering of Pyan output (click for .svg)")](graph0.svg) -**Defines** relations are drawn with *dotted gray arrows*. +**Defines** relations are drawn with _dotted gray arrows_. -**Uses** relations are drawn with *black solid arrows*. Recursion is indicated by an arrow from a node to itself. [Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion#Basic_examples) between nodes X and Y is indicated by a pair of arrows, one pointing from X to Y, and the other from Y to X. +**Uses** relations are drawn with _black solid arrows_. Recursion is indicated by an arrow from a node to itself. [Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion#Basic_examples) between nodes X and Y is indicated by a pair of arrows, one pointing from X to Y, and the other from Y to X. **Nodes** are always filled, and made translucent to clearly show any arrows passing underneath them. This is especially useful for large graphs with GraphViz's `fdp` filter. If colored output is not enabled, the fill is white. -In **node coloring**, the [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV) color model is used. 
The **hue** is determined by the *filename* the node comes from. The **lightness** is determined by *depth of namespace nesting*, with darker meaning more deeply nested. Saturation is constant. The spacing between different hues depends on the number of files analyzed; better results are obtained for fewer files. +In **node coloring**, the [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV) color model is used. The **hue** is determined by the _filename_ the node comes from. The **lightness** is determined by _depth of namespace nesting_, with darker meaning more deeply nested. Saturation is constant. The spacing between different hues depends on the number of files analyzed; better results are obtained for fewer files. **Groups** are filled with translucent gray to avoid clashes with any node color. -The nodes can be **annotated** by *filename and source line number* information. +The nodes can be **annotated** by _filename and source line number_ information. ## Note @@ -28,10 +38,13 @@ The static analysis approach Pyan takes is different from running the code and s In Pyan3, the analyzer was ported from `compiler` ([good riddance](https://stackoverflow.com/a/909172)) to a combination of `ast` and `symtable`, and slightly extended. +# Install + + pip install pyan3 # Usage -See `pyan --help`. +See `pyan3 --help`. Example: @@ -41,9 +54,66 @@ Then render using your favorite GraphViz filter, mainly `dot` or `fdp`: `dot -Tsvg myuses.dot >myuses.svg` +Or let `pyan` produce the SVG directly: + +`pyan *.py --uses --no-defines --colored --grouped --annotated --svg >myuses.svg` + +You can also export an interactive HTML file: + +`pyan *.py --uses --no-defines --colored --grouped --annotated --html > myuses.html` + +Alternatively, you can call `pyan` from Python: + +```python +import pyan +from IPython.display import HTML +HTML(pyan.create_callgraph(filenames="**/*.py", format="html")) +``` + +#### Sphinx integration + +You can integrate callgraphs into Sphinx. +Install GraphViz (e.g. via `sudo apt-get install graphviz`) and modify `source/conf.py` so that: + +``` +# modify extensions +extensions = [ + ... + "sphinx.ext.graphviz", + "pyan.sphinx", +] + +# add graphviz options +graphviz_output_format = "svg" +``` + +Now, there is a `callgraph` directive which has all the options of the [graphviz directive](https://www.sphinx-doc.org/en/master/usage/extensions/graphviz.html) +and in addition: + +- **:no-groups:** (boolean flag): do not group +- **:no-defines:** (boolean flag): do not draw edges that show which functions, methods and classes are defined by a class or module +- **:no-uses:** (boolean flag): do not draw edges that show how a function uses other functions +- **:no-colors:** (boolean flag): do not color the callgraph (default is colored) +- **:nested-groups:** (boolean flag): group by modules and submodules +- **:annotated:** (boolean flag): annotate callgraph with file names +- **:direction:** (string): "horizontal" or "vertical" callgraph layout +- **:toctree:** (string): path to toctree (as used with autosummary) to link elements of callgraph to documentation (makes all nodes clickable) +- **:zoomable:** (boolean flag): enables users to zoom and pan the callgraph + +Example to create a callgraph for the function `pyan.create_callgraph` that is +zoomable, is laid out from left to right, and links each node to the API documentation that +was created at the toctree path `api`: + +``` +.. 
callgraph:: pyan.create_callgraph + :toctree: api + :zoomable: + :direction: horizontal +``` + #### Troubleshooting -If GraphViz says *trouble in init_rank*, try adding `-Gnewrank=true`, as in: +If GraphViz says _trouble in init_rank_, try adding `-Gnewrank=true`, as in: `dot -Gnewrank=true -Tsvg myuses.dot >myuses.svg` @@ -55,86 +125,85 @@ If the graph is visually unreadable due to too much detail, consider visualizing Currently Pyan always operates at the level of individual functions and methods; an option to visualize only relations between namespaces may (or may not) be added in a future version. - # Features -*Items tagged with ☆ are new in Pyan3.* +_Items tagged with ☆ are new in Pyan3._ **Graph creation**: - - Nodes for functions and classes - - Edges for defines - - Edges for uses - - This includes recursive calls ☆ - - Grouping to represent defines, with or without nesting - - Coloring of nodes by filename - - Unlimited number of hues ☆ +- Nodes for functions and classes +- Edges for defines +- Edges for uses + - This includes recursive calls ☆ +- Grouping to represent defines, with or without nesting +- Coloring of nodes by filename + - Unlimited number of hues ☆ **Analysis**: - - Name lookup across the given set of files - - Nested function definitions - - Nested class definitions ☆ - - Nested attribute accesses like `self.a.b` ☆ - - Inherited attributes ☆ - - Pyan3 looks up also in base classes when resolving attributes. In the old Pyan, calls to inherited methods used to be picked up by `contract_nonexistents()` followed by `expand_unknowns()`, but that often generated spurious uses edges (because the wildcard to `*.name` expands to `X.name` *for all* `X` that have an attribute called `name`.). - - Resolution of `super()` based on the static type at the call site ☆ - - MRO is (statically) respected in looking up inherited attributes and `super()` ☆ - - Assignment tracking with lexical scoping - - E.g. if `self.a = MyFancyClass()`, the analyzer knows that any references to `self.a` point to `MyFancyClass` - - All binding forms are supported (assign, augassign, for, comprehensions, generator expressions, with) ☆ - - Name clashes between `for` loop counter variables and functions or classes defined elsewhere no longer confuse Pyan. - - `self` is defined by capturing the name of the first argument of a method definition, like Python does. ☆ - - Simple item-by-item tuple assignments like `x,y,z = a,b,c` ☆ - - Chained assignments `a = b = c` ☆ - - Local scope for lambda, listcomp, setcomp, dictcomp, genexpr ☆ - - Keep in mind that list comprehensions gained a local scope (being treated like a function) only in Python 3. Thus, Pyan3, when applied to legacy Python 2 code, will give subtly wrong results if the code uses list comprehensions. - - Source filename and line number annotation ☆ - - The annotation is appended to the node label. If grouping is off, namespace is included in the annotation. If grouping is on, only source filename and line number information is included, because the group title already shows the namespace. +- Name lookup across the given set of files +- Nested function definitions +- Nested class definitions ☆ +- Nested attribute accesses like `self.a.b` ☆ +- Inherited attributes ☆ + - Pyan3 looks up also in base classes when resolving attributes. 
In the old Pyan, calls to inherited methods used to be picked up by `contract_nonexistents()` followed by `expand_unknowns()`, but that often generated spurious uses edges (because the wildcard to `*.name` expands to `X.name` _for all_ `X` that have an attribute called `name`.). +- Resolution of `super()` based on the static type at the call site ☆ +- MRO is (statically) respected in looking up inherited attributes and `super()` ☆ +- Assignment tracking with lexical scoping + - E.g. if `self.a = MyFancyClass()`, the analyzer knows that any references to `self.a` point to `MyFancyClass` + - All binding forms are supported (assign, augassign, for, comprehensions, generator expressions, with) ☆ + - Name clashes between `for` loop counter variables and functions or classes defined elsewhere no longer confuse Pyan. +- `self` is defined by capturing the name of the first argument of a method definition, like Python does. ☆ +- Simple item-by-item tuple assignments like `x,y,z = a,b,c` ☆ +- Chained assignments `a = b = c` ☆ +- Local scope for lambda, listcomp, setcomp, dictcomp, genexpr ☆ + - Keep in mind that list comprehensions gained a local scope (being treated like a function) only in Python 3. Thus, Pyan3, when applied to legacy Python 2 code, will give subtly wrong results if the code uses list comprehensions. +- Source filename and line number annotation ☆ + - The annotation is appended to the node label. If grouping is off, namespace is included in the annotation. If grouping is on, only source filename and line number information is included, because the group title already shows the namespace. ## TODO - - Determine confidence of detected edges (probability that the edge is correct). Start with a binary system, with only values 1.0 and 0.0. - - A fully resolved reference to a name, based on lexical scoping, has confidence 1.0. - - A reference to an unknown name has confidence 0.0. - - Attributes: - - A fully resolved reference to a known attribute of a known object has confidence 1.0. - - A reference to an unknown attribute of a known object has confidence 1.0. These are mainly generated by imports, when the imported file is not in the analyzed set. (Does this need a third value, such as 0.5?) - - A reference to an attribute of an unknown object has confidence 0.0. - - A wildcard and its expansions have confidence 0.0. - - Effects of binding analysis? The system should not claim full confidence in a bound value, unless it fully understands both the binding syntax and the value. (Note that this is very restrictive. A function call or a list in the expression for the value will currently spoil the full analysis.) - - Confidence values may need updating in pass 2. - - Make the analyzer understand `del name` (probably seen as `isinstance(node.ctx, ast.Del)` in `visit_Name()`, `visit_Attribute()`) - - Prefix methods by class name in the graph; create a legend for annotations. See the discussion [here](https://github.com/johnyf/pyan/issues/4). - - Improve the wildcard resolution mechanism, see discussion [here](https://github.com/johnyf/pyan/issues/5). - - Could record the namespace of the use site upon creating the wildcard, and check any possible resolutions against that (requiring that the resolved name is in scope at the use site)? - - Add an option to visualize relations only between namespaces, useful for large projects. - - Scan the nodes and edges, basically generate a new graph and visualize that. - - Publish test cases. - - Get rid of `self.last_value`? 
- - Consider each specific kind of expression or statement being handled; get the relevant info directly (or by a more controlled kind of recursion) instead of `self.visit()`. - - At some point, may need a second visitor class that is just a catch-all that extracts names, which is then applied to only relevant branches of the AST. - - On the other hand, maybe `self.last_value` is the simplest implementation that extracts a value from an expression, and it only needs to be used in a controlled manner (as `analyze_binding()` currently does); i.e. reset before visiting, and reset immediately when done. +- Determine confidence of detected edges (probability that the edge is correct). Start with a binary system, with only values 1.0 and 0.0. + - A fully resolved reference to a name, based on lexical scoping, has confidence 1.0. + - A reference to an unknown name has confidence 0.0. + - Attributes: + - A fully resolved reference to a known attribute of a known object has confidence 1.0. + - A reference to an unknown attribute of a known object has confidence 1.0. These are mainly generated by imports, when the imported file is not in the analyzed set. (Does this need a third value, such as 0.5?) + - A reference to an attribute of an unknown object has confidence 0.0. + - A wildcard and its expansions have confidence 0.0. + - Effects of binding analysis? The system should not claim full confidence in a bound value, unless it fully understands both the binding syntax and the value. (Note that this is very restrictive. A function call or a list in the expression for the value will currently spoil the full analysis.) + - Confidence values may need updating in pass 2. +- Make the analyzer understand `del name` (probably seen as `isinstance(node.ctx, ast.Del)` in `visit_Name()`, `visit_Attribute()`) +- Prefix methods by class name in the graph; create a legend for annotations. See the discussion [here](https://github.com/johnyf/pyan/issues/4). +- Improve the wildcard resolution mechanism, see discussion [here](https://github.com/johnyf/pyan/issues/5). + - Could record the namespace of the use site upon creating the wildcard, and check any possible resolutions against that (requiring that the resolved name is in scope at the use site)? +- Add an option to visualize relations only between namespaces, useful for large projects. + - Scan the nodes and edges, basically generate a new graph and visualize that. +- Publish test cases. +- Get rid of `self.last_value`? + - Consider each specific kind of expression or statement being handled; get the relevant info directly (or by a more controlled kind of recursion) instead of `self.visit()`. + - At some point, may need a second visitor class that is just a catch-all that extracts names, which is then applied to only relevant branches of the AST. + - On the other hand, maybe `self.last_value` is the simplest implementation that extracts a value from an expression, and it only needs to be used in a controlled manner (as `analyze_binding()` currently does); i.e. reset before visiting, and reset immediately when done. The analyzer **does not currently support**: - - Tuples/lists as first-class values (currently ignores any assignment of a tuple/list to a single name). - - Support empty lists, too (for resolving method calls to `.append()` and similar). - - Starred assignment `a,*b,c = d,e,f,g,h` - - Slicing and indexing in assignment (`ast.Subscript`) - - Additional unpacking generalizations ([PEP 448](https://www.python.org/dev/peps/pep-0448/), Python 3.5+). 
- - Any **uses** on the RHS *at the binding site* in all of the above are already detected by the name and attribute analyzers, but the binding information from assignments of these forms will not be recorded (at least not correctly). - - Enums; need to mark the use of any of their attributes as use of the Enum. Need to detect `Enum` in `bases` during analysis of ClassDef; then tag the class as an enum and handle differently. - - Resolving results of function calls, except for a very limited special case for `super()`. - - Any binding of a name to a result of a function (or method) call - provided that the binding itself is understood by Pyan - will instead show in the output as binding the name to that function (or method). (This may generate some unintuitive uses edges in the graph.) - - Distinguishing between different Lambdas in the same namespace (to report uses of a particular `lambda` that has been stored in `self.something`). - - Type hints ([PEP 484](https://www.python.org/dev/peps/pep-0484/), Python 3.5+). - - Type inference for function arguments - - Either of these two could be used to bind function argument names to the appropriate object types, avoiding the need for wildcard references (especially for attribute accesses on objects passed in as function arguments). - - Type inference could run as pass 3, using additional information from the state of the graph after pass 2 to connect call sites to function definitions. Alternatively, no additional pass; store the AST nodes in the earlier pass. Type inference would allow resolving some wildcards by finding the method of the actual object instance passed in. - - Must understand, at the call site, whether the first positional argument in the function def is handled implicitly or not. This is found by looking at the flavor of the Node representing the call target. - - Async definitions are detected, but passed through to the corresponding non-async analyzers; could be annotated. - - Cython; could strip or comment out Cython-specific code as a preprocess step, then treat as Python (will need to be careful to get line numbers right). +- Tuples/lists as first-class values (currently ignores any assignment of a tuple/list to a single name). + - Support empty lists, too (for resolving method calls to `.append()` and similar). +- Starred assignment `a,*b,c = d,e,f,g,h` +- Slicing and indexing in assignment (`ast.Subscript`) +- Additional unpacking generalizations ([PEP 448](https://www.python.org/dev/peps/pep-0448/), Python 3.5+). + - Any **uses** on the RHS _at the binding site_ in all of the above are already detected by the name and attribute analyzers, but the binding information from assignments of these forms will not be recorded (at least not correctly). +- Enums; need to mark the use of any of their attributes as use of the Enum. Need to detect `Enum` in `bases` during analysis of ClassDef; then tag the class as an enum and handle differently. +- Resolving results of function calls, except for a very limited special case for `super()`. + - Any binding of a name to a result of a function (or method) call - provided that the binding itself is understood by Pyan - will instead show in the output as binding the name to that function (or method). (This may generate some unintuitive uses edges in the graph.) +- Distinguishing between different Lambdas in the same namespace (to report uses of a particular `lambda` that has been stored in `self.something`). 
+- Type hints ([PEP 484](https://www.python.org/dev/peps/pep-0484/), Python 3.5+). +- Type inference for function arguments + - Either of these two could be used to bind function argument names to the appropriate object types, avoiding the need for wildcard references (especially for attribute accesses on objects passed in as function arguments). + - Type inference could run as pass 3, using additional information from the state of the graph after pass 2 to connect call sites to function definitions. Alternatively, no additional pass; store the AST nodes in the earlier pass. Type inference would allow resolving some wildcards by finding the method of the actual object instance passed in. + - Must understand, at the call site, whether the first positional argument in the function def is handled implicitly or not. This is found by looking at the flavor of the Node representing the call target. +- Async definitions are detected, but passed through to the corresponding non-async analyzers; could be annotated. +- Cython; could strip or comment out Cython-specific code as a preprocess step, then treat as Python (will need to be careful to get line numbers right). # How it works @@ -143,7 +212,7 @@ From the viewpoint of graphing the defines and uses relations, the interesting p Bindings are tracked, with lexical scoping, to determine which type of object, or which function, each name points to at any given point in the source code being analyzed. This allows tracking things like: ```python -def some_func() +def some_func(): pass class MyClass: @@ -164,21 +233,8 @@ When a binding statement is encountered, the current namespace determines in whi # Authors -Original [pyan.py](https://github.com/ejrh/ejrh/blob/master/utils/pyan.py) by Edmund Horner. [Original post with explanation](http://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/). - -[Coloring and grouping](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/) for GraphViz output by Juha Jeronen. - -[Git repository cleanup](https://github.com/davidfraser/pyan/) and maintenance by David Fraser. - -[yEd GraphML output, and framework for easily adding new output formats](https://github.com/davidfraser/pyan/pull/1) by Patrick Massot. - -A bugfix [[2]](https://github.com/davidfraser/pyan/pull/2) and the option `--dot-rankdir` [[3]](https://github.com/davidfraser/pyan/pull/3) contributed by GitHub user ch41rmn. - -A bug in `.tgf` output [[4]](https://github.com/davidfraser/pyan/pull/4) pointed out and fix suggested by Adam Eijdenberg. - -This Python 3 port, analyzer expansion, and additional refactoring by Juha Jeronen. +See [AUTHORS.md](AUTHORS.md). # License [GPL v2](LICENSE.md), as per [comments here](https://ejrh.wordpress.com/2012/08/18/coloured-call-graphs/). - diff --git a/makedist.sh b/makedist.sh new file mode 100755 index 0000000..338298d --- /dev/null +++ b/makedist.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python3 setup.py sdist bdist_wheel diff --git a/modvis.py b/modvis.py new file mode 100644 index 0000000..2431014 --- /dev/null +++ b/modvis.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; -*- +"""A simple import analyzer. 
Visualize dependencies between modules.""" + +import ast +from glob import glob +import logging +from optparse import OptionParser # TODO: migrate to argparse +import os + +import pyan.node +import pyan.visgraph +import pyan.writers + +# from pyan.anutils import get_module_name + + +def filename_to_module_name(fullpath): # we need to see __init__, hence we don't use anutils.get_module_name. + """'some/path/module.py' -> 'some.path.module'""" + if not fullpath.endswith(".py"): + raise ValueError("Expected a .py filename, got '{}'".format(fullpath)) + rel = ".{}".format(os.path.sep) # ./ + if fullpath.startswith(rel): + fullpath = fullpath[len(rel) :] + fullpath = fullpath[:-3] # remove .py + return fullpath.replace(os.path.sep, ".") + + +def split_module_name(m): + """'fully.qualified.name' -> ('fully.qualified', 'name')""" + k = m.rfind(".") + if k == -1: + return ("", m) + return (m[:k], m[(k + 1) :]) + + +# blacklist = (".git", "build", "dist", "test") +# def find_py_files(basedir): +# py_files = [] +# for root, dirs, files in os.walk(basedir): +# for x in blacklist: # don't visit blacklisted dirs +# if x in dirs: +# dirs.remove(x) +# for filename in files: +# if filename.endswith(".py"): +# fullpath = os.path.join(root, filename) +# py_files.append(fullpath) +# return py_files + + +def resolve(current_module, target_module, level): + """Return fully qualified name of the target_module in an import. + + If level == 0, the import is absolute, hence target_module is already the + fully qualified name (and will be returned as-is). + + Relative imports (level > 0) are resolved using current_module as the + starting point. Usually this is good enough (especially if you analyze your + project by invoking modvis in its top-level directory). + + For the exact implications, see the section "Import sibling packages" in: + https://alex.dzyoba.com/blog/python-import/ + and this SO discussion: + https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time + """ + if level < 0: + raise ValueError("Relative import level must be >= 0, got {}".format(level)) + if level == 0: # absolute import + return target_module + # level > 0 (let's have some simplistic support for relative imports) + if level > current_module.count(".") + 1: # foo.bar.baz -> max level 3, pointing to top level + raise ValueError("Relative import level {} too large for module name {}".format(level, current_module)) + base = current_module + for _ in range(level): + k = base.rfind(".") + if k == -1: + base = "" + break + base = base[:k] + return ".".join((base, target_module)) + + +class ImportVisitor(ast.NodeVisitor): + def __init__(self, filenames, logger): + self.modules = {} # modname: {dep0, dep1, ...} + self.fullpaths = {} # modname: fullpath + self.logger = logger + self.analyze(filenames) + + def analyze(self, filenames): + for fullpath in filenames: + with open(fullpath, "rt", encoding="utf-8") as f: + content = f.read() + m = filename_to_module_name(fullpath) + self.current_module = m + self.fullpaths[m] = fullpath + self.visit(ast.parse(content, fullpath)) + + def add_dependency(self, target_module): # source module is always self.current_module + m = self.current_module + if m not in self.modules: + self.modules[m] = set() + self.modules[m].add(target_module) + # Just in case the target (or one or more of its parents) is a package + # (we don't know that), add a dependency on the relevant __init__ module. 
+ # + # If there's no matching __init__ (either no __init__.py provided, or + # the target is just a module), this is harmless - we just generate a + # spurious dependency on a module that doesn't even exist. + # + # Since nonexistent modules are not in the analyzed set (i.e. do not + # appear as keys of self.modules), prepare_graph will ignore them. + # + # TODO: This would be a problem for a simple plain-text output that doesn't use the graph. + modpath = target_module.split(".") + for k in range(1, len(modpath) + 1): + base = ".".join(modpath[:k]) + possible_init = base + ".__init__" + if possible_init != m: # will happen when current_module is somepackage.__init__ itself + self.modules[m].add(possible_init) + self.logger.debug(" added possible implicit use of '{}'".format(possible_init)) + + def visit_Import(self, node): + self.logger.debug( + "{}:{}: Import {}".format(self.current_module, node.lineno, [alias.name for alias in node.names]) + ) + for alias in node.names: + self.add_dependency(alias.name) # alias.asname not relevant for our purposes + + def visit_ImportFrom(self, node): + # from foo import some_symbol + if node.module: + self.logger.debug( + "{}:{}: ImportFrom '{}', relative import level {}".format( + self.current_module, node.lineno, node.module, node.level + ) + ) + absname = resolve(self.current_module, node.module, node.level) + if node.level > 0: + self.logger.debug(" resolved relative import to '{}'".format(absname)) + self.add_dependency(absname) + + # from . import foo --> module = None; now the **names** refer to modules + else: + for alias in node.names: + self.logger.debug( + "{}:{}: ImportFrom '{}', target module '{}', relative import level {}".format( + self.current_module, node.lineno, "." * node.level, alias.name, node.level + ) + ) + absname = resolve(self.current_module, alias.name, node.level) + if node.level > 0: + self.logger.debug(" resolved relative import to '{}'".format(absname)) + self.add_dependency(absname) + + # -------------------------------------------------------------------------------- + + def detect_cycles(self): + """Postprocessing. Detect import cycles. + + Return format is `[(prefix, cycle), ...]` where `prefix` is the + non-cyclic prefix of the import chain, and `cycle` contains only + the cyclic part (where the first and last elements are the same). + """ + cycles = [] + + def walk(m, seen=None, trace=None): + trace = (trace or []) + [m] + seen = seen or set() + if m in seen: + cycles.append(trace) + return + seen = seen | {m} + deps = self.modules[m] + for d in sorted(deps): + if d in self.modules: + walk(d, seen, trace) + + for root in sorted(self.modules): + walk(root) + + # For each detected cycle, report the non-cyclic prefix and the cycle separately + out = [] + for cycle in cycles: + offender = cycle[-1] + k = cycle.index(offender) + out.append((cycle[:k], cycle[k:])) + return out + + def prepare_graph(self): # same format as in pyan.analyzer + """Postprocessing. Prepare data for pyan.visgraph for graph file generation.""" + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.uses_edges = {} + # we have no defines_edges, which doesn't matter as long as we don't enable that option in visgraph. + + # TODO: Right now we care only about modules whose files we read. + # TODO: If we want to include in the graph also targets that are not in the analyzed set, + # TODO: then we could create nodes also for the modules listed in the *values* of self.modules. 
+ for m in self.modules: + ns, mod = split_module_name(m) + package = os.path.dirname(self.fullpaths[m]) + # print("{}: ns={}, mod={}, fn={}".format(m, ns, mod, fn)) + # HACK: The `filename` attribute of the node determines the visual color. + # HACK: We are visualizing at module level, so color by package. + # TODO: If we are analyzing files from several projects in the same run, + # TODO: it could be useful to decide the hue by the top-level directory name + # TODO: (after the './' if any), and lightness by the depth in each tree. + # TODO: This would be most similar to how Pyan does it for functions/classes. + n = pyan.node.Node(namespace=ns, name=mod, ast_node=None, filename=package, flavor=pyan.node.Flavor.MODULE) + n.defined = True + # Pyan's analyzer.py allows several nodes to share the same short name, + # which is used as the key to self.nodes; but we use the fully qualified + # name as the key. Nevertheless, visgraph expects a format where the + # values in the visitor's `nodes` attribute are lists. + self.nodes[m] = [n] + + def add_uses_edge(from_node, to_node): + if from_node not in self.uses_edges: + self.uses_edges[from_node] = set() + self.uses_edges[from_node].add(to_node) + + for m, deps in self.modules.items(): + for d in deps: + n_from = self.nodes.get(m) + n_to = self.nodes.get(d) + if n_from and n_to: + add_uses_edge(n_from[0], n_to[0]) + + # sanity check output + for m, deps in self.uses_edges.items(): + assert m.get_name() in self.nodes + for d in deps: + assert d.get_name() in self.nodes + + +def main(): + usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" + desc = "Analyse one or more Python source files and generate an approximate module dependency graph." + parser = OptionParser(usage=usage, description=desc) + parser.add_option("--dot", action="store_true", default=False, help="output in GraphViz dot format") + parser.add_option("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") + parser.add_option("--yed", action="store_true", default=False, help="output in yEd GraphML Format") + parser.add_option("-f", "--file", dest="filename", help="write graph to FILE", metavar="FILE", default=None) + parser.add_option("-l", "--log", dest="logname", help="write log to LOG", metavar="LOG") + parser.add_option("-v", "--verbose", action="store_true", default=False, dest="verbose", help="verbose output") + parser.add_option( + "-V", + "--very-verbose", + action="store_true", + default=False, + dest="very_verbose", + help="even more verbose output (mainly for debug)", + ) + parser.add_option( + "-c", + "--colored", + action="store_true", + default=False, + dest="colored", + help="color nodes according to namespace [dot only]", + ) + parser.add_option( + "-g", + "--grouped", + action="store_true", + default=False, + dest="grouped", + help="group nodes (create subgraphs) according to namespace [dot only]", + ) + parser.add_option( + "-e", + "--nested-groups", + action="store_true", + default=False, + dest="nested_groups", + help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]", + ) + parser.add_option( + "-C", + "--cycles", + action="store_true", + default=False, + dest="cycles", + help="detect import cycles and print report to stdout", + ) + parser.add_option( + "--dot-rankdir", + default="TB", + dest="rankdir", + help=( + "specifies the dot graph 'rankdir' property for " + "controlling the direction of the graph. " + "Allowed values: ['TB', 'LR', 'BT', 'RL']. 
" + "[dot only]" + ), + ) + parser.add_option( + "-a", "--annotated", action="store_true", default=False, dest="annotated", help="annotate with module location" + ) + + options, args = parser.parse_args() + filenames = [fn2 for fn in args for fn2 in glob(fn, recursive=True)] + if len(args) == 0: + parser.error("Need one or more filenames to process") + + if options.nested_groups: + options.grouped = True + + graph_options = { + "draw_defines": False, # we have no defines edges + "draw_uses": True, + "colored": options.colored, + "grouped_alt": False, + "grouped": options.grouped, + "nested_groups": options.nested_groups, + "annotated": options.annotated, + } + + # TODO: use an int argument for verbosity + logger = logging.getLogger(__name__) + if options.very_verbose: + logger.setLevel(logging.DEBUG) + elif options.verbose: + logger.setLevel(logging.INFO) + else: + logger.setLevel(logging.WARN) + logger.addHandler(logging.StreamHandler()) + if options.logname: + handler = logging.FileHandler(options.logname) + logger.addHandler(handler) + + # run the analysis + v = ImportVisitor(filenames, logger) + + # Postprocessing: detect import cycles + # + # NOTE: Because this is a static analysis, it doesn't care about the order + # the code runs in any particular invocation of the software. Every + # analyzed module is considered as a possible entry point to the program, + # and all cycles (considering *all* possible branches *at any step* of + # *each* import chain) will be mapped recursively. + # + # Obviously, this easily leads to a combinatoric explosion. In a mid-size + # project (~20k SLOC), the analysis may find thousands of unique import + # cycles, most of which are harmless. + # + # Many cycles appear due to package A importing something from package B + # (possibly from one of its submodules) and vice versa, when both packages + # have an __init__ module. If they don't actually try to import any names + # that only become defined after the init has finished running, it's + # usually fine. + # + # (Init modules often import names from their submodules to the package's + # top-level namespace; those names can be reliably accessed only after the + # init module has finished running. But importing names directly from the + # submodule where they are defined is fine also during the init.) + # + # But if your program is crashing due to a cyclic import, you already know + # in any case *which* import cycle is causing it, just by looking at the + # stack trace. So this analysis is just extra information that says what + # other cycles exist, if any. + if options.cycles: + cycles = v.detect_cycles() + if not cycles: + print("No import cycles detected.") + else: + unique_cycles = set() + for prefix, cycle in cycles: + unique_cycles.add(tuple(cycle)) + print("Detected the following import cycles (n_results={}).".format(len(unique_cycles))) + + def stats(): + lengths = [len(x) - 1 for x in unique_cycles] # number of modules in the cycle + + def mean(lst): + return sum(lst) / len(lst) + + def median(lst): + tmp = list(sorted(lst)) + n = len(lst) + if n % 2 == 1: + return tmp[n // 2] # e.g. tmp[5] if n = 11 + else: + return (tmp[n // 2 - 1] + tmp[n // 2]) / 2 # e.g. 
avg of tmp[4] and tmp[5] if n = 10 + + return min(lengths), mean(lengths), median(lengths), max(lengths) + + print( + "Number of modules in a cycle: min = {}, average = {:0.2g}, median = {:0.2g}, max = {}".format(*stats()) + ) + for c in sorted(unique_cycles): + print(" {}".format(c)) + + # # we could generate a plaintext report like this (with caveats; see TODO above) + # ms = v.modules + # for m in sorted(ms): + # print(m) + # for d in sorted(ms[m]): + # print(" {}".format(d)) + + # Postprocessing: format graph report + make_graph = options.dot or options.tgf or options.yed + if make_graph: + v.prepare_graph() + # print(v.nodes, v.uses_edges) + graph = pyan.visgraph.VisualGraph.from_visitor(v, options=graph_options, logger=logger) + + if options.dot: + writer = pyan.writers.DotWriter( + graph, options=["rankdir=" + options.rankdir], output=options.filename, logger=logger + ) + if options.tgf: + writer = pyan.writers.TgfWriter(graph, output=options.filename, logger=logger) + if options.yed: + writer = pyan.writers.YedWriter(graph, output=options.filename, logger=logger) + if make_graph: + writer.run() + + +if __name__ == "__main__": + main() diff --git a/pyan.py b/pyan.py deleted file mode 100755 index fe3aa49..0000000 --- a/pyan.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import re -import sys - -from pyan import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/pyan/__init__.py b/pyan/__init__.py index d99f4f6..f23ab45 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,6 +1,102 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from .main import main +from glob import glob +import io +from typing import List, Union -__version__ = "1.0.2" +from .analyzer import CallGraphVisitor +from .main import main # noqa: F401, for export only. +from .visgraph import VisualGraph +from .writers import DotWriter, HTMLWriter, SVGWriter + +__version__ = "1.2.1" + + +# TODO: fix code duplication with main.py, should have just one implementation. +def create_callgraph( + filenames: Union[List[str], str] = "**/*.py", + root: str = None, + function: Union[str, None] = None, + namespace: Union[str, None] = None, + format: str = "dot", + rankdir: str = "LR", + nested_groups: bool = True, + draw_defines: bool = True, + draw_uses: bool = True, + colored: bool = True, + grouped_alt: bool = False, + annotated: bool = False, + grouped: bool = True, + max_iter: int = 1000, +) -> str: + """ + Create a callgraph based on static code analysis. + + Args: + filenames: glob pattern or list of glob patterns + to identify filenames to parse (`**` for multiple directories) + example: **/*.py for all python files + root: path to known root directory at which package root sits. Defaults to None, i.e. it will be inferred. + function: if defined, function name to filter for, e.g. "my_module.my_function" + to only include calls that are related to `my_function` + namespace: if defined, namespace to filter for, e.g. "my_module"; it is highly + recommended to define this filter + format: format to write callgraph to, one of "dot", "svg", "html". You need to have graphviz + installed for svg or html output + rankdir: direction of graph, e.g. 
"LR" for horizontal or "TB" for vertical + nested_groups: if to group by modules and submodules + draw_defines: if to draw defines edges (functions that are defines) + draw_uses: if to draw uses edges (functions that are used) + colored: if to color graph + grouped_alt: if to use alternative grouping + annotated: if to annotate graph with filenames + grouped: if to group by modules + max_iter: maximum number of iterations for filtering. Defaults to 1000. + + Returns: + str: callgraph + """ + if isinstance(filenames, str): + filenames = [filenames] + filenames = [fn2 for fn in filenames for fn2 in glob(fn, recursive=True)] + + if nested_groups: + grouped = True + graph_options = { + "draw_defines": draw_defines, + "draw_uses": draw_uses, + "colored": colored, + "grouped_alt": grouped_alt, + "grouped": grouped, + "nested_groups": nested_groups, + "annotated": annotated, + } + + v = CallGraphVisitor(filenames, root=root) + if function or namespace: + if function: + function_name = function.split(".")[-1] + function_namespace = ".".join(function.split(".")[:-1]) + node = v.get_node(function_namespace, function_name) + else: + node = None + v.filter(node=node, namespace=namespace, max_iter=max_iter) + graph = VisualGraph.from_visitor(v, options=graph_options) + + stream = io.StringIO() + if format == "dot": + writer = DotWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + + elif format == "html": + writer = HTMLWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + + elif format == "svg": + writer = SVGWriter(graph, options=["rankdir=" + rankdir], output=stream) + writer.run() + else: + raise ValueError(f"format {format} is unknown") + + return stream.getvalue() diff --git a/pyan/__main__.py b/pyan/__main__.py new file mode 100644 index 0000000..11946f1 --- /dev/null +++ b/pyan/__main__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +import pyan + +if __name__ == "__main__": + pyan.main() diff --git a/pyan/analyzer.py b/pyan/analyzer.py index 0b8baad..75e9cb1 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -2,15 +2,23 @@ # -*- coding: utf-8 -*- """The AST visitor.""" -import logging import ast +import logging import symtable +from typing import Union -from .node import Node, Flavor -from .anutils import tail, get_module_name, format_alias, \ - get_ast_node_name, sanitize_exprs, \ - resolve_method_resolution_order, \ - Scope, ExecuteInInnerScope, UnresolvedSuperCallError +from .anutils import ( + ExecuteInInnerScope, + Scope, + UnresolvedSuperCallError, + format_alias, + get_ast_node_name, + get_module_name, + resolve_method_resolution_order, + sanitize_exprs, + tail, +) +from .node import Flavor, Node # TODO: add Cython support (strip type annotations in a preprocess step, then treat as Python) # TODO: built-in functions (range(), enumerate(), zip(), iter(), ...): @@ -32,6 +40,8 @@ from .anutils import tail, get_module_name, format_alias, \ # https://docs.python.org/2/library/compiler.html#module-compiler.ast # https://docs.python.org/3/library/ast.html#abstract-grammar # + + class CallGraphVisitor(ast.NodeVisitor): """A visitor that can be walked over a Python AST, and will derive information about the objects in the AST and how they use each other. @@ -41,37 +51,35 @@ class CallGraphVisitor(ast.NodeVisitor): all files. 
This way use information between objects in different files can be gathered.""" - def __init__(self, filenames, logger=None): + def __init__(self, filenames, root: str = None, logger=None): self.logger = logger or logging.getLogger(__name__) # full module names for all given files - self.module_names = {} self.module_to_filename = {} # inverse mapping for recording which file each AST node came from for filename in filenames: mod_name = get_module_name(filename) - short_name = mod_name.rsplit('.', 1)[-1] - self.module_names[short_name] = mod_name self.module_to_filename[mod_name] = filename self.filenames = filenames + self.root = root # data gathered from analysis self.defines_edges = {} self.uses_edges = {} - self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) self.scopes = {} # fully qualified name of namespace: Scope object self.class_base_ast_nodes = {} # pass 1: class Node: list of AST nodes - self.class_base_nodes = {} # pass 2: class Node: list of Node objects (local bases, no recursion) - self.mro = {} # pass 2: class Node: list of Node objects in Python's MRO order + self.class_base_nodes = {} # pass 2: class Node: list of Node objects (local bases, no recursion) + self.mro = {} # pass 2: class Node: list of Node objects in Python's MRO order # current context for analysis self.module_name = None self.filename = None - self.name_stack = [] # for building namespace name, node naming + self.name_stack = [] # for building namespace name, node naming self.scope_stack = [] # the Scope objects currently in scope self.class_stack = [] # Nodes for class definitions currently in scope self.context_stack = [] # for detecting which FunctionDefs are methods - self.last_value = None + self.last_value = None # Analyze. 
self.process() @@ -80,7 +88,7 @@ class CallGraphVisitor(ast.NodeVisitor): """Analyze the set of files, twice so that any forward-references are picked up.""" for pas in range(2): for filename in self.filenames: - self.logger.info("========== pass %d, file '%s' ==========" % (pas+1, filename)) + self.logger.info("========== pass %d, file '%s' ==========" % (pas + 1, filename)) self.process_one(filename) if pas == 0: self.resolve_base_classes() # must be done only after all files seen @@ -89,11 +97,14 @@ class CallGraphVisitor(ast.NodeVisitor): def process_one(self, filename): """Analyze the specified Python source file.""" if filename not in self.filenames: - raise ValueError("Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" % (filename, self.filenames)) + raise ValueError( + "Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" + % (filename, self.filenames) + ) with open(filename, "rt", encoding="utf-8") as f: content = f.read() self.filename = filename - self.module_name = get_module_name(filename) + self.module_name = get_module_name(filename, root=self.root) self.analyze_scopes(content, filename) # add to the currently known scopes self.visit(ast.parse(content, filename)) self.module_name = None @@ -116,7 +127,7 @@ class CallGraphVisitor(ast.NodeVisitor): if isinstance(ast_node, ast.Name): baseclass_node = self.get_value(ast_node.id) elif isinstance(ast_node, ast.Attribute): - _,baseclass_node = self.get_attribute(ast_node) # don't care about obj, just grab attr + _, baseclass_node = self.get_attribute(ast_node) # don't care about obj, just grab attr else: # give up baseclass_node = None @@ -149,6 +160,7 @@ class CallGraphVisitor(ast.NodeVisitor): # then remove any references pointing outside the analyzed file set. self.expand_unknowns() + self.resolve_imports() self.contract_nonexistents() self.cull_inherited() self.collapse_inner() @@ -161,11 +173,167 @@ class CallGraphVisitor(ast.NodeVisitor): # Python docs: # https://docs.python.org/3/library/ast.html#abstract-grammar + def resolve_imports(self): + """ + resolve relative imports and remap nodes + """ + # first find all imports and map to themselves. 
we will then remap those that are currently pointing + # to duplicates or into the void + imports_to_resolve = {n for items in self.nodes.values() for n in items if n.flavor == Flavor.IMPORTEDITEM} + # map real definitions + import_mapping = {} + while len(imports_to_resolve) > 0: + from_node = imports_to_resolve.pop() + if from_node in import_mapping: + continue + to_uses = self.uses_edges.get(from_node, set([from_node])) + assert len(to_uses) == 1 + to_node = to_uses.pop() # resolve alias + # resolve namespace and get module + if to_node.namespace == "": + module_node = to_node + else: + assert from_node.name == to_node.name + module_node = self.get_node("", to_node.namespace) + module_uses = self.uses_edges.get(module_node) + if module_uses is not None: + # check if in module item exists and if yes, map to it + for candidate_to_node in module_uses: + if candidate_to_node.name == from_node.name: + to_node = candidate_to_node + import_mapping[from_node] = to_node + if to_node.flavor == Flavor.IMPORTEDITEM and from_node is not to_node: # avoid self-recursion + imports_to_resolve.add(to_node) + break + + # set previously undefined nodes to defined + # go through undefined attributes + attribute_import_mapping = {} + for nodes in self.nodes.values(): + for node in nodes: + if not node.defined and node.flavor == Flavor.ATTRIBUTE: + # try to resolve namespace and find imported item mapping + for from_node, to_node in import_mapping.items(): + if ( + f"{from_node.namespace}.{from_node.name}" == node.namespace + and from_node.flavor == Flavor.IMPORTEDITEM + ): + # use define edges as potential candidates + for candidate_to_node in self.defines_edges[to_node]: # + if candidate_to_node.name == node.name: + attribute_import_mapping[node] = candidate_to_node + break + import_mapping.update(attribute_import_mapping) + + # remap nodes based on import mapping + self.nodes = {name: [import_mapping.get(n, n) for n in items] for name, items in self.nodes.items()} + self.uses_edges = { + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.uses_edges.items() + if len(to_nodes) > 0 + } + self.defines_edges = { + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.defines_edges.items() + if len(to_nodes) > 0 + } + + def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000): + """ + filter callgraph nodes that related to `node` or are in `namespace` + + Args: + node: pyan node for which related nodes should be found, if none, filter only for namespace + namespace: namespace to search in (name of top level module), + if None, determines namespace from `node` + max_iter: maximum number of iterations and nodes to iterate + + Returns: + self + """ + # filter the nodes to avoid cluttering the callgraph with irrelevant information + filtered_nodes = self.get_related_nodes(node, namespace=namespace, max_iter=max_iter) + + self.nodes = {name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items()} + self.uses_edges = { + node: {n for n in nodes if n in filtered_nodes} + for node, nodes in self.uses_edges.items() + if node in filtered_nodes + } + self.defines_edges = { + node: {n for n in nodes if n in filtered_nodes} + for node, nodes in self.defines_edges.items() + if node in filtered_nodes + } + return self + + def get_related_nodes( + self, node: Union[None, 
Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000 + ) -> set: + """ + get nodes that related to `node` or are in `namespace` + + Args: + node: pyan node for which related nodes should be found, if none, filter only for namespace + namespace: namespace to search in (name of top level module), + if None, determines namespace from `node` + max_iter: maximum number of iterations and nodes to iterate + + Returns: + set: set of nodes related to `node` including `node` itself + """ + # check if searching through all nodes is necessary + if node is None: + queue = [] + if namespace is None: + new_nodes = {n for items in self.nodes.values() for n in items} + else: + new_nodes = { + n + for items in self.nodes.values() + for n in items + if n.namespace is not None and namespace in n.namespace + } + + else: + new_nodes = set() + if namespace is None: + namespace = node.namespace.strip(".").split(".", 1)[0] + queue = [node] + + # use queue system to search through nodes + # essentially add a node to the queue and then search all connected nodes which are in turn added to the queue + # until the queue itself is empty or the maximum limit of max_iter searches have been hit + i = max_iter + while len(queue) > 0: + item = queue.pop() + if item not in new_nodes: + new_nodes.add(item) + i -= 1 + if i < 0: + break + queue.extend( + [ + n + for n in self.uses_edges.get(item, []) + if n in self.uses_edges and n not in new_nodes and namespace in n.namespace + ] + ) + queue.extend( + [ + n + for n in self.defines_edges.get(item, []) + if n in self.defines_edges and n not in new_nodes and namespace in n.namespace + ] + ) + + return new_nodes + def visit_Module(self, node): - self.logger.debug("Module") + self.logger.debug("Module %s, %s" % (self.module_name, self.filename)) # Modules live in the top-level namespace, ''. - module_node = self.get_node('', self.module_name, node, flavor=Flavor.MODULE) + module_node = self.get_node("", self.module_name, node, flavor=Flavor.MODULE) self.associate_node(module_node, node, filename=self.filename) ns = self.module_name @@ -178,8 +346,11 @@ class CallGraphVisitor(ast.NodeVisitor): self.name_stack.pop() self.last_value = None + if self.add_defines_edge(module_node, None): + self.logger.info("Def Module %s" % node) + def visit_ClassDef(self, node): - self.logger.debug("ClassDef %s" % (node.name)) + self.logger.debug("ClassDef %s, %s:%s" % (node.name, self.filename, node.lineno)) from_node = self.get_node_of_current_namespace() ns = from_node.get_name() @@ -223,7 +394,7 @@ class CallGraphVisitor(ast.NodeVisitor): self.class_stack.pop() def visit_FunctionDef(self, node): - self.logger.debug("FunctionDef %s" % (node.name)) + self.logger.debug("FunctionDef %s, %s:%s" % (node.name, self.filename, node.lineno)) # To begin with: # @@ -237,7 +408,7 @@ class CallGraphVisitor(ast.NodeVisitor): # method or a class method. (For a class method, it represents cls, # but Pyan only cares about types, not instances.) # - self_name,flavor = self.analyze_functiondef(node) + self_name, flavor = self.analyze_functiondef(node) # Now we can create the Node. # @@ -261,27 +432,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Capture which names correspond to function args. # - # In the function scope, set them to a nonsense Node, - # to prevent leakage of identifiers of matching name - # from the enclosing scope (due to the local value being None - # until we set it to this nonsense Node). 
- # - # As the name of the nonsense node, we can use any string that - # is not a valid Python identifier. - # - # It has no sensible flavor, so we leave its flavor unspecified. - # - sc = self.scopes[inner_ns] - nonsense_node = self.get_node(inner_ns, '^^^argument^^^', None) - all_args = node.args # args, vararg (*args), kwonlyargs, kwarg (**kwargs) - for a in all_args.args: # positional - sc.defs[a.arg] = nonsense_node - if all_args.vararg is not None: # *args if present - sc.defs[all_args.vararg] = nonsense_node - for a in all_args.kwonlyargs: - sc.defs[a.arg] = nonsense_node - if all_args.kwarg is not None: # **kwargs if present - sc.defs[all_args.kwarg] = nonsense_node + self.generate_args_nodes(node.args, inner_ns) # self_name is just an ordinary name in the method namespace, except # that its value is implicitly set by Python when the method is called. @@ -297,10 +448,11 @@ class CallGraphVisitor(ast.NodeVisitor): self.scopes[inner_ns].defs[self_name] = class_node self.logger.info('Method def: setting self name "%s" to %s' % (self_name, class_node)) - for d in node.args.defaults: - self.visit(d) - for d in node.args.kw_defaults: - self.visit(d) + # record bindings of args to the given default values, if present + self.analyze_arguments(node.args) + + # Analyze the function body + # for stmt in node.body: self.visit(stmt) @@ -314,99 +466,176 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit_FunctionDef(node) # TODO: alias for now; tag async functions in output in a future version? def visit_Lambda(self, node): - self.logger.debug("Lambda") + # TODO: avoid lumping together all lambdas in the same namespace. + self.logger.debug("Lambda, %s:%s" % (self.filename, node.lineno)) with ExecuteInInnerScope(self, "lambda"): - for d in node.args.defaults: - self.visit(d) - for d in node.args.kw_defaults: - self.visit(d) + inner_ns = self.get_node_of_current_namespace().get_name() + self.generate_args_nodes(node.args, inner_ns) + self.analyze_arguments(node.args) self.visit(node.body) # single expr + def generate_args_nodes(self, ast_args, inner_ns): + """Capture which names correspond to function args. + + In the function scope, set them to a nonsense Node, + to prevent leakage of identifiers of matching name + from the enclosing scope (due to the local value being None + until we set it to this nonsense Node). + + ast_args: node.args from a FunctionDef or Lambda + inner_ns: namespace of the function or lambda, for scope lookup + """ + sc = self.scopes[inner_ns] + # As the name of the nonsense node, we can use any string that + # is not a valid Python identifier. + # + # It has no sensible flavor, so we leave its flavor unspecified. + nonsense_node = self.get_node(inner_ns, "^^^argument^^^", None) + # args, vararg (*args), kwonlyargs, kwarg (**kwargs) + for a in ast_args.args: # positional + sc.defs[a.arg] = nonsense_node + if ast_args.vararg is not None: # *args if present + sc.defs[ast_args.vararg] = nonsense_node + for a in ast_args.kwonlyargs: # any after *args or * + sc.defs[a.arg] = nonsense_node + if ast_args.kwarg is not None: # **kwargs if present + sc.defs[ast_args.kwarg] = nonsense_node + + def analyze_arguments(self, ast_args): + """Analyze an arguments node of the AST. + + Record bindings of args to the given default values, if present. 
+ + Used for analyzing FunctionDefs and Lambdas.""" + # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#arguments + if ast_args.defaults: + n = len(ast_args.defaults) + for tgt, val in zip(ast_args.args[-n:], ast_args.defaults): + targets = sanitize_exprs(tgt) + values = sanitize_exprs(val) + self.analyze_binding(targets, values) + if ast_args.kw_defaults: + n = len(ast_args.kw_defaults) + for tgt, val in zip(ast_args.kwonlyargs, ast_args.kw_defaults): + if val is not None: + targets = sanitize_exprs(tgt) + values = sanitize_exprs(val) + self.analyze_binding(targets, values) + def visit_Import(self, node): - self.logger.debug("Import %s" % [format_alias(x) for x in node.names]) + self.logger.debug("Import %s, %s:%s" % ([format_alias(x) for x in node.names], self.filename, node.lineno)) # TODO: add support for relative imports (path may be like "....something.something") # https://www.python.org/dev/peps/pep-0328/#id10 - # Do we need to? Seems that at least "from .foo import bar" works already? - for import_item in node.names: - src_name = import_item.name # what is being imported - tgt_name = import_item.asname if import_item.asname is not None else src_name # under which name - - # mark the use site - # - # where it is being imported to, i.e. the **user** - from_node = self.get_node_of_current_namespace() - # the thing **being used** (under the asname, if any) - to_node = self.get_node('', tgt_name, node, flavor=Flavor.IMPORTEDITEM) - - is_new_edge = self.add_uses_edge(from_node, to_node) - - # bind asname in the current namespace to the imported module - # - # conversion: possible short name -> fully qualified name - # (when analyzing a set of files in the same directory) - if src_name in self.module_names: - mod_name = self.module_names[src_name] - else: - mod_name = src_name - tgt_module = self.get_node('', mod_name, node, flavor=Flavor.MODULE) - # XXX: if there is no asname, it may happen that mod_name == tgt_name, - # in which case these will be the same Node. They are semantically - # distinct (Python name at receiving end, vs. module), but currently - # Pyan has no way of retaining that information. - if to_node is tgt_module: - to_node.flavor = Flavor.MODULE - self.set_value(tgt_name, tgt_module) - - # must do this after possibly munging flavor to avoid confusing - # the user reading the log - self.logger.debug("Use from %s to Import %s" % (from_node, to_node)) - if is_new_edge: - self.logger.info("New edge added for Use from %s to Import %s" % (from_node, to_node)) + for import_item in node.names: # the names are modules + self.analyze_module_import(import_item, node) def visit_ImportFrom(self, node): - self.logger.debug("ImportFrom: from %s import %s" % (node.module, [format_alias(x) for x in node.names])) - - tgt_name = node.module + self.logger.debug( + "ImportFrom: from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) + # Pyan needs to know the package structure, and how the program + # being analyzed is actually going to be invoked (!), to be able to + # resolve relative imports correctly. + # + # As a solution, we register imports here and later, when all files have been parsed, resolve them. 
from_node = self.get_node_of_current_namespace() - to_node = self.get_node('', tgt_name, node, flavor=Flavor.MODULE) # module, in top-level namespace - self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) - - if tgt_name in self.module_names: - mod_name = self.module_names[tgt_name] + if node.module is None: # resolve relative imports 'None' such as "from . import foo" + self.logger.debug( + "ImportFrom (original) from %s import %s, %s:%s" + % ("." * node.level, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) + tgt_level = node.level + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] + tgt_name = current_module_namespace + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) + elif node.level != 0: # resolve from ..module import foo + self.logger.debug( + "ImportFrom (original): from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) + tgt_level = node.level + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] + tgt_name = current_module_namespace + "." + node.module + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) else: - mod_name = tgt_name + tgt_name = node.module # normal from module.submodule import foo - for import_item in node.names: - name = import_item.name - new_name = import_item.asname if import_item.asname is not None else name - # we imported the identifier name from the module mod_name - tgt_id = self.get_node(mod_name, name, node, flavor=Flavor.IMPORTEDITEM) - self.set_value(new_name, tgt_id) - self.logger.info("From setting name %s to %s" % (new_name, tgt_id)) + # link each import separately + for alias in node.names: + # check if import is module + if tgt_name + "." + alias.name in self.module_to_filename: + to_node = self.get_node("", tgt_name + "." + alias.name, node, flavor=Flavor.MODULE) + else: + to_node = self.get_node(tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM) + # if there is alias, add extra edge between alias and node + if alias.asname is not None: + alias_name = alias.asname + else: + alias_name = alias.name + self.set_value(alias_name, to_node) # set node to be discoverable in module + self.logger.info("From setting name %s to %s" % (alias_name, to_node)) -# # Edmund Horner's original post has info on what this fixed in Python 2. -# # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ -# # -# # Essentially, this should make '.'.join(...) see str.join. -# # Pyan3 currently handles that in resolve_attribute() and get_attribute(). -# # -# # Python 3.4 does not have ast.Constant, but 3.6 does. Disabling for now. -# # TODO: revisit this part after upgrading Python. 
-# # -# def visit_Constant(self, node): -# self.logger.debug("Constant %s" % (node.value)) -# t = type(node.value) -# tn = t.__name__ -# self.last_value = self.get_node('', tn, node) + self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node)) + if self.add_uses_edge(from_node, to_node): + self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node)) + + def analyze_module_import(self, import_item, ast_node): + """Analyze a names AST node inside an Import or ImportFrom AST node. + + This handles the case where the objects being imported are modules. + + import_item: an item of ast_node.names + ast_node: for recording source location information + """ + src_name = import_item.name # what is being imported + + # mark the use site + # + # where it is being imported to, i.e. the **user** + from_node = self.get_node_of_current_namespace() + # the thing **being used** (under the asname, if any) + mod_node = self.get_node("", src_name, ast_node, flavor=Flavor.MODULE) + # if there is alias, add extra edge between alias and node + if import_item.asname is not None: + alias_name = import_item.asname + else: + alias_name = mod_node.name + self.add_uses_edge(from_node, mod_node) + self.logger.info("New edge added for Use import %s in %s" % (mod_node, from_node)) + self.set_value(alias_name, mod_node) # set node to be discoverable in module + self.logger.info("From setting name %s to %s" % (alias_name, mod_node)) + + # Edmund Horner's original post has info on what this fixed in Python 2. + # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/ + # + # Essentially, this should make '.'.join(...) see str.join. + # Pyan3 currently handles that in resolve_attribute() and get_attribute(). + # + # Python 3.4 does not have ast.Constant, but 3.6 does. + # TODO: actually test this with Python 3.6 or later. + # + def visit_Constant(self, node): + self.logger.debug("Constant %s, %s:%s" % (node.value, self.filename, node.lineno)) + t = type(node.value) + ns = self.get_node_of_current_namespace().get_name() + tn = t.__name__ + self.last_value = self.get_node(ns, tn, node, flavor=Flavor.ATTRIBUTE) # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): objname = get_ast_node_name(node.value) - self.logger.debug("Attribute %s of %s in context %s" % (node.attr, objname, type(node.ctx))) + self.logger.debug( + "Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno) + ) # TODO: self.last_value is a hack. Handle names in store context (LHS) # in analyze_binding(), so that visit_Attribute() only needs to handle @@ -416,7 +645,7 @@ class CallGraphVisitor(ast.NodeVisitor): new_value = self.last_value try: if self.set_attribute(node, new_value): - self.logger.info('setattr %s on %s to %s' % (node.attr, objname, new_value)) + self.logger.info("setattr %s on %s to %s" % (node.attr, objname, new_value)) except UnresolvedSuperCallError: # Trying to set something belonging to an unresolved super() # of something; just ignore this attempt to setattr. @@ -424,7 +653,7 @@ class CallGraphVisitor(ast.NodeVisitor): elif isinstance(node.ctx, ast.Load): try: - obj_node,attr_node = self.get_attribute(node) + obj_node, attr_node = self.get_attribute(node) except UnresolvedSuperCallError: # Avoid adding a wildcard if the lookup failed due to an # unresolved super() in the attribute chain. 
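A minimal sketch (a hypothetical standalone helper, not pyan's API) of the relative-import resolution rule that visit_ImportFrom registers above: strip `level` trailing components from the current module's dotted name, then append the explicit module part, if any.

```
from typing import Optional

def resolve_relative(current_module: str, level: int, module: Optional[str]) -> str:
    # "pkg.sub.mod" with level=1 -> "pkg.sub"; level counts the leading dots
    base = current_module.rsplit(".", level)[0]
    return base if module is None else base + "." + module

assert resolve_relative("pkg.sub.mod", 1, None) == "pkg.sub"      # from . import foo
assert resolve_relative("pkg.sub.mod", 2, "util") == "pkg.util"   # from ..util import bar
```

The resulting dotted name is only registered here; the actual remapping to the defining node happens later in resolve_imports(), once every file has been parsed.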
@@ -432,7 +661,7 @@ class CallGraphVisitor(ast.NodeVisitor): # Both object and attr known. if isinstance(attr_node, Node): - self.logger.info('getattr %s on %s returns %s' % (node.attr, objname, attr_node)) + self.logger.info("getattr %s on %s returns %s" % (node.attr, objname, attr_node)) # add uses edge from_node = self.get_node_of_current_namespace() @@ -466,34 +695,32 @@ class CallGraphVisitor(ast.NodeVisitor): from_node = self.get_node_of_current_namespace() ns = obj_node.get_name() # fully qualified namespace **of attr** to_node = self.get_node(ns, tgt_name, node, flavor=Flavor.ATTRIBUTE) - self.logger.debug("Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.debug( + f"Use from {from_node} to {to_node} (target obj {obj_node} known but target attr " + f"{node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.info( + "New edge added for Use from {from_node} to {to_node} (target obj {obj_node} known but " + f"target attr {node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) # remove resolved wildcard from current site to self.remove_wild(from_node, obj_node, node.attr) self.last_value = to_node - # Object unknown, add uses edge to a wildcard by attr name. + # pass on else: - tgt_name = node.attr - from_node = self.get_node_of_current_namespace() - to_node = self.get_node(None, tgt_name, node, flavor=Flavor.UNKNOWN) - self.logger.debug("Use from %s to %s (target obj %s not resolved; maybe fwd ref, function argument, or unanalyzed import)" % (from_node, to_node, objname)) - if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (target obj %s not resolved; maybe fwd ref, function argument, or unanalyzed import)" % (from_node, to_node, objname)) - - self.last_value = to_node + self.visit(node.value) # name access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Name(self, node): - self.logger.debug("Name %s in context %s" % (node.id, type(node.ctx))) + self.logger.debug("Name %s in context %s, %s:%s" % (node.id, type(node.ctx), self.filename, node.lineno)) # TODO: self.last_value is a hack. Handle names in store context (LHS) # in analyze_binding(), so that visit_Name() only needs to handle # the load context (i.e. detect uses of the name). - # if isinstance(node.ctx, ast.Store): # when we get here, self.last_value has been set by visit_Assign() self.set_value(node.id, self.last_value) @@ -504,8 +731,8 @@ class CallGraphVisitor(ast.NodeVisitor): to_node = self.get_value(tgt_name) # resolves "self" if needed current_class = self.get_current_class() if current_class is None or to_node is not current_class: # add uses edge only if not pointing to "self" - ###TODO if the name is a local variable (i.e. in the innermost scope), and - ###has no known value, then don't try to create a Node for it. + # TODO if the name is a local variable (i.e. in the innermost scope), and + # has no known value, then don't try to create a Node for it. 
if not isinstance(to_node, Node): # namespace=None means we don't know the namespace yet to_node = self.get_node(None, tgt_name, node, flavor=Flavor.UNKNOWN) @@ -531,20 +758,52 @@ class CallGraphVisitor(ast.NodeVisitor): values = sanitize_exprs(node.value) # values is the same for each set of targets for targets in node.targets: targets = sanitize_exprs(targets) - self.logger.debug("Assign %s %s" % ([get_ast_node_name(x) for x in targets], - [get_ast_node_name(x) for x in values])) + self.logger.debug( + "Assign %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) self.analyze_binding(targets, values) - def visit_AnnAssign(self, node): - self.visit_Assign(self, node) # TODO: alias for now; add the annotations to output in a future version? + def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ + target = sanitize_exprs(node.target) + self.last_value = None + if node.value is not None: + value = sanitize_exprs(node.value) + # issue #62: value may be an empty list, so it doesn't always have any elements + # even after `sanitize_exprs`. + self.logger.debug( + "AnnAssign %s %s, %s:%s" + % (get_ast_node_name(target[0]), get_ast_node_name(value), self.filename, node.lineno) + ) + self.analyze_binding(target, value) + else: # just a type declaration + self.logger.debug( + "AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), self.filename, node.lineno) + ) + self.last_value = None + self.visit(target[0]) + # TODO: use the type annotation from node.annotation? + # http://greentreesnakes.readthedocs.io/en/latest/nodes.html#AnnAssign def visit_AugAssign(self, node): targets = sanitize_exprs(node.target) values = sanitize_exprs(node.value) # values is the same for each set of targets - self.logger.debug("AugAssign %s %s %s" % ([get_ast_node_name(x) for x in targets], - type(node.op), - [get_ast_node_name(x) for x in values])) + self.logger.debug( + "AugAssign %s %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + type(node.op), + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) # TODO: maybe no need to handle tuple unpacking in AugAssign? (but simpler to use the same implementation) self.analyze_binding(targets, values) @@ -557,7 +816,7 @@ class CallGraphVisitor(ast.NodeVisitor): # in use elsewhere.) # def visit_For(self, node): - self.logger.debug("For-loop") + self.logger.debug("For-loop, %s:%s" % (self.filename, node.lineno)) targets = sanitize_exprs(node.target) values = sanitize_exprs(node.iter) @@ -572,32 +831,61 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit_For(node) # TODO: alias for now; tag async for in output in a future version? 
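For illustration, a small piece of hypothetical analyzed source exercising the binding visitors above; Assign, AnnAssign (PEP 526), AugAssign and For all funnel into analyze_binding():

```
import ast

src = """
x = make_x()          # ast.Assign
y: int = make_y()     # ast.AnnAssign with a value
z: int                # ast.AnnAssign, bare type declaration
y += offset()         # ast.AugAssign
for a, b in pairs():  # ast.For, tuple-unpacking target
    pass
"""
print([type(stmt).__name__ for stmt in ast.parse(src).body])
# ['Assign', 'AnnAssign', 'AnnAssign', 'AugAssign', 'For']
```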
def visit_ListComp(self, node): - self.logger.debug("ListComp") - with ExecuteInInnerScope(self, "listcomp"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.logger.debug("ListComp, %s:%s" % (self.filename, node.lineno)) + self.analyze_comprehension(node, "listcomp") def visit_SetComp(self, node): - self.logger.debug("SetComp") - with ExecuteInInnerScope(self, "setcomp"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.logger.debug("SetComp, %s:%s" % (self.filename, node.lineno)) + self.analyze_comprehension(node, "setcomp") def visit_DictComp(self, node): - self.logger.debug("DictComp") - with ExecuteInInnerScope(self, "dictcomp"): - self.visit(node.key) - self.visit(node.value) - self.analyze_generators(node.generators) + self.logger.debug("DictComp, %s:%s" % (self.filename, node.lineno)) + self.analyze_comprehension(node, "dictcomp", field1="key", field2="value") def visit_GeneratorExp(self, node): - self.logger.debug("GeneratorExp") - with ExecuteInInnerScope(self, "genexpr"): - self.visit(node.elt) - self.analyze_generators(node.generators) + self.logger.debug("GeneratorExp, %s:%s" % (self.filename, node.lineno)) + self.analyze_comprehension(node, "genexpr") + + def analyze_comprehension(self, node, label, field1="elt", field2=None): + # The outermost iterator is evaluated in the current scope; + # everything else in the new inner scope. + # + # See function symtable_handle_comprehension() in + # https://github.com/python/cpython/blob/master/Python/symtable.c + # For how it works, see + # https://stackoverflow.com/questions/48753060/what-are-these-extra-symbols-in-a-comprehensions-symtable + # For related discussion, see + # https://bugs.python.org/issue10544 + gens = node.generators # tuple of ast.comprehension + outermost = gens[0] + moregens = gens[1:] if len(gens) > 1 else [] + + outermost_iters = sanitize_exprs(outermost.iter) + outermost_targets = sanitize_exprs(outermost.target) + for expr in outermost_iters: + self.visit(expr) # set self.last_value (to something and hope for the best) + + with ExecuteInInnerScope(self, label): + for expr in outermost_targets: + self.visit(expr) # use self.last_value + self.last_value = None + for expr in outermost.ifs: + self.visit(expr) + + # TODO: there's also an is_async field we might want to use in a future version of Pyan. + for gen in moregens: + targets = sanitize_exprs(gen.target) + values = sanitize_exprs(gen.iter) + self.analyze_binding(targets, values) + for expr in gen.ifs: + self.visit(expr) + + self.visit(getattr(node, field1)) # e.g. 
node.elt + if field2: + self.visit(getattr(node, field2)) def visit_Call(self, node): - self.logger.debug("Call %s" % (get_ast_node_name(node.func))) + self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), self.filename, node.lineno)) # visit args to detect uses for arg in node.args: @@ -618,7 +906,9 @@ class CallGraphVisitor(ast.NodeVisitor): to_node = result_node self.logger.debug("Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node) + ) else: # generic function call # Visit the function name part last, so that inside a binding form, @@ -638,13 +928,15 @@ class CallGraphVisitor(ast.NodeVisitor): if self.last_value in self.class_base_ast_nodes: from_node = self.get_node_of_current_namespace() class_node = self.last_value - to_node = self.get_node(class_node.get_name(), '__init__', None, flavor=Flavor.METHOD) + to_node = self.get_node(class_node.get_name(), "__init__", None, flavor=Flavor.METHOD) self.logger.debug("Use from %s to %s (call creates an instance)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node) + ) def visit_With(self, node): - self.logger.debug("With (context manager)") + self.logger.debug("With (context manager), %s:%s" % (self.filename, node.lineno)) def add_uses_enter_exit_of(graph_node): # add uses edges to __enter__ and __exit__ methods of given Node @@ -653,7 +945,7 @@ class CallGraphVisitor(ast.NodeVisitor): withed_obj_node = graph_node self.logger.debug("Use from %s to With %s" % (from_node, withed_obj_node)) - for methodname in ('__enter__', '__exit__'): + for methodname in ("__enter__", "__exit__"): to_node = self.get_node(withed_obj_node.get_name(), methodname, None, flavor=Flavor.METHOD) if self.add_uses_edge(from_node, to_node): self.logger.info("New edge added for Use from %s to %s" % (from_node, to_node)) @@ -699,7 +991,7 @@ class CallGraphVisitor(ast.NodeVisitor): or None if not applicable; and flavor is a Flavor, specifically one of FUNCTION, METHOD, STATICMETHOD or CLASSMETHOD.""" - if not isinstance(ast_node, ast.FunctionDef): + if not isinstance(ast_node, (ast.AsyncFunctionDef, ast.FunctionDef)): raise TypeError("Expected ast.FunctionDef; got %s" % (type(ast_node))) # Visit decorators @@ -781,35 +1073,18 @@ class CallGraphVisitor(ast.NodeVisitor): self.visit(value) # RHS -> set self.last_value captured_values.append(self.last_value) self.last_value = None - for tgt,val in zip(targets,captured_values): + for tgt, val in zip(targets, captured_values): self.last_value = val - self.visit(tgt) # LHS, name in a store context + self.visit(tgt) # LHS, name in a store context self.last_value = None else: # FIXME: for now, do the wrong thing in the non-trivial case # old code, no tuple unpacking support for value in values: self.visit(value) # set self.last_value to **something** on the RHS and hope for the best - for tgt in targets: # LHS, name in a store context + for tgt in targets: # LHS, name in a store context self.visit(tgt) self.last_value = None - def analyze_generators(self, generators): - """Analyze the generators 
in a comprehension form. - - Analyzes the binding part, and visits the "if" expressions (if any). - - generators: an iterable of ast.comprehension objects - """ - - for gen in generators: - # TODO: there's also an is_async field we might want to use in a future version. - targets = sanitize_exprs(gen.target) - values = sanitize_exprs(gen.iter) - self.analyze_binding(targets, values) - - for expr in gen.ifs: - self.visit(expr) - def resolve_builtins(self, ast_node): """Resolve those calls to built-in functions whose return values can be determined in a simple manner. @@ -871,7 +1146,10 @@ class CallGraphVisitor(ast.NodeVisitor): # build a temporary ast.Attribute AST node so that we can use get_attribute() tmp_astnode = ast.Attribute(value=obj_astnode, attr=attrname, ctx=obj_astnode.ctx) obj_node, attr_node = self.get_attribute(tmp_astnode) - self.logger.debug("Resolve %s() of %s: returning attr node %s" % (funcname, get_ast_node_name(obj_astnode), attr_node)) + self.logger.debug( + "Resolve %s() of %s: returning attr node %s" + % (funcname, get_ast_node_name(obj_astnode), attr_node) + ) return attr_node # add implementations for other built-in funcnames here if needed @@ -891,8 +1169,9 @@ class CallGraphVisitor(ast.NodeVisitor): if not isinstance(ast_node, ast.Attribute): raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node))) - self.logger.debug("Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), - ast_node.attr, type(ast_node.ctx))) + self.logger.debug( + "Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), ast_node.attr, type(ast_node.ctx)) + ) # Resolve nested attributes # @@ -900,7 +1179,7 @@ class CallGraphVisitor(ast.NodeVisitor): # ast.Attribute(attr=c, value=ast.Attribute(attr=b, value=a)) # if isinstance(ast_node.value, ast.Attribute): - obj_node,attr_name = self.resolve_attribute(ast_node.value) + obj_node, attr_name = self.resolve_attribute(ast_node.value) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -937,7 +1216,7 @@ class CallGraphVisitor(ast.NodeVisitor): # The CLASS flavor is the best match, as these constants # are object types. # - obj_node = self.get_node('', tn, None, flavor=Flavor.CLASS) + obj_node = self.get_node("", tn, None, flavor=Flavor.CLASS) # attribute of a function call. Detect cases like super().dostuff() elif isinstance(ast_node.value, ast.Call): @@ -974,12 +1253,14 @@ class CallGraphVisitor(ast.NodeVisitor): # in different scopes, as we should). 
# scopes = {} + def process(parent_ns, table): sc = Scope(table) ns = "%s.%s" % (parent_ns, sc.name) if len(sc.name) else parent_ns scopes[ns] = sc for t in table.get_children(): process(ns, t) + process(self.module_name, symtable.symtable(code, filename, compile_type="exec")) # add to existing scopes (while not overwriting any existing definitions with None) @@ -1011,7 +1292,7 @@ class CallGraphVisitor(ast.NodeVisitor): """ assert len(self.name_stack) # name_stack should never be empty (always at least module name) - namespace = '.'.join(self.name_stack[0:-1]) + namespace = ".".join(self.name_stack[0:-1]) name = self.name_stack[-1] return self.get_node(namespace, name, None, flavor=Flavor.NAMESPACE) @@ -1032,13 +1313,15 @@ class CallGraphVisitor(ast.NodeVisitor): if sc is not None: value = sc.defs[name] if isinstance(value, Node): - self.logger.info('Get %s in %s, found in %s, value %s' % (name, self.scope_stack[-1], sc, value)) + self.logger.info("Get %s in %s, found in %s, value %s" % (name, self.scope_stack[-1], sc, value)) return value else: # TODO: should always be a Node or None - self.logger.debug('Get %s in %s, found in %s: value %s is not a Node' % (name, self.scope_stack[-1], sc, value)) + self.logger.debug( + "Get %s in %s, found in %s: value %s is not a Node" % (name, self.scope_stack[-1], sc, value) + ) else: - self.logger.debug('Get %s in %s: no Node value (or name not in scope)' % (name, self.scope_stack[-1])) + self.logger.debug("Get %s in %s: no Node value (or name not in scope)" % (name, self.scope_stack[-1])) def set_value(self, name, value): """Set the value of name in the current scope. Value must be a Node.""" @@ -1053,12 +1336,12 @@ class CallGraphVisitor(ast.NodeVisitor): if sc is not None: if isinstance(value, Node): sc.defs[name] = value - self.logger.info('Set %s in %s to %s' % (name, sc, value)) + self.logger.info("Set %s in %s to %s" % (name, sc, value)) else: # TODO: should always be a Node or None - self.logger.debug('Set %s in %s: value %s is not a Node' % (name, sc, value)) + self.logger.debug("Set %s in %s: value %s is not a Node" % (name, sc, value)) else: - self.logger.debug('Set: name %s not in scope' % (name)) + self.logger.debug("Set: name %s not in scope" % (name)) ########################################################################### # Attribute getter and setter @@ -1083,7 +1366,7 @@ class CallGraphVisitor(ast.NodeVisitor): if not isinstance(ast_node.ctx, ast.Load): raise ValueError("Expected a load context, got %s" % (type(ast_node.ctx))) - obj_node,attr_name = self.resolve_attribute(ast_node) + obj_node, attr_name = self.resolve_attribute(ast_node) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -1140,7 +1423,7 @@ class CallGraphVisitor(ast.NodeVisitor): if not isinstance(new_value, Node): return False - obj_node,attr_name = self.resolve_attribute(ast_node) + obj_node, attr_name = self.resolve_attribute(ast_node) if isinstance(obj_node, Node) and obj_node.namespace is not None: ns = obj_node.get_name() # fully qualified namespace **of attr** @@ -1215,10 +1498,10 @@ class CallGraphVisitor(ast.NodeVisitor): def get_parent_node(self, graph_node): """Get the parent node of the given Node. (Used in postprocessing.)""" - if '.' in graph_node.namespace: - ns,name = graph_node.namespace.rsplit('.', 1) + if "." 
in graph_node.namespace: + ns, name = graph_node.namespace.rsplit(".", 1) else: - ns,name = '',graph_node.namespace + ns, name = "", graph_node.namespace return self.get_node(ns, name, None) def associate_node(self, graph_node, ast_node, filename=None): @@ -1247,13 +1530,14 @@ class CallGraphVisitor(ast.NodeVisitor): def add_defines_edge(self, from_node, to_node): """Add a defines edge in the graph between two nodes. N.B. This will mark both nodes as defined.""" - + status = False if from_node not in self.defines_edges: self.defines_edges[from_node] = set() - if to_node in self.defines_edges[from_node]: - return False - self.defines_edges[from_node].add(to_node) + status = True from_node.defined = True + if to_node is None or to_node in self.defines_edges[from_node]: + return status + self.defines_edges[from_node].add(to_node) to_node.defined = True return True @@ -1313,6 +1597,9 @@ class CallGraphVisitor(ast.NodeVisitor): Used for cleaning up forward-references once resolved. This prevents spurious edges due to expand_unknowns().""" + if name is None: # relative imports may create nodes with name=None. + return + if from_node not in self.uses_edges: # no uses edges to remove return @@ -1427,18 +1714,27 @@ class CallGraphVisitor(ast.NodeVisitor): n.defined = False def cull_inherited(self): - """For each use edge from W to X.name, if it also has an edge to W to Y.name where Y is used by X, then remove the first edge.""" + """ + For each use edge from W to X.name, if it also has an edge to W to Y.name where + Y is used by X, then remove the first edge. + """ removed_uses_edges = [] for n in self.uses_edges: for n2 in self.uses_edges[n]: inherited = False for n3 in self.uses_edges[n]: - if n3.name == n2.name and n2.namespace is not None and n3.namespace is not None and n3.namespace != n2.namespace: + if ( + n3.name == n2.name + and n2.namespace is not None + and n3.namespace is not None + and n3.namespace != n2.namespace + ): pn2 = self.get_parent_node(n2) pn3 = self.get_parent_node(n3) + # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: + # remove the second edge W to Y.name (TODO: add an option to choose this) if pn2 in self.uses_edges and pn3 in self.uses_edges[pn2]: # remove the first edge W to X.name -# if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) inherited = True if inherited and n in self.uses_edges: @@ -1455,14 +1751,10 @@ class CallGraphVisitor(ast.NodeVisitor): # Lambdas and comprehensions do not define any names in the enclosing # scope, so we only need to treat the uses edges. - # TODO: currently we handle outgoing uses edges only. - # - # What about incoming uses edges? E.g. consider a lambda that is saved - # in an instance variable, then used elsewhere. How do we want the - # graph to look like in that case? 
- - for name in self.nodes: - if name in ('lambda', 'listcomp', 'setcomp', 'dictcomp', 'genexpr'): + # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) + # temporary solution is adding list to force a copy of 'self.nodes' + for name in list(self.nodes): + if name in ("lambda", "listcomp", "setcomp", "dictcomp", "genexpr"): for n in self.nodes[name]: pn = self.get_parent_node(n) if n in self.uses_edges: diff --git a/pyan/anutils.py b/pyan/anutils.py index 47659f5..8063e61 100644 --- a/pyan/anutils.py +++ b/pyan/anutils.py @@ -2,37 +2,56 @@ # -*- coding: utf-8 -*- """Utilities for analyzer.""" -import os.path import ast +import os.path + from .node import Flavor + def head(lst): if len(lst): return lst[0] + def tail(lst): if len(lst) > 1: return lst[1:] else: return [] -def get_module_name(filename): + +def get_module_name(filename, root: str = None): """Try to determine the full module name of a source file, by figuring out - if its directory looks like a package (i.e. has an __init__.py file).""" + if its directory looks like a package (i.e. has an __init__.py file or + there is a .py file in it ).""" - if os.path.basename(filename) == '__init__.py': - return get_module_name(os.path.dirname(filename)) + if os.path.basename(filename) == "__init__.py": + # init file means module name is directory name + module_path = os.path.dirname(filename) + else: + # otherwise it is the filename without extension + module_path = filename.replace(".py", "") - init_path = os.path.join(os.path.dirname(filename), '__init__.py') - mod_name = os.path.basename(filename).replace('.py', '') + # find the module root - walk up the tree and check if it contains .py files - if yes. it is the new root + directories = [(module_path, True)] + if root is None: + while directories[0][0] != os.path.dirname(directories[0][0]): + potential_root = os.path.dirname(directories[0][0]) + is_root = any([f == "__init__.py" for f in os.listdir(potential_root)]) + directories.insert(0, (potential_root, is_root)) - if not os.path.exists(init_path): - return mod_name + # keep directories where itself of parent is root + while not directories[0][1]: + directories.pop(0) - if not os.path.dirname(filename): - return mod_name + else: # root is already known - just walk up until it is matched + while directories[0][0] != root: + potential_root = os.path.dirname(directories[0][0]) + directories.insert(0, (potential_root, True)) + + mod_name = ".".join([os.path.basename(f[0]) for f in directories]) + return mod_name - return get_module_name(os.path.dirname(filename)) + '.' + mod_name def format_alias(x): """Return human-readable description of an ast.alias (used in Import and ImportFrom nodes).""" @@ -44,6 +63,7 @@ def format_alias(x): else: return "%s" % (x.name) + def get_ast_node_name(x): """Return human-readable name of ast.Attribute or ast.Name. Pass through anything else.""" if isinstance(x, ast.Attribute): @@ -54,19 +74,23 @@ def get_ast_node_name(x): else: return x + # Helper for handling binding forms. def sanitize_exprs(exprs): """Convert ast.Tuples in exprs to Python tuples; wrap result in a Python tuple.""" + def process(expr): if isinstance(expr, (ast.Tuple, ast.List)): return expr.elts # .elts is a Python tuple else: return [expr] + if isinstance(exprs, (tuple, list)): return [process(expr) for expr in exprs] else: return process(exprs) + def resolve_method_resolution_order(class_base_nodes, logger): """Compute the method resolution order (MRO) for each of the analyzed classes. 
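As a worked example of what resolve_method_resolution_order() computes, consider a classic diamond hierarchy. CPython's own MRO uses the same C3 linearization, so `__mro__` shows the order that pyan's resolver (operating on its Node graph rather than live classes) aims to reproduce:

```
class A: pass
class B(A): pass
class C(A): pass
class D(B, C): pass

print([cls.__name__ for cls in D.__mro__])
# ['D', 'B', 'C', 'A', 'object']
```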
@@ -81,17 +105,21 @@ def resolve_method_resolution_order(class_base_nodes, logger): from functools import reduce from operator import add + def C3_find_good_head(heads, tails): # find an element of heads which is not in any of the tails flat_tails = reduce(add, tails, []) # flatten the outer level for hd in heads: if hd not in flat_tails: break else: # no break only if there are cyclic dependencies. - raise LinearizationImpossible("MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails)) + raise LinearizationImpossible( + "MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails) + ) return hd def remove_all(elt, lst): # remove all occurrences of elt from lst, return a copy return [x for x in lst if x != elt] + def remove_all_in(elt, lists): # remove elt from all lists, return a copy return [remove_all(elt, lst) for lst in lists] @@ -113,6 +141,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): mro = {} # result try: memo = {} # caching/memoization + def C3_linearize(node): logger.debug("MRO: C3 linearizing %s" % (node)) seen.add(node) @@ -133,6 +162,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): memo[node] = [node] + C3_merge(lists) logger.debug("MRO: C3 linearized %s, result %s" % (node, memo[node])) return memo[node] + for node in class_base_nodes: logger.debug("MRO: analyzing class %s" % (node)) seen = set() # break cycles (separately for each class we start from) @@ -146,6 +176,7 @@ def resolve_method_resolution_order(class_base_nodes, logger): # analyzed is so badly formed that the MRO algorithm fails) memo = {} # caching/memoization + def lookup_bases_recursive(node): seen.add(node) if node not in memo: @@ -166,10 +197,13 @@ def resolve_method_resolution_order(class_base_nodes, logger): return mro + class UnresolvedSuperCallError(Exception): """For specifically signaling an unresolved super().""" + pass + class Scope: """Adaptor that makes scopes look somewhat like those from the Python 2 compiler module, as far as Pyan's CallGraphVisitor is concerned.""" @@ -177,15 +211,16 @@ class Scope: def __init__(self, table): """table: SymTable instance from symtable.symtable()""" name = table.get_name() - if name == 'top': - name = '' # Pyan defines the top level as anonymous + if name == "top": + name = "" # Pyan defines the top level as anonymous self.name = name self.type = table.get_type() # useful for __repr__() - self.defs = {iden:None for iden in table.get_identifiers()} # name:assigned_value + self.defs = {iden: None for iden in table.get_identifiers()} # name:assigned_value def __repr__(self): return "" % (self.type, self.name) + # A context manager, sort of a friend of CallGraphVisitor (depends on implementation details) class ExecuteInInnerScope: """Execute a code block with the scope stack augmented with an inner scope. diff --git a/pyan/callgraph.html b/pyan/callgraph.html new file mode 100644 index 0000000..141fca0 --- /dev/null +++ b/pyan/callgraph.html @@ -0,0 +1,72 @@ + + + + + + + + + +

[pyan/callgraph.html: HTML/JS markup stripped during extraction; recoverable text: "Click node to highlight; Shift-scroll to zoom; Esc to unhighlight"]
+ + + + + + + + + diff --git a/pyan/main.py b/pyan/main.py index e78e17c..18821a1 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -9,124 +9,236 @@ for rendering by e.g. GraphViz or yEd. """ -import logging +from argparse import ArgumentParser from glob import glob -from optparse import OptionParser # TODO: migrate to argparse +import logging +import os from .analyzer import CallGraphVisitor from .visgraph import VisualGraph -from .writers import TgfWriter, DotWriter, YedWriter +from .writers import DotWriter, HTMLWriter, SVGWriter, TgfWriter, YedWriter -def main(): - usage = """usage: %prog FILENAME... [--dot|--tgf|--yed]""" - desc = ('Analyse one or more Python source files and generate an' - 'approximate call graph of the modules, classes and functions' - ' within them.') - parser = OptionParser(usage=usage, description=desc) - parser.add_option("--dot", - action="store_true", default=False, - help="output in GraphViz dot format") - parser.add_option("--tgf", - action="store_true", default=False, - help="output in Trivial Graph Format") - parser.add_option("--yed", - action="store_true", default=False, - help="output in yEd GraphML Format") - parser.add_option("-f", "--file", dest="filename", - help="write graph to FILE", metavar="FILE", default=None) - parser.add_option("-l", "--log", dest="logname", - help="write log to LOG", metavar="LOG") - parser.add_option("-v", "--verbose", - action="store_true", default=False, dest="verbose", - help="verbose output") - parser.add_option("-V", "--very-verbose", - action="store_true", default=False, dest="very_verbose", - help="even more verbose output (mainly for debug)") - parser.add_option("-d", "--defines", - action="store_true", default=True, dest="draw_defines", - help="add edges for 'defines' relationships [default]") - parser.add_option("-n", "--no-defines", - action="store_false", default=True, dest="draw_defines", - help="do not add edges for 'defines' relationships") - parser.add_option("-u", "--uses", - action="store_true", default=True, dest="draw_uses", - help="add edges for 'uses' relationships [default]") - parser.add_option("-N", "--no-uses", - action="store_false", default=True, dest="draw_uses", - help="do not add edges for 'uses' relationships") - parser.add_option("-c", "--colored", - action="store_true", default=False, dest="colored", - help="color nodes according to namespace [dot only]") - parser.add_option("-G", "--grouped-alt", - action="store_true", default=False, dest="grouped_alt", - help="suggest grouping by adding invisible defines edges [only useful with --no-defines]") - parser.add_option("-g", "--grouped", - action="store_true", default=False, dest="grouped", - help="group nodes (create subgraphs) according to namespace [dot only]") - parser.add_option("-e", "--nested-groups", - action="store_true", default=False, dest="nested_groups", - help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]") - parser.add_option("--dot-rankdir", default="TB", dest="rankdir", - help=( - "specifies the dot graph 'rankdir' property for " - "controlling the direction of the graph. " - "Allowed values: ['TB', 'LR', 'BT', 'RL']. 
" - "[dot only]")) - parser.add_option("-a", "--annotated", - action="store_true", default=False, dest="annotated", - help="annotate with module and source line number") - options, args = parser.parse_args() - filenames = [fn2 for fn in args for fn2 in glob(fn)] - if len(args) == 0: - parser.error('Need one or more filenames to process') +def main(cli_args=None): + usage = """%(prog)s FILENAME... [--dot|--tgf|--yed|--svg|--html]""" + desc = ( + "Analyse one or more Python source files and generate an" + "approximate call graph of the modules, classes and functions" + " within them." + ) - if options.nested_groups: - options.grouped = True + parser = ArgumentParser(usage=usage, description=desc) + + parser.add_argument("--dot", action="store_true", default=False, help="output in GraphViz dot format") + + parser.add_argument("--tgf", action="store_true", default=False, help="output in Trivial Graph Format") + + parser.add_argument("--svg", action="store_true", default=False, help="output in SVG Format") + + parser.add_argument("--html", action="store_true", default=False, help="output in HTML Format") + + parser.add_argument("--yed", action="store_true", default=False, help="output in yEd GraphML Format") + + parser.add_argument("--file", dest="filename", help="write graph to FILE", metavar="FILE", default=None) + + parser.add_argument("--namespace", dest="namespace", help="filter for NAMESPACE", metavar="NAMESPACE", default=None) + + parser.add_argument("--function", dest="function", help="filter for FUNCTION", metavar="FUNCTION", default=None) + + parser.add_argument("-l", "--log", dest="logname", help="write log to LOG", metavar="LOG") + + parser.add_argument("-v", "--verbose", action="store_true", default=False, dest="verbose", help="verbose output") + + parser.add_argument( + "-V", + "--very-verbose", + action="store_true", + default=False, + dest="very_verbose", + help="even more verbose output (mainly for debug)", + ) + + parser.add_argument( + "-d", + "--defines", + action="store_true", + dest="draw_defines", + help="add edges for 'defines' relationships [default]", + ) + + parser.add_argument( + "-n", + "--no-defines", + action="store_false", + default=True, + dest="draw_defines", + help="do not add edges for 'defines' relationships", + ) + + parser.add_argument( + "-u", + "--uses", + action="store_true", + default=True, + dest="draw_uses", + help="add edges for 'uses' relationships [default]", + ) + + parser.add_argument( + "-N", + "--no-uses", + action="store_false", + default=True, + dest="draw_uses", + help="do not add edges for 'uses' relationships", + ) + + parser.add_argument( + "-c", + "--colored", + action="store_true", + default=False, + dest="colored", + help="color nodes according to namespace [dot only]", + ) + + parser.add_argument( + "-G", + "--grouped-alt", + action="store_true", + default=False, + dest="grouped_alt", + help="suggest grouping by adding invisible defines edges [only useful with --no-defines]", + ) + + parser.add_argument( + "-g", + "--grouped", + action="store_true", + default=False, + dest="grouped", + help="group nodes (create subgraphs) according to namespace [dot only]", + ) + + parser.add_argument( + "-e", + "--nested-groups", + action="store_true", + default=False, + dest="nested_groups", + help="create nested groups (subgraphs) for nested namespaces (implies -g) [dot only]", + ) + + parser.add_argument( + "--dot-rankdir", + default="TB", + dest="rankdir", + help=( + "specifies the dot graph 'rankdir' property for " + "controlling the direction 
of the graph. " + "Allowed values: ['TB', 'LR', 'BT', 'RL']. " + "[dot only]" + ), + ) + + parser.add_argument( + "-a", + "--annotated", + action="store_true", + default=False, + dest="annotated", + help="annotate with module and source line number", + ) + + parser.add_argument( + "--root", + default=None, + dest="root", + help="Package root directory. Is inferred by default.", + ) + + known_args, unknown_args = parser.parse_known_args(cli_args) + + filenames = [fn2 for fn in unknown_args for fn2 in glob(fn, recursive=True)] + + # determine root + if known_args.root is not None: + root = os.path.abspath(known_args.root) + else: + root = None + + if len(unknown_args) == 0: + parser.error("Need one or more filenames to process") + elif len(filenames) == 0: + parser.error("No files found matching given glob: %s" % " ".join(unknown_args)) + + if known_args.nested_groups: + known_args.grouped = True graph_options = { - 'draw_defines': options.draw_defines, - 'draw_uses': options.draw_uses, - 'colored': options.colored, - 'grouped_alt' : options.grouped_alt, - 'grouped': options.grouped, - 'nested_groups': options.nested_groups, - 'annotated': options.annotated} + "draw_defines": known_args.draw_defines, + "draw_uses": known_args.draw_uses, + "colored": known_args.colored, + "grouped_alt": known_args.grouped_alt, + "grouped": known_args.grouped, + "nested_groups": known_args.nested_groups, + "annotated": known_args.annotated, + } # TODO: use an int argument for verbosity logger = logging.getLogger(__name__) - if options.very_verbose: + + if known_args.very_verbose: logger.setLevel(logging.DEBUG) - elif options.verbose: + + elif known_args.verbose: logger.setLevel(logging.INFO) + else: logger.setLevel(logging.WARN) + logger.addHandler(logging.StreamHandler()) - if options.logname: - handler = logging.FileHandler(options.logname) + + if known_args.logname: + handler = logging.FileHandler(known_args.logname) logger.addHandler(handler) - v = CallGraphVisitor(filenames, logger) + v = CallGraphVisitor(filenames, logger, root=root) + + if known_args.function or known_args.namespace: + + if known_args.function: + function_name = known_args.function.split(".")[-1] + namespace = ".".join(known_args.function.split(".")[:-1]) + node = v.get_node(namespace, function_name) + + else: + node = None + + v.filter(node=node, namespace=known_args.namespace) + graph = VisualGraph.from_visitor(v, options=graph_options, logger=logger) - if options.dot: - writer = DotWriter( - graph, - options=['rankdir='+options.rankdir], - output=options.filename, - logger=logger) - writer.run() + writer = None - if options.tgf: - writer = TgfWriter( - graph, output=options.filename, logger=logger) - writer.run() + if known_args.dot: + writer = DotWriter(graph, options=["rankdir=" + known_args.rankdir], output=known_args.filename, logger=logger) - if options.yed: - writer = YedWriter( - graph, output=options.filename, logger=logger) + if known_args.html: + writer = HTMLWriter(graph, options=["rankdir=" + known_args.rankdir], output=known_args.filename, logger=logger) + + if known_args.svg: + writer = SVGWriter(graph, options=["rankdir=" + known_args.rankdir], output=known_args.filename, logger=logger) + + if known_args.tgf: + writer = TgfWriter(graph, output=known_args.filename, logger=logger) + + if known_args.yed: + writer = YedWriter(graph, output=known_args.filename, logger=logger) + + if writer: writer.run() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/pyan/node.py b/pyan/node.py index 
cad6982..49c8c5b 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -1,34 +1,38 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- + """Abstract node representing data gathered from the analysis.""" from enum import Enum + def make_safe_label(label): """Avoid name clashes with GraphViz reserved words such as 'graph'.""" - unsafe_words = ("digraph", "graph", "cluster", "subgraph") + unsafe_words = ("digraph", "graph", "cluster", "subgraph", "node") out = label for word in unsafe_words: out = out.replace(word, "%sX" % word) - return out.replace('.', '__').replace('*', '') + return out.replace(".", "__").replace("*", "") + class Flavor(Enum): """Flavor describes the kind of object a node represents.""" - UNSPECIFIED = "---" # as it says on the tin - UNKNOWN = "???" # not determined by analysis (wildcard) - NAMESPACE = "namespace" # node representing a namespace - ATTRIBUTE = "attribute" # attr of something, but not known if class or func. + UNSPECIFIED = "---" # as it says on the tin + UNKNOWN = "???" # not determined by analysis (wildcard) - IMPORTEDITEM = "import" # imported item of unanalyzed type + NAMESPACE = "namespace" # node representing a namespace + ATTRIBUTE = "attribute" # attr of something, but not known if class or func. - MODULE = "module" - CLASS = "class" - FUNCTION = "function" - METHOD = "method" # instance method + IMPORTEDITEM = "import" # imported item of unanalyzed type + + MODULE = "module" + CLASS = "class" + FUNCTION = "function" + METHOD = "method" # instance method STATICMETHOD = "staticmethod" - CLASSMETHOD = "classmethod" - NAME = "name" # Python name (e.g. "x" in "x = 42") + CLASSMETHOD = "classmethod" + NAME = "name" # Python name (e.g. "x" in "x = 42") # Flavors have a partial ordering in specificness of the information. # @@ -50,6 +54,7 @@ class Flavor(Enum): def __repr__(self): return self.value + class Node: """A node is an object in the call graph. @@ -96,7 +101,7 @@ class Node: Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: return self.name @@ -104,7 +109,7 @@ class Node: """Return the short name, plus module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1 and self.ast_node is not None: return "%s\\n(%s:%d)" % (self.name, self.filename, self.ast_node.lineno) @@ -115,11 +120,17 @@ class Node: """Return the short name, plus namespace, and module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1: if self.ast_node is not None: - return "%s\\n\\n(%s:%d,\\n%s in %s)" % (self.name, self.filename, self.ast_node.lineno, repr(self.flavor), self.namespace) + return "%s\\n\\n(%s:%d,\\n%s in %s)" % ( + self.name, + self.filename, + self.ast_node.lineno, + repr(self.flavor), + self.namespace, + ) else: return "%s\\n\\n(%s in %s)" % (self.name, repr(self.flavor), self.namespace) else: @@ -128,12 +139,12 @@ class Node: def get_name(self): """Return the full name of this node.""" - if self.namespace == '': + if self.namespace == "": return self.name elif self.namespace is None: - return '*.' + self.name + return "*." + self.name else: - return self.namespace + '.' + self.name + return self.namespace + "." 
+ self.name def get_level(self): """Return the level of this node (in terms of nested namespaces). @@ -145,7 +156,7 @@ class Node: if self.namespace == "": return 0 else: - return 1 + self.namespace.count('.') + return 1 + self.namespace.count(".") def get_toplevel_namespace(self): """Return the name of the top-level namespace of this node, or "" if none.""" @@ -154,7 +165,7 @@ class Node: if self.namespace is None: # group all unknowns in one namespace, "*" return "*" - idx = self.namespace.find('.') + idx = self.namespace.find(".") if idx > -1: return self.namespace[0:idx] else: @@ -175,4 +186,4 @@ class Node: return make_safe_label(self.namespace) def __repr__(self): - return '' % (repr(self.flavor), self.get_name()) + return "" % (repr(self.flavor), self.get_name()) diff --git a/pyan/sphinx.py b/pyan/sphinx.py new file mode 100644 index 0000000..72c45ff --- /dev/null +++ b/pyan/sphinx.py @@ -0,0 +1,171 @@ +""" +Simple sphinx extension that allows including callgraphs in documentation. + +Example usage: + +``` +.. callgraph:: + + +Options are + +- **:no-groups:** (boolean flag): do not group +- **:no-defines:** (boolean flag): if to not draw edges that show which + functions, methods and classes are defined by a class or module +- **:no-uses:** (boolean flag): if to not draw edges that show how a function + uses other functions +- **:no-colors:** (boolean flag): if to not color in callgraph (default is + coloring) +- **:nested-grops:** (boolean flag): if to group by modules and submodules +- **:annotated:** (boolean flag): annotate callgraph with file names +- **:direction:** (string): "horizontal" or "vertical" callgraph +- **:toctree:** (string): path to toctree (as used with autosummary) to link + elements of callgraph to documentation (makes all nodes clickable) +- **:zoomable:** (boolean flag): enables users to zoom and pan callgraph +``` +""" +import re +from typing import Any + +from docutils.parsers.rst import directives +from sphinx.ext.graphviz import align_spec, figure_wrapper, graphviz +from sphinx.util.docutils import SphinxDirective + +from pyan import create_callgraph + + +def direction_spec(argument: Any) -> str: + return directives.choice(argument, ("vertical", "horizontal")) + + +class CallgraphDirective(SphinxDirective): + + # this enables content in the directive + has_content = True + + option_spec = { + # graphviz + "alt": directives.unchanged, + "align": align_spec, + "caption": directives.unchanged, + "name": directives.unchanged, + "class": directives.class_option, + # pyan + "no-groups": directives.unchanged, + "no-defines": directives.unchanged, + "no-uses": directives.unchanged, + "no-colors": directives.unchanged, + "nested-groups": directives.unchanged, + "annotated": directives.unchanged, + "direction": direction_spec, + "toctree": directives.unchanged, + "zoomable": directives.unchanged, + } + + def run(self): + func_name = self.content[0] + base_name = func_name.split(".")[0] + if len(func_name.split(".")) == 1: + func_name = None + base_path = __import__(base_name).__path__[0] + + direction = "vertical" + if "direction" in self.options: + direction = self.options["direction"] + dotcode = create_callgraph( + filenames=f"{base_path}/**/*.py", + root=base_path, + function=func_name, + namespace=base_name, + format="dot", + grouped="no-groups" not in self.options, + draw_uses="no-uses" not in self.options, + draw_defines="no-defines" not in self.options, + nested_groups="nested-groups" in self.options, + colored="no-colors" not in self.options, + 
annotated="annotated" in self.options, + rankdir={"horizontal": "LR", "vertical": "TB"}[direction], + ) + node = graphviz() + + # insert link targets into groups: first insert link, then reformat link + if "toctree" in self.options: + path = self.options["toctree"].strip("/") + # create raw link + dotcode = re.sub( + r'([\w\d]+)(\s.+), (style="filled")', + r'\1\2, href="../' + path + r'/\1.html", target="_blank", \3', + dotcode, + ) + + def create_link(dot_name): + raw_link = re.sub(r"__(\w)", r".\1", dot_name) + # determine if name this is a class by checking if its first letter is capital + # (heuristic but should work almost always) + splits = raw_link.rsplit(".", 2) + if len(splits) > 1 and splits[-2][0].capitalize() == splits[-2][0]: + # is class + link = ".".join(splits[:-1]) + ".html#" + raw_link + '"' + else: + link = raw_link + '.html"' + return link + + dotcode = re.sub( + r'(href="../' + path + r'/)(\w+)(\.html")', + lambda m: m.groups()[0] + create_link(m.groups()[1]), + dotcode, + ) + + node["code"] = dotcode + node["options"] = {"docname": self.env.docname} + if "graphviz_dot" in self.options: + node["options"]["graphviz_dot"] = self.options["graphviz_dot"] + if "layout" in self.options: + node["options"]["graphviz_dot"] = self.options["layout"] + if "alt" in self.options: + node["alt"] = self.options["alt"] + if "align" in self.options: + node["align"] = self.options["align"] + + if "class" in self.options: + classes = self.options["class"] + else: + classes = [] + if "zoomable" in self.options: + if len(classes) == 0: + classes = ["zoomable-callgraph"] + else: + classes.append("zoomable-callgraph") + if len(classes) > 0: + node["classes"] = classes + + if "caption" not in self.options: + self.add_name(node) + return [node] + else: + figure = figure_wrapper(self, node, self.options["caption"]) + self.add_name(figure) + return [figure] + + +def setup(app): + + app.add_directive("callgraph", CallgraphDirective) + app.add_js_file("https://cdn.jsdelivr.net/npm/svg-pan-zoom@3.6.1/dist/svg-pan-zoom.min.js") + + # script to find zoomable svgs + script = """ + window.addEventListener('load', () => { + Array.from(document.getElementsByClassName('zoomable-callgraph')).forEach(function(element) { + svgPanZoom(element); + }); + }) + """ + + app.add_js_file(None, body=script) + + return { + "version": "0.1", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/pyan/visgraph.py b/pyan/visgraph.py index 5b27086..748e67a 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -2,24 +2,28 @@ # -*- coding: utf-8 -*- """Format-agnostic representation of the output graph.""" -import re -import logging import colorsys +import logging +import re + -# Set node color by filename. -# -# HSL: hue = top-level namespace, lightness = nesting level, saturation constant. -# -# The "" namespace (for *.py files) gets the first color. Since its -# level is 0, its lightness will be 1.0, i.e. pure white regardless -# of the hue. -# class Colorizer: + """Output graph color manager. + + We set node color by filename. + + HSL: hue = top-level namespace, lightness = nesting level, saturation constant. + + The "" namespace (for *.py files) gets the first color. Since its + level is 0, its lightness will be 1.0, i.e. pure white regardless + of the hue. 
+ """ + def __init__(self, num_colors, colored=True, logger=None): self.logger = logger or logging.getLogger(__name__) self.colored = colored - self._hues = [j/num_colors for j in range(num_colors)] + self._hues = [j / num_colors for j in range(num_colors)] self._idx_of = {} # top-level namespace: hue index self._idx = 0 @@ -40,42 +44,41 @@ class Colorizer: def get(self, node): # return (group number, hue index) idx = self._node_to_idx(node) - return (idx,self._hues[idx]) + return (idx, self._hues[idx]) def make_colors(self, node): # return (group number, fill color, text color) if self.colored: - idx,H = self.get(node) - L = max( [1.0 - 0.1*node.get_level(), 0.1] ) + idx, H = self.get(node) + L = max([1.0 - 0.1 * node.get_level(), 0.1]) S = 1.0 A = 0.7 # make nodes translucent (to handle possible overlaps) - fill_RGBA = self.htmlize_rgb(*colorsys.hls_to_rgb(H,L,S), A=A) + fill_RGBA = self.htmlize_rgb(*colorsys.hls_to_rgb(H, L, S), A=A) # black text on light nodes, white text on (very) dark nodes. text_RGB = "#000000" if L >= 0.5 else "#ffffff" else: - idx,_ = self.get(node) + idx, _ = self.get(node) fill_RGBA = self.htmlize_rgb(1.0, 1.0, 1.0, 0.7) text_RGB = "#000000" return idx, fill_RGBA, text_RGB @staticmethod - def htmlize_rgb(R,G,B,A=None): + def htmlize_rgb(R, G, B, A=None): if A is not None: - R,G,B,A = [int(255.0*x) for x in (R,G,B,A)] - return "#%02x%02x%02x%02x" % (R,G,B,A) + R, G, B, A = [int(255.0 * x) for x in (R, G, B, A)] + return "#%02x%02x%02x%02x" % (R, G, B, A) else: - R,G,B = [int(255.0*x) for x in (R,G,B)] - return "#%02x%02x%02x" % (R,G,B) + R, G, B = [int(255.0 * x) for x in (R, G, B)] + return "#%02x%02x%02x" % (R, G, B) class VisualNode(object): """ A node in the output graph: colors, internal ID, human-readable label, ... 
""" - def __init__( - self, id, label='', flavor='', - fill_color='', text_color='', group=''): - self.id = id # graphing software friendly label (no special chars) + + def __init__(self, id, label="", flavor="", fill_color="", text_color="", group=""): + self.id = id # graphing software friendly label (no special chars) self.label = label # human-friendly label self.flavor = flavor self.fill_color = fill_color @@ -83,15 +86,11 @@ class VisualNode(object): self.group = group def __repr__(self): - optionals = [ - repr(s) for s in [ - self.label, self.flavor, - self.fill_color, self.text_color, self.group] if s] + optionals = [repr(s) for s in [self.label, self.flavor, self.fill_color, self.text_color, self.group] if s] if optionals: - return ('VisualNode(' + repr(self.id) + - ', ' + ', '.join(optionals)+')') + return "VisualNode(" + repr(self.id) + ", " + ", ".join(optionals) + ")" else: - return 'VisualNode(' + repr(self.id) + ')' + return "VisualNode(" + repr(self.id) + ")" class VisualEdge(object): @@ -100,22 +99,19 @@ class VisualEdge(object): flavor is meant to be 'uses' or 'defines' """ + def __init__(self, source, target, flavor, color): self.source = source self.target = target self.flavor = flavor - self.color = color + self.color = color def __repr__(self): - return ( - 'Edge('+self.source.label+' '+self.flavor+' ' + - self.target.label+')') + return "Edge(" + self.source.label + " " + self.flavor + " " + self.target.label + ")" class VisualGraph(object): - def __init__( - self, id, label, nodes=None, edges=None, subgraphs=None, - grouped=False): + def __init__(self, id, label, nodes=None, edges=None, subgraphs=None, grouped=False): self.id = id self.label = label self.nodes = nodes or [] @@ -125,13 +121,13 @@ class VisualGraph(object): @classmethod def from_visitor(cls, visitor, options=None, logger=None): - colored = options.get('colored', False) - nested = options.get('nested_groups', False) - grouped_alt = options.get('grouped_alt', False) - grouped = nested or options.get('grouped', False) # nested -> grouped - annotated = options.get('annotated', False) - draw_defines = options.get('draw_defines', False) - draw_uses = options.get('draw_uses', False) + colored = options.get("colored", False) + nested = options.get("nested_groups", False) + grouped_alt = options.get("grouped_alt", False) + grouped = nested or options.get("grouped", False) # nested -> grouped + annotated = options.get("annotated", False) + draw_defines = options.get("draw_defines", False) + draw_uses = options.get("draw_uses", False) # Terminology: # - what Node calls "label" is a computer-friendly unique identifier @@ -143,12 +139,18 @@ class VisualGraph(object): if annotated: if grouped: # group label includes namespace already - labeler = lambda n: n.get_annotated_name() + def labeler(n): + return n.get_annotated_name() + else: # the node label is the only place to put the namespace info - labeler = lambda n: n.get_long_annotated_name() + def labeler(n): + return n.get_long_annotated_name() + else: - labeler = lambda n: n.get_short_name() + + def labeler(n): + return n.get_short_name() logger = logger or logging.getLogger(__name__) @@ -165,33 +167,36 @@ class VisualGraph(object): for node in visited_nodes: filenames.add(node.filename) return filenames - colorizer = Colorizer(num_colors=len(find_filenames())+1, - colored=colored, logger=logger) + + colorizer = Colorizer(num_colors=len(find_filenames()) + 1, colored=colored, logger=logger) nodes_dict = dict() - root_graph = cls('G', label='', grouped=grouped) 
+ root_graph = cls("G", label="", grouped=grouped) subgraph = root_graph namespace_stack = [] - prev_namespace = '' # The namespace '' is first in visited_nodes. + prev_namespace = "" # The namespace '' is first in visited_nodes. for node in visited_nodes: - logger.info('Looking at %s' % node.name) + logger.info("Looking at %s" % node.name) # Create the node itself and add it to nodes_dict idx, fill_RGBA, text_RGB = colorizer.make_colors(node) visual_node = VisualNode( - id=node.get_label(), - label=labeler(node), - flavor=repr(node.flavor), - fill_color=fill_RGBA, - text_color=text_RGB, - group=idx) + id=node.get_label(), + label=labeler(node), + flavor=repr(node.flavor), + fill_color=fill_RGBA, + text_color=text_RGB, + group=idx, + ) nodes_dict[node] = visual_node # next namespace? if grouped and node.namespace != prev_namespace: - logger.info( - 'New namespace %s, old was %s' - % (node.namespace, prev_namespace)) + if not prev_namespace: + logger.info("New namespace %s" % (node.namespace)) + else: + logger.info("New namespace %s, old was %s" % (node.namespace, prev_namespace)) + prev_namespace = node.namespace label = node.get_namespace_label() subgraph = cls(label, node.namespace) @@ -204,14 +209,11 @@ class VisualGraph(object): m = re.match(namespace_stack[-1].label, node.namespace) # The '.' check catches siblings in cases like # MeshGenerator vs. Mesh. - while (m is None or - m.end() == len(node.namespace) or - node.namespace[m.end()] != '.'): + while m is None or m.end() == len(node.namespace) or node.namespace[m.end()] != ".": namespace_stack.pop() if not len(namespace_stack): break - m = re.match( - namespace_stack[-1].label, node.namespace) + m = re.match(namespace_stack[-1].label, node.namespace) parentgraph = namespace_stack[-1] if len(namespace_stack) else root_graph parentgraph.subgraphs.append(subgraph) @@ -231,17 +233,12 @@ class VisualGraph(object): # place closer together those nodes that are linked by a # defines relationship. 
# - color = "#838b8b" if draw_defines else '#ffffff00' + color = "#838b8b" if draw_defines else "#ffffff00" for n in visitor.defines_edges: if n.defined: for n2 in visitor.defines_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'defines', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "defines", color)) if draw_uses: color = "#000000" @@ -249,11 +246,6 @@ class VisualGraph(object): if n.defined: for n2 in visitor.uses_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'uses', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "uses", color)) return root_graph diff --git a/pyan/writers.py b/pyan/writers.py index 94ce89f..9531cc9 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -1,9 +1,15 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- + """Graph markup writers.""" -import sys +import io import logging +import os +import subprocess +import sys + +from jinja2 import Template class Writer(object): @@ -12,7 +18,7 @@ class Writer(object): self.output = output self.logger = logger or logging.getLogger(__name__) self.indent_level = 0 - self.tabstop = tabstop*' ' + self.tabstop = tabstop * " " def log(self, msg): self.logger.info(msg) @@ -24,19 +30,22 @@ class Writer(object): self.indent_level -= level def write(self, line): - self.outstream.write(self.tabstop*self.indent_level+line+'\n') + self.outstream.write(self.tabstop * self.indent_level + line + "\n") def run(self): - self.log('%s running' % type(self)) + self.log("%s running" % type(self)) try: - self.outstream = open(self.output, 'w') + if isinstance(self.output, io.StringIO): # write to stream + self.outstream = self.output + else: + self.outstream = open(self.output, "w") # write to file except TypeError: self.outstream = sys.stdout self.start_graph() self.write_subgraph(self.graph) self.write_edges() self.finish_graph() - if self.output: + if self.output and not isinstance(self.output, io.StringIO): self.outstream.close() def write_subgraph(self, graph): @@ -80,102 +89,126 @@ class Writer(object): class TgfWriter(Writer): def __init__(self, graph, output=None, logger=None): - Writer.__init__( - self, graph, - output=output, - logger=logger) + Writer.__init__(self, graph, output=output, logger=logger) self.i = 1 self.id_map = {} def write_node(self, node): - self.write('%d %s' % (self.i, node.label)) + self.write("%d %s" % (self.i, node.label)) self.id_map[node] = self.i self.i += 1 def start_edges(self): - self.write('#') + self.write("#") def write_edge(self, edge): - flavor = 'U' if edge.flavor == 'uses' else 'D' - self.write( - '%s %s %s' % - (self.id_map[edge.source], self.id_map[edge.target], flavor)) + flavor = "U" if edge.flavor == "uses" else "D" + self.write("%s %s %s" % (self.id_map[edge.source], self.id_map[edge.target], flavor)) class DotWriter(Writer): - def __init__(self, graph, - options=None, output=None, logger=None, tabstop=4): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + def __init__(self, graph, options=None, output=None, logger=None, tabstop=4): + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) options = options or [] if graph.grouped: options += ['clusterrank="local"'] - self.options = ', '.join(options) + self.options = ", ".join(options) self.grouped = graph.grouped def start_graph(self): - self.write('digraph G {') - self.write(' graph [' + self.options + '];') + 
self.write("digraph G {") + self.write(" graph [" + self.options + "];") self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) # Name must begin with "cluster" to be recognized as a cluster by GraphViz. - self.write( - "subgraph cluster_%s {\n" % graph.id) + self.write("subgraph cluster_%s {\n" % graph.id) self.indent() # translucent gray (no hue to avoid visual confusion with any # group of colored nodes) - self.write( - 'graph [style="filled,rounded",' - 'fillcolor="#80808018", label="%s"];' - % graph.label) + self.write('graph [style="filled,rounded", fillcolor="#80808018", label="%s"];' % graph.label) def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) # terminate previous subgraph self.dedent() - self.write('}') + self.write("}") def write_node(self, node): - self.log('Write node %s' % node.label) + self.log("Write node %s" % node.label) self.write( '%s [label="%s", style="filled", fillcolor="%s",' - ' fontcolor="%s", group="%s"];' - % ( - node.id, node.label, - node.fill_color, node.text_color, node.group)) + ' fontcolor="%s", group="%s"];' % (node.id, node.label, node.fill_color, node.text_color, node.group) + ) def write_edge(self, edge): source = edge.source target = edge.target - color = edge.color - if edge.flavor == 'defines': - self.write( - ' %s -> %s [style="dashed",' - ' color="%s"];' - % (source.id, target.id, color)) - else: # edge.flavor == 'uses': - self.write( - ' %s -> %s [style="solid",' - ' color="%s"];' - % (source.id, target.id, color)) + color = edge.color + if edge.flavor == "defines": + self.write(' %s -> %s [style="dashed", color="%s"];' % (source.id, target.id, color)) + else: # edge.flavor == 'uses': + self.write(' %s -> %s [style="solid", color="%s"];' % (source.id, target.id, color)) def finish_graph(self): - self.write('}') # terminate "digraph G {" + self.write("}") # terminate "digraph G {" + + +class SVGWriter(DotWriter): + def run(self): + # write dot file + self.log("%s running" % type(self)) + self.outstream = io.StringIO() + self.start_graph() + self.write_subgraph(self.graph) + self.write_edges() + self.finish_graph() + + # convert to svg + svg = subprocess.run( + "dot -Tsvg", shell=True, stdout=subprocess.PIPE, input=self.outstream.getvalue().encode() + ).stdout.decode() + + if self.output: + if isinstance(self.output, io.StringIO): + self.output.write(svg) + else: + with open(self.output, "w") as f: + f.write(svg) + else: + print(svg) + + +class HTMLWriter(SVGWriter): + def run(self): + with io.StringIO() as svg_stream: + # run SVGWriter with stream as output + output = self.output + self.output = svg_stream + super().run() + svg = svg_stream.getvalue() + self.output = output + + # insert svg into html + with open(os.path.join(os.path.dirname(__file__), "callgraph.html"), "r") as f: + template = Template(f.read()) + + html = template.render(svg=svg) + if self.output: + if isinstance(self.output, io.StringIO): + self.output.write(html) + else: + with open(self.output, "w") as f: + f.write(html) + else: + print(html) class YedWriter(Writer): def __init__(self, graph, output=None, logger=None, tabstop=2): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) self.grouped = graph.grouped self.indent_level = 0 self.edge_id = 0 @@ -183,18 +216,19 @@ class YedWriter(Writer): def 
start_graph(self): self.write('') self.write( - '') + '' + ) self.indent() self.write('') self.write('') @@ -202,96 +236,87 @@ class YedWriter(Writer): self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) self.write('' % graph.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') - self.write('%s' - % graph.label) + self.write( + '%s' % graph.label + ) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.write('' % graph.id) self.indent() def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_node(self, node): - self.log('Write node %s' % node.label) - width = 20 + 10*len(node.label) + self.log("Write node %s" % node.label) + width = 20 + 10 * len(node.label) self.write('' % node.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('' % ("30", width)) - self.write('' - % node.fill_color) - self.write('') - self.write('%s' - % node.label) + self.write('' % node.fill_color) + self.write('') + self.write("%s" % node.label) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_edge(self, edge): self.edge_id += 1 source = edge.source target = edge.target - self.write( - '' - % (self.edge_id, source.id, target.id)) + self.write('' % (self.edge_id, source.id, target.id)) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() - if edge.flavor == 'defines': - self.write('' - % edge.color) + if edge.flavor == "defines": + self.write('' % edge.color) else: - self.write('' - % edge.color) + self.write('' % edge.color) self.write('') self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def finish_graph(self): self.dedent(2) - self.write(' ') + self.write(" ") self.dedent() - self.write('') + self.write("") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5c2e34e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.isort] +profile = "black" +honor_noqa = true +line_length = 120 +combine_as_imports = true +force_sort_within_sections = true +known_first_party = "pyan" + +[tool.black] +line-length = 120 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | egg-info + | buck-out + | build + | dist + | env +)/ +''' diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..2c6c8ef --- /dev/null +++ b/pytest.ini @@ -0,0 +1,15 @@ +[pytest] +addopts = + -rsxX + -vv + + --cov-config=.coveragerc + --cov=pyan + --cov-report=html + --cov-report=term-missing:skip-covered + --no-cov-on-fail +testpaths = tests/ +log_cli_level = ERROR +log_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +cache_dir = .cache diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..42e6fb3 --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,3 @@ +coverage>=5.3 +pytest>=6.1.2 +pytest-cov>=2.10.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3392002 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[flake8] +max-line-length = 120 +show-source = true +ignore = + E203, # space before : (needed for how black formats slicing) + W503, # line break before binary operator + W504, # line break after binary operator + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # ignore not easy to read variables like i l I etc. + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. + S001, # found modulo formatter (incorrect picks up mod operations) + F401 # unused imports + W605 # invalid escape sequence (e.g. for LaTeX) +exclude = docs/build/*.py, + node_modules/*.py, + .eggs/*.py, + versioneer.py, + venv/*, + .venv/*, + .git/* + .history/* diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e741070 --- /dev/null +++ b/setup.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +"""setuptools-based setup.py for pyan3. + +Tested on Python 3.6. + +Usage as usual with setuptools: + python3 setup.py build + python3 setup.py sdist + python3 setup.py bdist_wheel --universal + python3 setup.py install + +For details, see + http://setuptools.readthedocs.io/en/latest/setuptools.html#command-reference +or + python3 setup.py --help + python3 setup.py --help-commands + python3 setup.py --help bdist_wheel # or any command +""" + +import ast +import os + +from setuptools import setup + +######################################################### +# General config +######################################################### + +# Short description for package list on PyPI +# +SHORTDESC = "Offline call graph generator for Python 3" + +# Long description for package homepage on PyPI +# +DESC = ( + "Generate approximate call graphs for Python programs.\n" + "\n" + "Pyan takes one or more Python source files, performs a " + "(rather superficial) static analysis, and constructs a directed graph of " + "the objects in the combined source, and how they define or " + "use each other. The graph can be output for rendering by GraphViz or yEd." +) + +######################################################### +# Init +######################################################### + +# Extract __version__ from the package __init__.py +# (since it's not a good idea to actually run __init__.py during the +# build process). 
+# +# https://stackoverflow.com/q/2058802/1959808 +# +init_py_path = os.path.join("pyan", "__init__.py") +version = None +try: + with open(init_py_path) as f: + for line in f: + if line.startswith("__version__"): + module = ast.parse(line) + expr = module.body[0] + v = expr.value + if type(v) is ast.Constant: + version = v.value + elif type(v) is ast.Str: # TODO: Python 3.8: remove ast.Str + version = v.s + break +except FileNotFoundError: + pass +if not version: + raise RuntimeError(f"Version information not found in {init_py_path}") + +######################################################### +# Call setup() +######################################################### + +setup( + name="pyan3", + version=version, + author="Juha Jeronen", + author_email="juha.m.jeronen@gmail.com", + url="https://github.com/Technologicat/pyan", + description=SHORTDESC, + long_description=DESC, + license="GPL 2.0", + # free-form text field; + # https://stackoverflow.com/q/34994130/1959808 + platforms=["Linux"], + # See + # https://pypi.python.org/pypi?%3Aaction=list_classifiers + # + # for the standard classifiers. + # + classifiers=[ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Topic :: Software Development", + ], + # See + # http://setuptools.readthedocs.io/en/latest/setuptools.html + # + setup_requires=["wheel"], + install_requires=["jinja2"], + provides=["pyan"], + # keywords for PyPI (in case you upload your project) + # + # e.g. the keywords your project uses as topics on GitHub, + # minus "python" (if there) + # + keywords=["call-graph", "static-code-analysis"], + # Declare packages so that python -m setup build will copy .py files + # (especially __init__.py). + # + # This **does not** automatically recurse into subpackages, + # so they must also be declared. 
+ # + packages=["pyan"], + zip_safe=True, + package_data={"pyan": ["callgraph.html"]}, + include_package_data=True, + entry_points={ + "console_scripts": [ + "pyan3 = pyan.main:main", + ] + }, +) diff --git a/tests/old_tests/issue2/pyan_err.py b/tests/old_tests/issue2/pyan_err.py new file mode 100644 index 0000000..0b21eaf --- /dev/null +++ b/tests/old_tests/issue2/pyan_err.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8; -*- +# See issue #2 + +""" +This works fine +a = 3 +b = 4 +print(a + b) +""" + +# But this did not (#2) +a: int = 3 +b = 4 +print(a + b) diff --git a/tests/old_tests/issue2/run.sh b/tests/old_tests/issue2/run.sh new file mode 100644 index 0000000..9a73808 --- /dev/null +++ b/tests/old_tests/issue2/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pyan pyan_err.py -V >out.dot diff --git a/tests/old_tests/issue3/testi.py b/tests/old_tests/issue3/testi.py new file mode 100644 index 0000000..f4bfd4d --- /dev/null +++ b/tests/old_tests/issue3/testi.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8; -*- +# See issue #3 + + +def f(): + return [x for x in range(10)] + + +def g(): + return [(x, y) for x in range(10) for y in range(10)] + + +def h(results): + return [ + ( + [(name, allargs) for name, _, _, allargs, _ in recs], + {name: inargs for name, inargs, _, _, _ in recs}, + {name: meta for name, _, _, _, meta in recs}, + ) + for recs in (results[key] for key in sorted(results.keys())) + ] diff --git a/tests/old_tests/issue5/meas_xrd.py b/tests/old_tests/issue5/meas_xrd.py new file mode 100644 index 0000000..072bcb8 --- /dev/null +++ b/tests/old_tests/issue5/meas_xrd.py @@ -0,0 +1,28 @@ +import os.path + +import numpy as np +import pandas.io.parsers + + +class MeasXRD: + def __init__(self, path: str): + if not os.path.isfile(path): + raise FileNotFoundError("Invalid XRD file path:", path) + + row_ind = 2 + self.params = {} + with open(path, "r") as file: + line = file.readline() + if line != "[Measurement conditions]\n": + raise ValueError("XRD measurement file does not contain a valid header") + + line = file.readline() + while line not in ["[Scan points]\n", ""]: + row_ind += 1 + columns = line.rstrip("\n").split(",", 1) + self.params[columns[0]] = columns[1] + line = file.readline() + + self.data = pandas.io.parsers.read_csv( + path, skiprows=row_ind, dtype={"Angle": np.float_, "Intensity": np.int_}, engine="c" + ) diff --git a/tests/old_tests/issue5/plot_xrd.py b/tests/old_tests/issue5/plot_xrd.py new file mode 100644 index 0000000..fc2d408 --- /dev/null +++ b/tests/old_tests/issue5/plot_xrd.py @@ -0,0 +1,14 @@ +import plotly.graph_objs as go +import plotly.offline as py + +from . import meas_xrd + + +def plot_xrd(meas: meas_xrd.MeasXRD): + trace = go.Scatter(x=meas.data["Angle"], y=meas.data["Intensity"]) + + layout = go.Layout(title="XRD data", xaxis=dict(title="Angle"), yaxis=dict(title="Intensity", type="log")) + + data = [trace] + fig = go.Figure(data=data, layout=layout) + return py.plot(fig, output_type="div", include_plotlyjs=False) diff --git a/tests/old_tests/issue5/relimport.py b/tests/old_tests/issue5/relimport.py new file mode 100644 index 0000000..1145c25 --- /dev/null +++ b/tests/old_tests/issue5/relimport.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8; -*- +# See issue #5 + +from . import mod1 # noqa +from . 
import mod1 as moo # noqa +from ..mod3 import bar +from .mod2 import foo diff --git a/tests/old_tests/issue5/run.sh b/tests/old_tests/issue5/run.sh new file mode 100644 index 0000000..6829e6d --- /dev/null +++ b/tests/old_tests/issue5/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pyan plot_xrd.py --uses --colored --grouped --annotated --dot > test.dot diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py new file mode 100644 index 0000000..f1e1d57 --- /dev/null +++ b/tests/test_analyzer.py @@ -0,0 +1,64 @@ +from glob import glob +import logging +import os + +import pytest + +from pyan.analyzer import CallGraphVisitor + + +@pytest.fixture +def callgraph(): + filenames = glob(os.path.join(os.path.dirname(__file__), "test_code/**/*.py"), recursive=True) + v = CallGraphVisitor(filenames, logger=logging.getLogger()) + return v + + +def get_node(nodes, name): + filtered_nodes = [node for node in nodes if node.get_name() == name] + assert len(filtered_nodes) == 1, f"Node with name {name} should exist" + return filtered_nodes[0] + + +def get_in_dict(node_dict, name): + return node_dict[get_node(node_dict.keys(), name)] + + +def test_resolve_import_as(callgraph): + imports = get_in_dict(callgraph.uses_edges, "test_code.submodule2") + get_node(imports, "test_code.submodule1") + assert len(imports) == 1, "only one effective import" + + imports = get_in_dict(callgraph.uses_edges, "test_code.submodule1") + get_node(imports, "test_code.subpackage1.submodule1.A") + get_node(imports, "test_code.subpackage1") + + +def test_import_relative(callgraph): + imports = get_in_dict(callgraph.uses_edges, "test_code.subpackage1.submodule1") + get_node(imports, "test_code.submodule2.test_2") + + +def test_resolve_use_in_class(callgraph): + uses = get_in_dict(callgraph.uses_edges, "test_code.subpackage1.submodule1.A.__init__") + get_node(uses, "test_code.submodule2.test_2") + + +def test_resolve_use_in_function(callgraph): + uses = get_in_dict(callgraph.uses_edges, "test_code.submodule2.test_2") + get_node(uses, "test_code.submodule1.test_func1") + get_node(uses, "test_code.submodule1.test_func2") + + +def test_resolve_package_without___init__(callgraph): + defines = get_in_dict(callgraph.defines_edges, "test_code.subpackage2.submodule_hidden1") + get_node(defines, "test_code.subpackage2.submodule_hidden1.test_func1") + + +def test_resolve_package_with_known_root(): + dirname = os.path.dirname(__file__) + filenames = glob(os.path.join(dirname, "test_code/**/*.py"), recursive=True) + callgraph = CallGraphVisitor(filenames, logger=logging.getLogger(), root=dirname) + dirname_base = os.path.basename(dirname) + defines = get_in_dict(callgraph.defines_edges, f"{dirname_base}.test_code.subpackage2.submodule_hidden1") + get_node(defines, f"{dirname_base}.test_code.subpackage2.submodule_hidden1.test_func1") diff --git a/tests/test_code/__init__.py b/tests/test_code/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_code/submodule1.py b/tests/test_code/submodule1.py new file mode 100644 index 0000000..d6893a4 --- /dev/null +++ b/tests/test_code/submodule1.py @@ -0,0 +1,21 @@ +from test_code import subpackage1 as subpackage +from test_code.subpackage1 import A + + +def test_func1(a): + return a + + +def test_func2(a): + return a + + +class B: + def __init__(self, k): + self.a = 1 + + def to_A(self): + return A(self) + + def get_a_via_A(self): + return test_func1(self.to_A().b.a) diff --git a/tests/test_code/submodule2.py b/tests/test_code/submodule2.py new file mode 100644 index 0000000..76706b5 --- 
/dev/null +++ b/tests/test_code/submodule2.py @@ -0,0 +1,9 @@ +import test_code.submodule1 as b + +from . import submodule1 + +A = 32 + + +def test_2(a): + return submodule1.test_func2(a) + A + b.test_func1(a) diff --git a/tests/test_code/subpackage1/__init__.py b/tests/test_code/subpackage1/__init__.py new file mode 100644 index 0000000..d213d49 --- /dev/null +++ b/tests/test_code/subpackage1/__init__.py @@ -0,0 +1,3 @@ +from test_code.subpackage1.submodule1 import A + +__all__ = ["A"] diff --git a/tests/test_code/subpackage1/submodule1.py b/tests/test_code/subpackage1/submodule1.py new file mode 100644 index 0000000..7798ee2 --- /dev/null +++ b/tests/test_code/subpackage1/submodule1.py @@ -0,0 +1,6 @@ +from ..submodule2 import test_2 + + +class A: + def __init__(self, b): + self.b = test_2(b) diff --git a/tests/test_code/subpackage2/submodule_hidden1.py b/tests/test_code/subpackage2/submodule_hidden1.py new file mode 100644 index 0000000..5d2722d --- /dev/null +++ b/tests/test_code/subpackage2/submodule_hidden1.py @@ -0,0 +1,2 @@ +def test_func1(): + pass diff --git a/uploaddist.sh b/uploaddist.sh new file mode 100755 index 0000000..d75f5f7 --- /dev/null +++ b/uploaddist.sh @@ -0,0 +1,3 @@ +#!/bin/bash +VERSION="$1" +twine upload dist/pyan3-${VERSION}.tar.gz dist/pyan3-${VERSION}-py3-none-any.whl diff --git a/visualize_pyan_architecture.sh b/visualize_pyan_architecture.sh index f7471c4..22c6334 100755 --- a/visualize_pyan_architecture.sh +++ b/visualize_pyan_architecture.sh @@ -1,4 +1,4 @@ #!/bin/bash echo -ne "Pyan architecture: generating architecture.{dot,svg}\n" -./pyan.py pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log +python3 -m pyan pyan/*.py --no-defines --uses --colored --annotate --dot -V >architecture.dot 2>architecture.log dot -Tsvg architecture.dot >architecture.svg
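
Usage sketch for the new programmatic entry point. The new `pyan/sphinx.py` drives the analysis through `pyan.create_callgraph`; the keyword arguments below are copied from that call site, but the full signature of `create_callgraph` is not shown in this diff, so treat the argument list and the package path `mypackage` as assumptions.

```
# Sketch: build a dot-format call graph programmatically, mirroring the call
# made in pyan/sphinx.py. The keyword arguments are taken from that call site;
# "mypackage" is a hypothetical package, not part of this change.
from pyan import create_callgraph

dotcode = create_callgraph(
    filenames="mypackage/**/*.py",  # glob of source files, as in sphinx.py
    root="mypackage",               # package root, cf. the new --root CLI option
    function=None,                  # or "mypackage.mod.func" to focus on one function
    namespace="mypackage",          # restrict the graph to this namespace
    format="dot",
    grouped=True,
    draw_uses=True,
    draw_defines=True,
    nested_groups=False,
    colored=True,
    annotated=False,
    rankdir="TB",                   # "TB" = vertical, "LR" = horizontal
)

with open("callgraph.dot", "w") as f:
    f.write(dotcode)
```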
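
Registering the new Sphinx extension: a minimal `conf.py` sketch, assuming the module is importable as `pyan.sphinx` (its `setup()` adds the `callgraph` directive and the svg-pan-zoom script). Enabling `sphinx.ext.graphviz` is also an assumption; the directive returns `graphviz` nodes, so the GraphViz extension is expected to handle rendering.

```
# docs/conf.py -- sketch, not prescribed by this diff
extensions = [
    "sphinx.ext.graphviz",  # assumed: renders the graphviz nodes the directive emits
    "pyan.sphinx",          # assumed import path of the new extension module
]
```

A page can then use `.. callgraph:: mypackage.module.function` with the options listed in the module docstring (for example `:no-defines:`, `:direction: horizontal`, or `:toctree: api`).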
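
In-memory output: `Writer.run` now accepts an `io.StringIO` as `output`, which is how `HTMLWriter` captures the SVG internally. Below is a minimal sketch of the same pattern from user code; the tiny hand-built `VisualGraph` stands in for one produced by `VisualGraph.from_visitor` as in `pyan/main.py`, and `SVGWriter` needs the GraphViz `dot` binary on the PATH because it shells out to `dot -Tsvg`.

```
# Sketch: capture writer output in memory instead of writing to a file.
import io

from pyan.visgraph import VisualEdge, VisualGraph, VisualNode
from pyan.writers import DotWriter, SVGWriter

# Stand-in graph; normally this comes from VisualGraph.from_visitor(...).
a = VisualNode("a", label="a", fill_color="#ffffffb2", text_color="#000000", group=0)
b = VisualNode("b", label="b", fill_color="#ffffffb2", text_color="#000000", group=0)
graph = VisualGraph("G", label="", nodes=[a, b], edges=[VisualEdge(a, b, "uses", "#000000")])

dot_buffer = io.StringIO()
DotWriter(graph, options=["rankdir=LR"], output=dot_buffer).run()
print(dot_buffer.getvalue())        # GraphViz source, never written to disk

svg_buffer = io.StringIO()
SVGWriter(graph, options=["rankdir=LR"], output=svg_buffer).run()  # needs "dot" on PATH
svg_markup = svg_buffer.getvalue()
```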