Resolve modules even if packages do not contain inits - allow direct specification of root

This commit is contained in:
Jan Beitner
2021-01-14 14:15:00 +00:00
parent 3850dcac90
commit 617d543f26
7 changed files with 60 additions and 18 deletions

View File

@@ -16,6 +16,7 @@ __version__ = "1.1.2"
# TODO: fix code duplication with main.py, should have just one implementation.
def create_callgraph(
filenames: Union[List[str], str] = "**/*.py",
root: str = None,
function: Union[str, None] = None,
namespace: Union[str, None] = None,
format: str = "dot",
@@ -36,6 +37,7 @@ def create_callgraph(
filenames: glob pattern or list of glob patterns
to identify filenames to parse (`**` for multiple directories)
example: **/*.py for all python files
root: path to known root directory at which package root sits. Defaults to None, i.e. it will be inferred.
function: if defined, function name to filter for, e.g. "my_module.my_function"
to only include calls that are related to `my_function`
namespace: if defined, namespace to filter for, e.g. "my_module", it is highly
@@ -71,7 +73,7 @@ def create_callgraph(
"annotated": annotated,
}
v = CallGraphVisitor(filenames)
v = CallGraphVisitor(filenames, root=root)
if function or namespace:
if function:
function_name = function.split(".")[-1]

View File

@@ -51,7 +51,7 @@ class CallGraphVisitor(ast.NodeVisitor):
all files. This way use information between objects in different files
can be gathered."""
def __init__(self, filenames, logger=None):
def __init__(self, filenames, root: str = None, logger=None):
self.logger = logger or logging.getLogger(__name__)
# full module names for all given files
@@ -60,6 +60,7 @@ class CallGraphVisitor(ast.NodeVisitor):
mod_name = get_module_name(filename)
self.module_to_filename[mod_name] = filename
self.filenames = filenames
self.root = root
# data gathered from analysis
self.defines_edges = {}
@@ -103,7 +104,7 @@ class CallGraphVisitor(ast.NodeVisitor):
with open(filename, "rt", encoding="utf-8") as f:
content = f.read()
self.filename = filename
self.module_name = get_module_name(filename)
self.module_name = get_module_name(filename, root=self.root)
self.analyze_scopes(content, filename) # add to the currently known scopes
self.visit(ast.parse(content, filename))
self.module_name = None

View File

@@ -20,27 +20,37 @@ def tail(lst):
return []
def get_module_name(filename, root: str = None):
    """Determine the dotted module name of a Python source file.

    The name is built from the path components between the package root and
    the file itself. A directory does not need an ``__init__.py`` to take part
    in the name, so modules in "hidden" sub-directories are still resolved.

    Args:
        filename: path to a ``.py`` file. For an ``__init__.py`` the module is
            named after the containing directory. Relative paths are resolved
            against the current working directory.
        root: directory at which the package root sits. Its own base name
            becomes the first component of the module name. If None, the root
            is inferred as the top-most ancestor directory that contains an
            ``__init__.py``; if no ancestor has one, the bare module name is
            returned.

    Returns:
        The dotted module name, e.g. ``"package.subpackage.module"``.

    Raises:
        ValueError: if ``root`` is given but ``filename`` is not located
            underneath it.
    """
    # Work on absolute paths so that walking upwards always ends at the
    # filesystem top and a relative `root` compares equal to its ancestors.
    filename = os.path.abspath(filename)

    if os.path.basename(filename) == "__init__.py":
        # init file means module name is directory name
        module_path = os.path.dirname(filename)
    elif filename.endswith(".py"):
        # otherwise it is the filename without extension; strip only the
        # trailing extension so that ".py" elsewhere in the path is kept
        module_path = filename[: -len(".py")]
    else:
        module_path = filename

    # chain[0] is the module itself, every later entry is one directory higher
    chain = [module_path]

    if root is None:
        # Walk up to the top of the filesystem and remember the highest
        # directory that contains an __init__.py - that is the package root.
        keep = 1  # number of leading chain entries that belong to the name
        parent = os.path.dirname(module_path)
        while parent != chain[-1]:
            chain.append(parent)
            if os.path.exists(os.path.join(parent, "__init__.py")):
                keep = len(chain)
            parent = os.path.dirname(parent)
        # keep only the entries from the module up to the inferred root
        chain = chain[:keep]
    else:
        # root is already known - just walk up until it is matched
        root_path = os.path.normcase(os.path.abspath(root))
        while os.path.normcase(chain[-1]) != root_path:
            parent = os.path.dirname(chain[-1])
            if parent == chain[-1]:
                # reached the filesystem top without meeting root
                raise ValueError(
                    f"{filename!r} is not located under root {root!r}"
                )
            chain.append(parent)

    return ".".join(os.path.basename(path) for path in reversed(chain))
def format_alias(x):

View File

@@ -12,6 +12,7 @@
from argparse import ArgumentParser
from glob import glob
import logging
import os
from .analyzer import CallGraphVisitor
from .visgraph import VisualGraph
@@ -149,10 +150,23 @@ def main(cli_args=None):
help="annotate with module and source line number",
)
parser.add_argument(
"--root",
default=None,
dest="root",
help="Package root directory. Is inferred by default.",
)
known_args, unknown_args = parser.parse_known_args(cli_args)
filenames = [fn2 for fn in unknown_args for fn2 in glob(fn, recursive=True)]
# determine root
if known_args.root is not None:
root = os.path.abspath(known_args.root)
else:
root = None
if len(unknown_args) == 0:
parser.error("Need one or more filenames to process")
elif len(filenames) == 0:
@@ -189,7 +203,7 @@ def main(cli_args=None):
handler = logging.FileHandler(known_args.logname)
logger.addHandler(handler)
v = CallGraphVisitor(filenames, logger)
v = CallGraphVisitor(filenames, logger, root=root)
if known_args.function or known_args.namespace:

View File

@@ -74,6 +74,7 @@ class CallgraphDirective(SphinxDirective):
direction = self.options["direction"]
dotcode = create_callgraph(
filenames=f"{base_path}/**/*.py",
root=base_path,
function=func_name,
namespace=base_name,
format="dot",

View File

@@ -48,3 +48,17 @@ def test_resolve_use_in_function(callgraph):
uses = get_in_dict(callgraph.uses_edges, "test_code.submodule2.test_2")
get_node(uses, "test_code.submodule1.test_func1")
get_node(uses, "test_code.submodule1.test_func2")
def test_resolve_package_without___init__(callgraph):
    """A module in a directory lacking ``__init__.py`` is still resolved.

    Looks up the module among the define edges of the ``callgraph`` fixture
    and then the function defined inside it.
    """
    hidden_module = "test_code.subpackage2.submodule_hidden1"
    hidden_function = "test_code.subpackage2.submodule_hidden1.test_func1"

    module_defines = get_in_dict(callgraph.defines_edges, hidden_module)
    get_node(module_defines, hidden_function)
def test_resolve_package_with_known_root():
    """With an explicit root, module names start with the root's base name.

    Builds a visitor over the ``test_code`` tree using this file's directory
    as root and looks up the hidden module and its function under that prefix.
    """
    tests_dir = os.path.dirname(__file__)
    pattern = os.path.join(tests_dir, "test_code/**/*.py")
    visitor = CallGraphVisitor(
        glob(pattern, recursive=True),
        logger=logging.getLogger(),
        root=tests_dir,
    )

    root_name = os.path.basename(tests_dir)
    module_defines = get_in_dict(
        visitor.defines_edges,
        f"{root_name}.test_code.subpackage2.submodule_hidden1",
    )
    get_node(
        module_defines,
        f"{root_name}.test_code.subpackage2.submodule_hidden1.test_func1",
    )