Skip to content

Commit 448c9ba

Browse files
authored
Merge pull request #26 from dgrigonis/collapse_externals_only
possibility to collapse only external deps to packages
2 parents a8df40c + 257e5c5 commit 448c9ba

File tree

7 files changed

+190
-44
lines changed

7 files changed

+190
-44
lines changed

findimports.py

+134-39
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
--duplicate)
3131
-N, --noext omit external dependencies
3232
-p, --packages convert the module graph to a package graph
33+
-pE, --package-externals
34+
convert external modules to packages.
3335
-l PACKAGELEVEL, --level PACKAGELEVEL
3436
collapse subpackages to the topmost Nth levels. Only
3537
used if --packages is given. Default: no limit
@@ -43,7 +45,12 @@
4345
-I FILE, --ignore FILE
4446
ignore a file or directory; this option can be used
4547
multiple times. Default: ['venv']
46-
48+
-R PREFIX [PREFIX ...], --rmprefix PREFIX [PREFIX ...]
49+
remove PREFIX from displayed node names. This
50+
operation is applied last. Names that collapse to
51+
nothing are removed.
52+
-D MAX_DEPTH, --depth MAX_DEPTH
53+
import depth in ast tree. Default: no limit
4754
4855
FindImports requires Python 3.6 or later.
4956
@@ -242,7 +249,29 @@ def __repr__(self):
242249
)
243250

244251

245-
class ImportFinder(ast.NodeVisitor):
252+
class DepthVisitor:
    """AST visitor that passes the current nesting depth to every handler.

    Dispatch works like ``ast.NodeVisitor``: ``visit`` looks up a method named
    ``visit_<NodeClassName>`` and falls back to ``generic_visit``.  Unlike
    ``ast.NodeVisitor``, every handler is called as ``handler(node, depth)``,
    and ``generic_visit`` stops descending once ``max_depth`` is reached.
    """

    def __init__(self, max_depth=None):
        # None means "no limit"; otherwise children of nodes at
        # depth >= max_depth are not visited by generic_visit.
        self.max_depth = max_depth

    def visit(self, node, depth=0):
        """Visit a node, dispatching on its class name.

        Returns whatever the selected handler returns.
        """
        method = f'visit_{node.__class__.__name__}'
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node, depth)

    def generic_visit(self, node, depth):
        """Called if no explicit visitor function exists for a node.

        Visits each direct child of ``node`` at ``depth + 1`` unless the
        depth limit has already been reached.
        """
        if self.max_depth is not None and depth >= self.max_depth:
            return
        # iter_child_nodes yields AST-valued fields and AST items of list
        # fields, in field order -- the same nodes the manual loop visited.
        for child in ast.iter_child_nodes(node):
            self.visit(child, depth + 1)
272+
273+
274+
class ImportFinder(DepthVisitor):
246275
"""AST visitor that collects all imported names in its imports attribute.
247276
248277
For example, the following import statements in the AST tree
@@ -265,9 +294,10 @@ class ImportFinder(ast.NodeVisitor):
265294

266295
lineno_offset = 0 # needed when recursively parsing docstrings
267296

268-
def __init__(self, filename):
297+
def __init__(self, filename, max_depth=None):
269298
self.imports = []
270299
self.filename = filename
300+
super().__init__(max_depth)
271301

272302
def processImport(self, name, imported_as, full_name, level, node):
273303
lineno = adjust_lineno(self.filename,
@@ -276,12 +306,12 @@ def processImport(self, name, imported_as, full_name, level, node):
276306
info = ImportInfo(full_name, self.filename, lineno, level)
277307
self.imports.append(info)
278308

279-
def visit_Import(self, node):
309+
def visit_Import(self, node, depth):
280310
for alias in node.names:
281311
self.processImport(alias.name, alias.asname, alias.name, None,
282312
node)
283313

284-
def visit_ImportFrom(self, node):
314+
def visit_ImportFrom(self, node, depth):
285315
if node.module == '__future__':
286316
return
287317

@@ -291,17 +321,18 @@ def visit_ImportFrom(self, node):
291321
fullname = f"{node.module}.{name}" if node.module else name
292322
self.processImport(name, imported_as, fullname, node.level, node)
293323

294-
def visitSomethingWithADocstring(self, node):
324+
def visitSomethingWithADocstring(self, node, depth):
295325
# ClassDef and FunctionDef have a 'lineno' attribute, Module doesn't.
296326
lineno = getattr(node, 'lineno', None)
297-
self.processDocstring(ast.get_docstring(node, clean=False), lineno)
298-
self.generic_visit(node)
327+
docstring = ast.get_docstring(node, clean=False)
328+
self.processDocstring(docstring, lineno, depth)
329+
self.generic_visit(node, depth)
299330

300331
visit_Module = visitSomethingWithADocstring
301332
visit_ClassDef = visitSomethingWithADocstring
302333
visit_FunctionDef = visitSomethingWithADocstring
303334

304-
def processDocstring(self, docstring, lineno):
335+
def processDocstring(self, docstring, lineno, depth):
305336
if not docstring:
306337
return
307338
if lineno is None:
@@ -325,7 +356,7 @@ def processDocstring(self, docstring, lineno):
325356
filename=self.filename, lineno=lineno), file=sys.stderr)
326357
else:
327358
self.lineno_offset += lineno + example.lineno
328-
self.visit(node)
359+
self.visit(node, depth)
329360
self.lineno_offset -= lineno + example.lineno
330361

331362

@@ -367,8 +398,8 @@ class ImportFinderAndNameTracker(ImportFinder):
367398
warn_about_duplicates = False
368399
verbose = False
369400

370-
def __init__(self, filename):
371-
ImportFinder.__init__(self, filename)
401+
def __init__(self, filename, max_depth=None):
402+
ImportFinder.__init__(self, filename, max_depth)
372403
self.scope = self.top_level = Scope(name=filename)
373404
self.scope_stack = []
374405
self.unused_names = []
@@ -388,14 +419,14 @@ def leaveAllScopes(self):
388419
self.unused_names += self.scope.unused_names.values()
389420
self.unused_names.sort(key=attrgetter('lineno'))
390421

391-
def processDocstring(self, docstring, lineno):
422+
def processDocstring(self, docstring, lineno, depth):
392423
self.newScope(self.top_level, 'docstring')
393-
ImportFinder.processDocstring(self, docstring, lineno)
424+
ImportFinder.processDocstring(self, docstring, lineno, depth)
394425
self.leaveScope()
395426

396-
def visit_FunctionDef(self, node):
427+
def visit_FunctionDef(self, node, depth):
397428
self.newScope(self.scope, f"function {node.name}")
398-
ImportFinder.visit_FunctionDef(self, node)
429+
ImportFinder.visit_FunctionDef(self, node, depth)
399430
self.leaveScope()
400431

401432
def processImport(self, name, imported_as, full_name, level, node):
@@ -420,10 +451,10 @@ def processImport(self, name, imported_as, full_name, level, node):
420451
else:
421452
self.scope.addImport(imported_as, self.filename, level, lineno)
422453

423-
def visit_Name(self, node):
454+
def visit_Name(self, node, depth):
424455
self.scope.useName(node.id)
425456

426-
def visit_Attribute(self, node):
457+
def visit_Attribute(self, node, depth):
427458
full_name = [node.attr]
428459
parent = node.value
429460
while isinstance(parent, ast.Attribute):
@@ -439,30 +470,30 @@ def visit_Attribute(self, node):
439470
else:
440471
name += part
441472
self.scope.useName(name)
442-
self.generic_visit(node)
473+
self.generic_visit(node, depth)
443474

444475

445-
def find_imports(filename):
476+
def find_imports(filename, max_depth=None):
446477
"""Find all imported names in a given file.
447478
448479
Returns a list of ImportInfo objects.
449480
"""
450481
with tokenize.open(filename) as f:
451482
root = ast.parse(f.read(), filename)
452-
visitor = ImportFinder(filename)
483+
visitor = ImportFinder(filename, max_depth=max_depth)
453484
visitor.visit(root)
454485
return visitor.imports
455486

456487

457488
def find_imports_and_track_names(filename, warn_about_duplicates=False,
458-
verbose=False):
489+
verbose=False, max_depth=None):
459490
"""Find all imported names in a given file.
460491
461492
Returns ``(imports, unused)``. Both are lists of ImportInfo objects.
462493
"""
463494
with tokenize.open(filename) as f:
464495
root = ast.parse(f.read(), filename)
465-
visitor = ImportFinderAndNameTracker(filename)
496+
visitor = ImportFinderAndNameTracker(filename, max_depth)
466497
visitor.warn_about_duplicates = warn_about_duplicates
467498
visitor.verbose = verbose
468499
visitor.visit(root)
@@ -517,6 +548,7 @@ class ModuleGraph(object):
517548
warn_about_duplicates = False
518549
verbose = False
519550
external_dependencies = True
551+
max_depth = None
520552

521553
# some builtin modules do not exist as separate .so files on disk
522554
builtin_modules = sys.builtin_module_names
@@ -592,18 +624,24 @@ def parseFile(self, filename, ignore_stdlib_modules):
592624
module.imported_names, module.unused_names = (
593625
find_imports_and_track_names(filename,
594626
self.warn_about_duplicates,
595-
self.verbose)
627+
self.verbose,
628+
self.max_depth)
596629
)
597630
else:
598-
module.imported_names = find_imports(filename)
631+
module.imported_names = find_imports(filename, self.max_depth)
599632
module.unused_names = None
600633
dir = os.path.dirname(filename)
634+
635+
if ignore_stdlib_modules:
636+
module.imported_names = [
637+
info for info in module.imported_names
638+
if info.name.split('.')[0] not in STDLIB_MODNAMES_SET
639+
]
601640
module.imports = {
602641
self.findModuleOfName(imp.name, imp.level, filename, dir)
603642
for imp in module.imported_names}
643+
# NOTE: Remove when certain that this is 100% dealt with above
604644
if ignore_stdlib_modules:
605-
module.imported_names = [info for info in module.imported_names
606-
if info.name not in STDLIB_MODNAMES_SET]
607645
module.imports -= STDLIB_MODNAMES_SET
608646

609647
def filenameToModname(self, filename):
@@ -729,6 +767,17 @@ def packageOf(self, dotted_name, packagelevel=None):
729767
dotted_name = '.'.join(dotted_name.split('.')[:packagelevel])
730768
return dotted_name
731769

770+
def isExternal(self, modname):
771+
"""Package is external if not present in modules"""
772+
return modname not in self.modules
773+
774+
def maybePackageOf(self, dotted_name,
775+
packagelevel=None, externals_only=False):
776+
"""Provides a flag to not convert internal modules to packages"""
777+
if externals_only and not self.isExternal(dotted_name):
778+
return dotted_name
779+
return self.packageOf(dotted_name, packagelevel)
780+
732781
def removeTestPackage(self, dotted_name, pkgnames=['tests', 'ftests']):
733782
"""Remove tests subpackages from dotted_name."""
734783
result = []
@@ -746,23 +795,43 @@ def listModules(self):
746795
modules.sort()
747796
return [module for name, module in modules]
748797

749-
def packageGraph(self, packagelevel=None):
798+
def packageGraph(self, packagelevel=None, externals_only=False):
750799
"""Convert a module graph to a package graph."""
751800
packages = {}
752801
for module in self.listModules():
753-
package_name = self.packageOf(module.modname, packagelevel)
802+
package_name = self.maybePackageOf(
803+
module.modname, packagelevel, externals_only)
754804
if package_name not in packages:
755805
dirname = os.path.dirname(module.filename)
756806
packages[package_name] = Module(package_name, dirname)
757807
package = packages[package_name]
758808
for name in module.imports:
759-
package_name = self.packageOf(name, packagelevel)
809+
package_name = self.maybePackageOf(
810+
name, packagelevel, externals_only)
760811
if package_name != package.modname: # no loops
761812
package.imports.add(package_name)
762813
graph = ModuleGraph()
763814
graph.modules = packages
764815
return graph
765816

817+
def removePrefixes(self, prefixes):
    """Return a new graph with the given prefixes stripped from node names.

    ``prefixes`` is an iterable of dotted-name prefixes.  A prefix is only
    removed from the start of a name, and only when it is followed by a
    dot; the regex is anchored, so at most one prefix is stripped per name
    (the first alternative that matches).

    Modules whose stripped name is empty are dropped, as are imports whose
    stripped name is empty or equal to the importing module's stripped
    name (no self-loops).  Modules that end up with the same stripped name
    are merged into a single node instead of overwriting each other.

    Returns a new ModuleGraph whose ``modules`` dict is ordered by name.
    """
    prefix_union = '|'.join(map(re.escape, prefixes))
    strip_prefix = re.compile(r'^(({})\.)?'.format(prefix_union))
    packages = {}
    for module in self.listModules():
        new_modname = strip_prefix.sub('', module.modname)
        if not new_modname:
            # Name collapsed to nothing: drop the node and its edges.
            continue
        # Reuse an existing node so that two modules collapsing to the
        # same name keep the union of their imports.
        if new_modname not in packages:
            packages[new_modname] = Module(new_modname, module.filename)
        package = packages[new_modname]
        for name in module.imports:
            new_name = strip_prefix.sub('', name)
            if new_name and new_name != new_modname:  # no loops
                package.imports.add(new_name)
    graph = ModuleGraph()
    # Keys are unique strings, so sorting the items sorts by name only.
    graph.modules = dict(sorted(packages.items()))
    return graph
834+
766835
def collapseTests(self, pkgnames=['tests', 'ftests']):
767836
"""Collapse test packages with parent packages.
768837
@@ -893,33 +962,40 @@ def printUnusedImports(self):
893962
continue
894963
print(f"{module.filename}:{lineno}: {name} not used")
895964

896-
def printDot(self):
965+
def constructDot(self):
897966
"""Produce a dependency graph in dot format."""
898-
print("digraph ModuleDependencies {")
899-
print(" node[shape=box];")
967+
lines = list()
968+
lines.append("digraph ModuleDependencies {")
969+
lines.append(" node[shape=box];")
900970
allNames = set()
901971
nameDict = {}
902972
for n, module in enumerate(self.listModules()):
903973
module._dot_name = f"mod{n}"
904974
nameDict[module.modname] = module._dot_name
905-
print(f" {module._dot_name}[label=\"{quote(module.label)}\"];")
975+
line = f" {module._dot_name}[label=\"{quote(module.label)}\"];"
976+
lines.append(line)
906977
allNames |= module.imports
907-
print(" node[style=dotted];")
978+
lines.append(" node[style=dotted];")
908979
if self.external_dependencies:
909980
myNames = set(self.modules)
910981
extNames = list(allNames - myNames)
911982
extNames.sort()
912983
for n, name in enumerate(extNames):
913984
nameDict[name] = id = f"extmod{n}"
914-
print(f" {id}[label=\"{name}\"];")
985+
lines.append(f" {id}[label=\"{name}\"];")
915986
for modname, module in sorted(self.modules.items()):
916987
for other in sorted(module.imports):
917988
if other in nameDict:
918-
print(" {0} -> {1};".format(
989+
lines.append(" {0} -> {1};".format(
919990
nameDict[module.modname],
920991
nameDict[other]
921992
))
922-
print("}")
993+
lines.append("}")
994+
return '\n'.join(lines)
995+
996+
def printDot(self):
997+
"""Print a dependency graph in dot format."""
998+
print(self.constructDot())
923999

9241000

9251001
def quote(s):
@@ -981,6 +1057,9 @@ def main(argv=None):
9811057
options.add_argument('-p', '--packages', action='store_true',
9821058
dest='condense_to_packages',
9831059
help='convert the module graph to a package graph')
1060+
options.add_argument('-pE', '--package-externals', action='store_true',
1061+
dest='condense_to_packages_externals',
1062+
help='convert external modules to a packages.')
9841063
options.add_argument('-l', '--level', type=int,
9851064
dest='packagelevel',
9861065
help='collapse subpackages to the topmost Nth levels.'
@@ -1001,12 +1080,22 @@ def main(argv=None):
10011080
help="ignore a file or directory;"
10021081
" this option can be used multiple times."
10031082
" Default: ['venv']")
1083+
options.add_argument('-R', '--rmprefix', metavar="PREFIX", nargs="+",
1084+
help="remove PREFIX from displayed node names. "
1085+
"This operation is applied last. "
1086+
"Names that collapses to nothing are removed.")
1087+
options.add_argument('-D', '--depth', type=int,
1088+
dest='max_depth',
1089+
help='import depth in ast tree. Default: no limit')
10041090
try:
10051091
args = parser.parse_args(args=argv[1:] if argv else None)
1092+
if args.condense_to_packages and args.condense_to_packages_externals:
1093+
parser.error('only one of -p and -pE can be provided')
10061094
except SystemExit as e:
10071095
return e.code
10081096

10091097
g = ModuleGraph()
1098+
g.max_depth = args.max_depth
10101099
g.all_unused = args.all_unused
10111100
g.warn_about_duplicates = args.warn_about_duplicates
10121101
g.verbose = args.verbose
@@ -1017,12 +1106,18 @@ def main(argv=None):
10171106
ignore_stdlib_modules=args.ignore_stdlib)
10181107
if args.write_cache:
10191108
g.writeCache(args.write_cache)
1109+
10201110
if args.condense_to_packages:
1021-
g = g.packageGraph(args.packagelevel)
1111+
g = g.packageGraph(args.packagelevel, externals_only=False)
1112+
elif args.condense_to_packages_externals:
1113+
g = g.packageGraph(args.packagelevel, externals_only=True)
1114+
10221115
if args.collapse_tests:
10231116
g = g.collapseTests()
10241117
if args.collapse_cycles:
10251118
g = g.collapseCycles()
1119+
if args.rmprefix is not None:
1120+
g = g.removePrefixes(args.rmprefix)
10261121
g.external_dependencies = not args.noext
10271122
getattr(g, args.action)()
10281123
return 0

0 commit comments

Comments
 (0)