@@ -781,14 +781,25 @@ def perform_collect(
781781 try :
782782 initialpaths : list [Path ] = []
783783 initialpaths_with_parents : list [Path ] = []
784- for arg in args :
785- collection_argument = resolve_collection_argument (
784+
785+ collection_args = [
786+ resolve_collection_argument (
786787 self .config .invocation_params .dir ,
787788 arg ,
789+ i ,
788790 as_pypath = self .config .option .pyargs ,
789791 consider_namespace_packages = consider_namespace_packages ,
790792 )
791- self ._initial_parts .append (collection_argument )
793+ for i , arg in enumerate (args )
794+ ]
795+
796+ if not self .config .getoption ("keepduplicates" ):
797+ # Normalize the collection arguments -- remove duplicates and overlaps.
798+ self ._initial_parts = normalize_collection_arguments (collection_args )
799+ else :
800+ self ._initial_parts = collection_args
801+
802+ for collection_argument in self ._initial_parts :
792803 initialpaths .append (collection_argument .path )
793804 initialpaths_with_parents .append (collection_argument .path )
794805 initialpaths_with_parents .extend (collection_argument .path .parents )
@@ -859,6 +870,7 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
859870
860871 argpath = collection_argument .path
861872 names = collection_argument .parts
873+ parametrization = collection_argument .parametrization
862874 module_name = collection_argument .module_name
863875
864876 # resolve_collection_argument() ensures this.
@@ -943,12 +955,18 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
943955
944956 # Name part e.g. `TestIt` in `/a/b/test_file.py::TestIt::test_it`.
945957 else :
946- # TODO: Remove parametrized workaround once collection structure contains
947- # parametrization.
948- is_match = (
949- node .name == matchparts [0 ]
950- or node .name .split ("[" )[0 ] == matchparts [0 ]
951- )
958+ if len (matchparts ) == 1 :
959+ # This the last part, one parametrization goes.
960+ if parametrization is not None :
961+ # A parametrized arg must match exactly.
962+ is_match = node .name == matchparts [0 ] + parametrization
963+ else :
964+ # A non-parameterized arg matches all parametrizations (if any).
965+ # TODO: Remove the hacky split once the collection structure
966+ # contains parametrization.
967+ is_match = node .name .split ("[" )[0 ] == matchparts [0 ]
968+ else :
969+ is_match = node .name == matchparts [0 ]
952970 if is_match :
953971 work .append ((node , matchparts [1 :]))
954972 any_matched_in_collector = True
@@ -969,12 +987,9 @@ def genitems(self, node: nodes.Item | nodes.Collector) -> Iterator[nodes.Item]:
969987 yield node
970988 else :
971989 assert isinstance (node , nodes .Collector )
972- keepduplicates = self .config .getoption ("keepduplicates" )
973990 # For backward compat, dedup only applies to files.
974- handle_dupes = not ( keepduplicates and isinstance (node , nodes .File ) )
991+ handle_dupes = not isinstance (node , nodes .File )
975992 rep , duplicate = self ._collect_one_node (node , handle_dupes )
976- if duplicate and not keepduplicates :
977- return
978993 if rep .passed :
979994 for subnode in rep .result :
980995 yield from self .genitems (subnode )
@@ -1024,12 +1039,15 @@ class CollectionArgument:
10241039
10251040 path : Path
10261041 parts : Sequence [str ]
1042+ parametrization : str | None
10271043 module_name : str | None
1044+ original_index : int
10281045
10291046
10301047def resolve_collection_argument (
10311048 invocation_path : Path ,
10321049 arg : str ,
1050+ arg_index : int ,
10331051 * ,
10341052 as_pypath : bool = False ,
10351053 consider_namespace_packages : bool = False ,
@@ -1052,14 +1070,15 @@ def resolve_collection_argument(
10521070 When as_pypath is True, expects that the command-line argument actually contains
10531071 module paths instead of file-system paths:
10541072
1055- "pkg.tests.test_foo::TestClass::test_foo"
1073+ "pkg.tests.test_foo::TestClass::test_foo[a,b] "
10561074
10571075 In which case we search sys.path for a matching module, and then return the *path* to the
10581076 found module, which may look like this:
10591077
10601078 CollectionArgument(
10611079 path=Path("/home/u/myvenv/lib/site-packages/pkg/tests/test_foo.py"),
10621080 parts=["TestClass", "test_foo"],
1081+ parametrization="[a,b]",
10631082 module_name="pkg.tests.test_foo",
10641083 )
10651084
@@ -1068,10 +1087,9 @@ def resolve_collection_argument(
10681087 """
10691088 base , squacket , rest = arg .partition ("[" )
10701089 strpath , * parts = base .split ("::" )
1071- if squacket :
1072- if not parts :
1073- raise UsageError (f"path cannot contain [] parametrization: { arg } " )
1074- parts [- 1 ] = f"{ parts [- 1 ]} { squacket } { rest } "
1090+ if squacket and not parts :
1091+ raise UsageError (f"path cannot contain [] parametrization: { arg } " )
1092+ parametrization = f"{ squacket } { rest } " if squacket else None
10751093 module_name = None
10761094 if as_pypath :
10771095 pyarg_strpath = search_pypath (
@@ -1099,5 +1117,59 @@ def resolve_collection_argument(
10991117 return CollectionArgument (
11001118 path = fspath ,
11011119 parts = parts ,
1120+ parametrization = parametrization ,
11021121 module_name = module_name ,
1122+ original_index = arg_index ,
1123+ )
1124+
1125+
1126+ def is_collection_argument_subsumed_by (
1127+ arg : CollectionArgument , by : CollectionArgument
1128+ ) -> bool :
1129+ """Check if `arg` is subsumed (contained) by `by`."""
1130+ # First check path subsumption.
1131+ if by .path != arg .path :
1132+ # `by` subsumes `arg` if `by` is a parent directory of `arg` and has no
1133+ # parts (collects everything in that directory).
1134+ if not by .parts :
1135+ return arg .path .is_relative_to (by .path )
1136+ return False
1137+ # Paths are equal, check parts.
1138+ # For example: ("TestClass",) is a prefix of ("TestClass", "test_method").
1139+ if len (by .parts ) > len (arg .parts ) or arg .parts [: len (by .parts )] != by .parts :
1140+ return False
1141+ # Paths and parts are equal, check parametrization.
1142+ # A `by` without parametrization (None) matches everything, e.g.
1143+ # `pytest x.py::test_it` matches `x.py::test_it[0]`. Otherwise must be
1144+ # exactly equal.
1145+ if by .parametrization is not None and by .parametrization != arg .parametrization :
1146+ return False
1147+ return True
1148+
1149+
1150+ def normalize_collection_arguments (
1151+ collection_args : Sequence [CollectionArgument ],
1152+ ) -> list [CollectionArgument ]:
1153+ """Normalize collection arguments to eliminate overlapping paths and parts.
1154+
1155+ Detects when collection arguments overlap in either paths or parts and only
1156+ keeps the shorter prefix, or the earliest argument if duplicate, preserving
1157+ order. The result is prefix-free.
1158+ """
1159+ # A quadratic algorithm is not acceptable since large inputs are possible.
1160+ # So this uses an O(n*log(n)) algorithm which takes advantage of the
1161+ # property that after sorting, a collection argument will immediately
1162+ # precede collection arguments it subsumes. An O(n) algorithm is not worth
1163+ # it.
1164+ collection_args_sorted = sorted (
1165+ collection_args ,
1166+ key = lambda arg : (arg .path , arg .parts , arg .parametrization or "" ),
11031167 )
1168+ normalized : list [CollectionArgument ] = []
1169+ last_kept = None
1170+ for arg in collection_args_sorted :
1171+ if last_kept is None or not is_collection_argument_subsumed_by (arg , last_kept ):
1172+ normalized .append (arg )
1173+ last_kept = arg
1174+ normalized .sort (key = lambda arg : arg .original_index )
1175+ return normalized
0 commit comments