diff --git a/libs/ccc/coef/impl.py b/libs/ccc/coef/impl.py index fd9ce96d..f0cf8d8d 100644 --- a/libs/ccc/coef/impl.py +++ b/libs/ccc/coef/impl.py @@ -526,7 +526,6 @@ def ccc( n_jobs: int = 1, pvalue_n_perms: int = None, partitioning_executor: str = "thread", - random_state: int = None, ) -> tuple[NDArray[float], NDArray[float], NDArray[np.uint64], NDArray[np.int16]]: """ This is the main function that computes the Clustermatch Correlation @@ -557,8 +556,6 @@ def ccc( ThreadPoolExecutor for parallelization, which uses less memory. If "process", it will use ProcessPoolExecutor, which might be faster. If anything else, it will not parallelize the partitioning step. - random_state: seed for the random number generator. This is used to compute - the p-value of the coefficient using permutations. Returns: @@ -595,8 +592,6 @@ def ccc( singleton cases were found (-1; usually because input data has all the same value) or for categorical features (-2). """ - np.random.seed(random_state) - n_objects = None n_features = None # this is a boolean array of size n_features with True if the feature is numerical and False otherwise diff --git a/tests/test_coef_pval.py b/tests/test_coef_pval.py index a6f9fd31..ec6aad6f 100644 --- a/tests/test_coef_pval.py +++ b/tests/test_coef_pval.py @@ -170,7 +170,7 @@ def test_cm_quadratic_noisy_pvalue_with_random_state(): feature1 = np.power(feature0, 2.0) + (2.0 * rs.rand(feature0.shape[0])) # Run - res = ccc(feature0, feature1, pvalue_n_perms=100, random_state=2) + res = ccc(feature0, feature1, pvalue_n_perms=100) # Validate assert len(res) == 2 @@ -181,7 +181,7 @@ def test_cm_quadratic_noisy_pvalue_with_random_state(): assert pvalue is not None assert isinstance(pvalue, float) - assert pvalue == pytest.approx(0.099, abs=0.01) + assert pvalue < 0.10 def test_cm_one_feature_with_all_same_values_pvalue(): @@ -222,7 +222,7 @@ def test_cm_single_argument_is_matrix(): input_data = np.array([feature0, feature1, feature2]) # Run - res = ccc(input_data, pvalue_n_perms=100, random_state=1) + res = ccc(input_data, pvalue_n_perms=100) # Validate assert len(res) == 2 @@ -238,8 +238,8 @@ def test_cm_single_argument_is_matrix(): assert hasattr(pvalue, "shape") assert pvalue.shape == (3,) assert pvalue[0] == (0 + 1) / (100 + 1) - assert pvalue[1] == pytest.approx(0.792, abs=0.01) - assert pvalue[2] == pytest.approx(0.752, abs=0.01) + assert pvalue[1] > 0.10 + assert pvalue[2] > 0.10 @pytest.mark.skipif(os.cpu_count() < 2, reason="requires at least 2 cores") @@ -377,7 +377,6 @@ def test_cm_numerical_and_categorical_features_weakly_relationship_pvalue(): categorical_feature1, numerical_feature0, pvalue_n_perms=100, - random_state=1, ) # Validate @@ -390,18 +389,7 @@ def test_cm_numerical_and_categorical_features_weakly_relationship_pvalue(): assert pvalue is not None assert isinstance(pvalue, float) - assert pvalue == pytest.approx(0.099, abs=0.01) - - # Run with flipped variables (symmetry) - assert ( - ccc( - numerical_feature0, - categorical_feature1, - pvalue_n_perms=100, - random_state=1, - ) - == res - ) + assert pvalue < 0.15 def test_cm_numerical_and_categorical_features_a_single_categorical_value(): @@ -422,7 +410,6 @@ def test_cm_numerical_and_categorical_features_a_single_categorical_value(): numerical_feature0, categorical_feature1, pvalue_n_perms=100, - random_state=1, ) # Validate @@ -435,18 +422,7 @@ def test_cm_numerical_and_categorical_features_a_single_categorical_value(): assert pvalue is not None assert isinstance(pvalue, float) - assert pvalue == pytest.approx(1.0, abs=0.01) - - # Run with flipped variables (symmetry) - assert ( - ccc( - categorical_feature1, - numerical_feature0, - pvalue_n_perms=100, - random_state=1, - ) - == res - ) + assert pvalue > 0.80 def test_cm_with_pandas_dataframe_several_features(): @@ -457,7 +433,7 @@ def test_cm_with_pandas_dataframe_several_features(): data = pd.DataFrame(rs.rand(20, 50)) # Run - res = ccc(data, internal_n_clusters=3, pvalue_n_perms=10, random_state=1) + res = ccc(data, internal_n_clusters=3, pvalue_n_perms=10) # Validate assert len(res) == 2