Skip to content

Commit

Permalink
ccc: remove random_state parameter and adjust unit tests
Browse files: browse the repository at this point in the history
  • Loading branch information
miltondp committed Sep 13, 2023
1 parent 9ee3374 commit 34a84c1
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 37 deletions.
5 changes: 0 additions & 5 deletions libs/ccc/coef/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,6 @@ def ccc(
n_jobs: int = 1,
pvalue_n_perms: int = None,
partitioning_executor: str = "thread",
- random_state: int = None,
) -> tuple[NDArray[float], NDArray[float], NDArray[np.uint64], NDArray[np.int16]]:
"""
This is the main function that computes the Clustermatch Correlation
Expand Down Expand Up @@ -557,8 +556,6 @@ def ccc(
ThreadPoolExecutor for parallelization, which uses less memory. If
"process", it will use ProcessPoolExecutor, which might be faster. If
anything else, it will not parallelize the partitioning step.
- random_state: seed for the random number generator. This is used to compute
- the p-value of the coefficient using permutations.
Returns:
Expand Down Expand Up @@ -595,8 +592,6 @@ def ccc(
singleton cases were found (-1; usually because input data has all the same
value) or for categorical features (-2).
"""
- np.random.seed(random_state)
-
n_objects = None
n_features = None
# this is a boolean array of size n_features with True if the feature is numerical and False otherwise
Expand Down
40 changes: 8 additions & 32 deletions tests/test_coef_pval.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_cm_quadratic_noisy_pvalue_with_random_state():
feature1 = np.power(feature0, 2.0) + (2.0 * rs.rand(feature0.shape[0]))

# Run
- res = ccc(feature0, feature1, pvalue_n_perms=100, random_state=2)
+ res = ccc(feature0, feature1, pvalue_n_perms=100)

# Validate
assert len(res) == 2
Expand All @@ -181,7 +181,7 @@ def test_cm_quadratic_noisy_pvalue_with_random_state():

assert pvalue is not None
assert isinstance(pvalue, float)
- assert pvalue == pytest.approx(0.099, abs=0.01)
+ assert pvalue < 0.10


def test_cm_one_feature_with_all_same_values_pvalue():
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_cm_single_argument_is_matrix():
input_data = np.array([feature0, feature1, feature2])

# Run
- res = ccc(input_data, pvalue_n_perms=100, random_state=1)
+ res = ccc(input_data, pvalue_n_perms=100)

# Validate
assert len(res) == 2
Expand All @@ -238,8 +238,8 @@ def test_cm_single_argument_is_matrix():
assert hasattr(pvalue, "shape")
assert pvalue.shape == (3,)
assert pvalue[0] == (0 + 1) / (100 + 1)
- assert pvalue[1] == pytest.approx(0.792, abs=0.01)
- assert pvalue[2] == pytest.approx(0.752, abs=0.01)
+ assert pvalue[1] > 0.10
+ assert pvalue[2] > 0.10


@pytest.mark.skipif(os.cpu_count() < 2, reason="requires at least 2 cores")
Expand Down Expand Up @@ -377,7 +377,6 @@ def test_cm_numerical_and_categorical_features_weakly_relationship_pvalue():
categorical_feature1,
numerical_feature0,
pvalue_n_perms=100,
- random_state=1,
)

# Validate
Expand All @@ -390,18 +389,7 @@ def test_cm_numerical_and_categorical_features_weakly_relationship_pvalue():

assert pvalue is not None
assert isinstance(pvalue, float)
- assert pvalue == pytest.approx(0.099, abs=0.01)
-
- # Run with flipped variables (symmetry)
- assert (
- ccc(
- numerical_feature0,
- categorical_feature1,
- pvalue_n_perms=100,
- random_state=1,
- )
- == res
- )
+ assert pvalue < 0.15


def test_cm_numerical_and_categorical_features_a_single_categorical_value():
Expand All @@ -422,7 +410,6 @@ def test_cm_numerical_and_categorical_features_a_single_categorical_value():
numerical_feature0,
categorical_feature1,
pvalue_n_perms=100,
- random_state=1,
)

# Validate
Expand All @@ -435,18 +422,7 @@ def test_cm_numerical_and_categorical_features_a_single_categorical_value():

assert pvalue is not None
assert isinstance(pvalue, float)
- assert pvalue == pytest.approx(1.0, abs=0.01)
-
- # Run with flipped variables (symmetry)
- assert (
- ccc(
- categorical_feature1,
- numerical_feature0,
- pvalue_n_perms=100,
- random_state=1,
- )
- == res
- )
+ assert pvalue > 0.80


def test_cm_with_pandas_dataframe_several_features():
Expand All @@ -457,7 +433,7 @@ def test_cm_with_pandas_dataframe_several_features():
data = pd.DataFrame(rs.rand(20, 50))

# Run
- res = ccc(data, internal_n_clusters=3, pvalue_n_perms=10, random_state=1)
+ res = ccc(data, internal_n_clusters=3, pvalue_n_perms=10)

# Validate
assert len(res) == 2
Expand Down

0 comments on commit 34a84c1

Please sign in to comment.