Skip to content

Commit

Permalink
BUG: Effect size simulation
Browse files Browse the repository at this point in the history
The original code was dividing the effect sizes by the number of causal sites, which led to the simulation of traits having extremely small variance. All the trait models are modified to account for that.
  • Loading branch information
daikitag committed Nov 15, 2023
1 parent 5371a97 commit c4a1d74
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 43 deletions.
45 changes: 33 additions & 12 deletions tests/test_sim_trait.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,27 +138,35 @@ def model_list():
loc = 2
scale = 5
df = 10
shape = 5
distr = [
(
tstrait.trait_model(distribution="normal", mean=loc, var=scale**2),
"norm",
(loc / num_causal, scale / num_causal),
),
(
tstrait.trait_model(distribution="exponential", scale=scale),
"expon",
(0, scale / num_causal),
(loc / num_causal, scale / np.sqrt(num_causal)),
),
(
tstrait.trait_model(distribution="t", mean=loc, var=scale**2, df=df),
"t",
(df, loc / num_causal, scale / num_causal),
(df, loc / num_causal, scale / np.sqrt(num_causal)),
),
]

return distr


def model_gamma_expon():
scale = 5
shape = 10
distr = [
(
tstrait.trait_model(distribution="gamma", shape=shape, scale=scale),
"gamma",
(shape, 0, scale / num_causal),
(shape, 0, scale),
),
(
tstrait.trait_model(distribution="exponential", scale=scale),
"expon",
(0, scale),
),
]

Expand Down Expand Up @@ -188,16 +196,29 @@ def test_KStest(self, model, distr, args, sample_ts):
result = np.concatenate((result, sim_result["effect_size"]))
self.check_distribution(result, distr, args)

@pytest.mark.parametrize("model, distr, args", model_gamma_expon())
def test_KStest_expon_gamma(self, model, distr, args, sample_ts):
result = np.array([])
num_causal = 1000
for i in range(2):
sim_result = tstrait.sim_trait(
ts=sample_ts, num_causal=num_causal, model=model, random_seed=i
)
sim_result["effect_size"] *= np.sqrt(num_causal)
result = np.concatenate((result, sim_result["effect_size"]))
self.check_distribution(result, distr, args)

def test_fixed(self, sample_ts):
"""
Some effect sizes are 0, as there are sites with only ancestral states
"""
value = 4
num_causal = 1000
model = tstrait.trait_model(distribution="fixed", value=value)
sim_result = tstrait.sim_trait(ts=sample_ts, num_causal=1000, model=model)
sim_result = tstrait.sim_trait(ts=sample_ts, num_causal=num_causal, model=model)
data = sim_result.loc[sim_result.effect_size != 0]
np.testing.assert_allclose(
data["effect_size"], np.ones(len(data)) * value / 1000
data["effect_size"], np.ones(len(data)) * value / np.sqrt(num_causal)
)

def test_multivariate(self, sample_ts):
Expand Down Expand Up @@ -233,7 +254,7 @@ def test_multivariate(self, sample_ts):
self.check_distribution(
df["effect_size"],
"norm",
(mean[i] / num_causal, np.sqrt(cov[i, i]) / num_causal),
(mean[i] / num_causal, np.sqrt(cov[i, i] / num_causal)),
)
sum_data += df["effect_size"] * const[i]

Expand Down
38 changes: 14 additions & 24 deletions tests/test_trait_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,6 @@ def test_output_dim(self):
assert beta.shape == (10, 4)
assert model.num_trait == 4

def test_large_sample(self):
np.random.seed(12)
n = 4
num_causal = 10000
mean = np.random.randn(n)
M = np.random.randn(n, n)
cov = np.dot(M, M.T)
model = tstrait.trait_model(distribution="multi_normal", mean=mean, cov=cov)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
np.testing.assert_allclose(
np.cov(effect_size.T) * (num_causal**2), cov, rtol=2e-1
)
np.testing.assert_allclose(effect_size.mean(0) * num_causal, mean, rtol=1e-1)


@pytest.mark.parametrize("num_causal", [1000])
class TestKSTest:
Expand All @@ -95,14 +81,14 @@ def test_normal(self, num_causal):
model = tstrait.trait_model(distribution="normal", mean=loc, var=scale**2)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
self.check_distribution(
effect_size, "norm", (loc / num_causal, scale / num_causal)
effect_size, "norm", (loc / num_causal, scale / np.sqrt(num_causal))
)

def test_exponential(self, num_causal):
scale = 2
model = tstrait.trait_model(distribution="exponential", scale=scale)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
self.check_distribution(effect_size, "expon", (0, scale / num_causal))
self.check_distribution(effect_size * np.sqrt(num_causal), "expon", (0, scale))
assert np.min(effect_size) > 0

def test_exponential_negative(self, num_causal):
Expand All @@ -112,8 +98,8 @@ def test_exponential_negative(self, num_causal):
)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
assert np.min(effect_size) < 0
effect_size = np.abs(effect_size)
self.check_distribution(effect_size, "expon", (0, scale / num_causal))
effect_size = np.abs(effect_size) * np.sqrt(num_causal)
self.check_distribution(effect_size, "expon", (0, scale))

def test_t(self, num_causal):
loc = 2
Expand All @@ -122,15 +108,17 @@ def test_t(self, num_causal):
model = tstrait.trait_model(distribution="t", mean=loc, var=scale**2, df=df)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
self.check_distribution(
effect_size, "t", (df, loc / num_causal, scale / num_causal)
effect_size, "t", (df, loc / num_causal, scale / np.sqrt(num_causal))
)

def test_gamma(self, num_causal):
shape = 3
scale = 2
model = tstrait.trait_model(distribution="gamma", shape=shape, scale=scale)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
self.check_distribution(effect_size, "gamma", (shape, 0, scale / num_causal))
self.check_distribution(
effect_size * np.sqrt(num_causal), "gamma", (shape, 0, scale)
)
assert np.min(effect_size) > 0

def test_gamma_negative(self, num_causal):
Expand All @@ -141,8 +129,8 @@ def test_gamma_negative(self, num_causal):
)
effect_size = model._sim_effect_size(num_causal, np.random.default_rng(1))
assert np.min(effect_size) < 0
effect_size = np.abs(effect_size)
self.check_distribution(effect_size, "gamma", (shape, 0, scale / num_causal))
effect_size = np.abs(effect_size) * np.sqrt(num_causal)
self.check_distribution(effect_size, "gamma", (shape, 0, scale))

def test_multivariate(self, num_causal):
np.random.seed(20)
Expand All @@ -156,12 +144,14 @@ def test_multivariate(self, num_causal):
self.check_distribution(
effect_size[:, i],
"norm",
(mean[i] / num_causal, np.sqrt(cov[i, i]) / num_causal),
(mean[i] / num_causal, np.sqrt(cov[i, i] / num_causal)),
)
const = np.random.randn(n)
data = np.matmul(effect_size, const)
data_val = np.matmul(const, cov)
data_sd = np.sqrt(np.matmul(data_val, const))
self.check_distribution(
data, "norm", (np.matmul(const, mean) / num_causal, data_sd / num_causal)
data,
"norm",
(np.matmul(const, mean) / num_causal, data_sd / np.sqrt(num_causal)),
)
18 changes: 11 additions & 7 deletions tstrait/trait_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _sim_effect_size(self, num_causal, rng):
num_causal = self._check_parameter(num_causal, rng)
beta = rng.normal(
loc=self.mean / num_causal,
scale=np.sqrt(self.var) / num_causal,
scale=np.sqrt(self.var / num_causal),
size=num_causal,
)
return beta
Expand Down Expand Up @@ -172,7 +172,8 @@ def _sim_effect_size(self, num_causal, rng):
Simulated effect size of a causal mutation.
"""
num_causal = self._check_parameter(num_causal, rng)
beta = rng.exponential(scale=self.scale / num_causal, size=num_causal)
beta = rng.exponential(scale=self.scale, size=num_causal)
beta /= np.sqrt(num_causal)
if self.negative:
beta = np.multiply(rng.choice([-1, 1], size=num_causal), beta)
return beta
Expand Down Expand Up @@ -228,7 +229,7 @@ def _sim_effect_size(self, num_causal, rng):
Simulated effect size of a causal mutation.
"""
num_causal = self._check_parameter(num_causal, rng)
beta = self.value / num_causal
beta = self.value / np.sqrt(num_causal)
return np.repeat(beta, num_causal)


Expand Down Expand Up @@ -291,7 +292,7 @@ def _sim_effect_size(self, num_causal, rng):
"""
num_causal = self._check_parameter(num_causal, rng)
beta = rng.standard_t(self.df, size=num_causal)
beta = (beta * np.sqrt(self.var) + self.mean) / num_causal
beta = beta * np.sqrt(self.var) / np.sqrt(num_causal) + self.mean / num_causal
return beta


Expand Down Expand Up @@ -355,7 +356,7 @@ def _sim_effect_size(self, num_causal, rng):
Simulated effect size of a causal mutation.
"""
num_causal = self._check_parameter(num_causal, rng)
beta = rng.gamma(self.shape, self.scale, size=num_causal) / num_causal
beta = rng.gamma(self.shape, self.scale, size=num_causal) / np.sqrt(num_causal)
if self.negative:
beta = np.multiply(rng.choice([-1, 1], size=num_causal), beta)
return beta
Expand Down Expand Up @@ -423,8 +424,11 @@ def _sim_effect_size(self, num_causal, rng):
Simulated effect size of a causal mutation.
"""
num_causal = self._check_parameter(num_causal, rng)
beta = rng.multivariate_normal(mean=self.mean, cov=self.cov, size=num_causal)
beta /= num_causal
beta = rng.multivariate_normal(
mean=np.array(self.mean) / num_causal,
cov=np.array(self.cov) / num_causal,
size=num_causal,
)
return beta


Expand Down

0 comments on commit c4a1d74

Please sign in to comment.