
Commit b630377

Adds instrumentation and some other stuff

Parent: 03c1542

File tree: 5 files changed, +35 −42 lines


Diff for: randalo/adelie_integration.py  (+8 −7)

@@ -67,13 +67,14 @@ def _matmul_impl(self, v):
             penalty=np.zeros(S),
             lmda_path=[0], progress_bar=False, n_threads=32, intercept=False)
         import pickle
-        pickle.dump({
-            'fit_active': state.benchmark_fit_active,
-            'fit_screen': state.benchmark_fit_screen,
-            'invariance': state.benchmark_invariance,
-            'kkt': state.benchmark_kkt,
-            'screen': state.benchmark_screen,
-        }, f'/scratch/groups/candes/parth/benchmark{_i}.pkl')
+        with open(f'/scratch/groups/candes/parth/benchmark{_i}.pkl', 'wb') as fd:
+            pickle.dump({
+                'fit_active': state.benchmark_fit_active,
+                'fit_screen': state.benchmark_fit_screen,
+                'invariance': state.benchmark_invariance,
+                'kkt': state.benchmark_kkt,
+                'screen': state.benchmark_screen,
+            }, fd)
         _i += 1
         B = np.array(
             self.X_S @ state.betas.toarray()[0]  # .reshape((S, -1), order='C')
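This change fixes a real bug: pickle.dump writes to a file-like object with a write method, so passing the scratch path string directly would fail at runtime. Below is a minimal sketch of the corrected pattern, with a hypothetical timings dict and a local output file standing in for the benchmark_* fields and the scratch path above:

import pickle

# Hypothetical benchmark timings; the real dict holds the adelie state's
# benchmark_* fields.
timings = {"fit_active": [0.12], "fit_screen": [0.05]}

# pickle.dump needs a writable binary file object, not a path string,
# so open the file explicitly and let the context manager close it.
with open("benchmark_0.pkl", "wb") as fd:
    pickle.dump(timings, fd)

# Round trip to confirm the file is readable.
with open("benchmark_0.pkl", "rb") as fd:
    print(pickle.load(fd))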

Diff for: randalo/reductions.py  (+15 −3)

@@ -95,11 +95,23 @@ def _matmul_impl(self, rhs):
         # TODO: Split cholesky/minres code paths into seperate ones
         if self.inverse_method == 'minres':
             if constraints is None and hessians is None:
+                import time
+                print("Line 1", time.monotonic(), flush=True)
                 sqrt_d2loss_dy_hat2 = torch.sqrt(d2loss_dy_hat2)[:, None]
+                print("Line 2", time.monotonic(), flush=True)
                 tilde_X = sqrt_d2loss_dy_hat2 * X_mask
-                return ((
-                    X @ minres(X.T @ X, (X.T @ (rhs_scaled / sqrt_d2loss_dy_hat2)))
-                ) / sqrt_d2loss_dy_hat2).to(rhs.dtype)
+                print("Line 3", time.monotonic(), flush=True)
+                p1 = (rhs_scaled / sqrt_d2loss_dy_hat2)
+                print("Line 4", time.monotonic(), flush=True)
+                p2 = X.T @ p1
+                print("Line 5", time.monotonic(), flush=True)
+                p3 = X.T @ X
+                print("Line 6", time.monotonic(), flush=True)
+                p4 = minres(p3, p2)
+                print("Line 7", time.monotonic(), flush=True)
+                p5 = X @ p4
+                print("Line 8", time.monotonic(), flush=True)
+                return (p5 / sqrt_d2loss_dy_hat2).to(rhs.dtype)
             else:
                 raise NotImplementedError()
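The instrumentation splits the original chained expression into named intermediates p1 through p5, printing time.monotonic() between each step so the job log shows whether the Gram matrix formation, the other products with X.T, the MINRES solve, or the final multiply dominates the runtime. A small sketch of the same idea with a reusable timing helper; numpy arrays and scipy.sparse.linalg.minres are stand-ins for the project's tensors and its own minres wrapper, which may have a different signature:

import time

import numpy as np
from scipy.sparse.linalg import minres


def timed(label, fn):
    """Run fn(), print its wall time via time.monotonic(), and return the result."""
    t0 = time.monotonic()
    out = fn()
    print(f"{label}: {time.monotonic() - t0:.4f}s", flush=True)
    return out


rng = np.random.default_rng(0)
X = rng.standard_normal((2000, 200))
rhs = rng.standard_normal(2000)

gram = timed("X.T @ X", lambda: X.T @ X)                  # p3 in the diff
proj = timed("X.T @ rhs", lambda: X.T @ rhs)              # p2
sol, info = timed("minres", lambda: minres(gram, proj))   # p4; gram is symmetric PSD
out = timed("X @ sol", lambda: X @ sol)                   # p5

Pulling the solve apart like this only changes how the work is observed, not the result; the per-step prints make it obvious which line to optimize.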

Diff for: utils/run_data_collection.sh  (−21)

This file was deleted.

Diff for: utils/run_parameter_sweep.sh  (+1 −1)

@@ -4,7 +4,7 @@
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=32
 #SBATCH --mem=512GB
-#SBATCH --partition=candes
+#SBATCH --partition=candes,pilanci

 BASE_DIR=$HOME/adelie_alo/benchmarking/lasso_sweep
 RESULTS_DIR=$BASE_DIR/results

Diff for: utils/sherlock_script.py  (+11 −10)

@@ -26,8 +26,8 @@
 df = df.drop('ethnicity', axis=1)
 covars_dense = np.array(
     df[['age', 'age_squared', 'sex'] + [f'PC{i}' for i in range(1, 11)]].to_numpy(),
-    dtype=np.float32)
-y = np.array(df['height'].to_numpy(), dtype=np.float32)
+    dtype=np.float64)
+y = np.array(df['height'].to_numpy(), dtype=np.float64)

 chromosomes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]

@@ -37,11 +37,12 @@
     [
         ad.matrix.snp_unphased(
             ad.io.snp_unphased(
-                os.path.join(cache_dir, f"EUR_subset_chr{chr}.snpdat"),
-            ), n_threads=32, dtype=np.float32
+                os.path.join(cache_dir, f"EUR_subset_chr{chr}.snpdat"), "mmap"
+            ), n_threads=32, dtype=np.float64
         )
         for chr in chromosomes],
     axis=1,
+    n_threads=32
 )
 print(f'{X.shape=}')

@@ -57,7 +58,7 @@
 print(f'{X_train.shape=}')
 print(f'{X_test.shape=}')

-model_cache = f'/scratch/groups/candes/parth/fit_model_{task_id}.pkl'
+model_cache = f'/scratch/groups/candes/parth/fit_model_{task_id}_v3.pkl'

 if os.path.exists(model_cache):
     class fake_state:

@@ -78,11 +79,11 @@ def __init__(self):
 ti_solve = time.monotonic()
 state = ad.grpnet(
     X=X_train,
-    glm=ad.glm.gaussian(y_train, dtype=np.float32),
+    glm=ad.glm.gaussian(y_train, dtype=np.float64),
     early_exit=False,
-    min_ratio=1e-6,
+    min_ratio=1e-9,
     n_threads=32,
-    lmda_path_size=241,
+    lmda_path_size=101,
 )
 tf_solve = time.monotonic()

@@ -100,7 +101,7 @@ def __init__(self):
 ins[i] = loss(torch.from_numpy(y_hat_train[i]), torch.from_numpy(y_train))

 ti_alo = time.monotonic()
-ld, alo, ts, r2 = ai.get_alo_for_sweep_v2(y_train, state, loss, 80)
+ld, alo, ts, r2 = ai.get_alo_for_sweep(y_train, state, loss, 20)
 tf_alo = time.monotonic()

-#np.savez(sys.argv[1], alo_lamda=ld, full_lamda=state.lmda_path, alo=alo, oos=oos, in_sample=ins, ts=ts, r2=r2, solve_time=tf_solve - ti_solve, alo_time=tf_alo - ti_alo)
+np.savez(sys.argv[1], alo_lamda=ld, full_lamda=state.lmda_path, alo=alo, oos=oos, in_sample=ins, ts=ts, r2=r2, solve_time=tf_solve - ti_solve, alo_time=tf_alo - ti_alo)
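The final hunk re-enables the np.savez call, so the sweep outputs are again written to the path given in sys.argv[1]; each keyword argument becomes a named array inside the resulting .npz archive. A short round-trip sketch with placeholder arrays standing in for the script's actual results:

import numpy as np

# Placeholder results; the script stores alo_lamda, full_lamda, alo, oos,
# in_sample, ts, r2 and the two timings under the same keyword names.
ld = np.logspace(0, -3, 20)
alo = np.linspace(1.0, 0.4, 20)
solve_time = 123.4

np.savez("sweep_results.npz", alo_lamda=ld, alo=alo, solve_time=solve_time)

# Reload and inspect by key.
data = np.load("sweep_results.npz")
print(data["alo_lamda"].shape, data["alo"][:3], float(data["solve_time"]))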
