Commit

format and fix dasm test
willdumm committed Oct 23, 2024
1 parent: cc65c0f · commit: f1684fa
Showing 5 changed files with 27 additions and 9 deletions.
4 changes: 3 additions & 1 deletion netam/dnsm.py
@@ -140,7 +140,9 @@ def of_pcp_df(cls, pcp_df, branch_length_multiplier=5.0, multihit_model=None):
         )
 
     @classmethod
-    def train_val_datasets_of_pcp_df(cls, pcp_df, branch_length_multiplier=5.0, multihit_model=None):
+    def train_val_datasets_of_pcp_df(
+        cls, pcp_df, branch_length_multiplier=5.0, multihit_model=None
+    ):
        """Perform a train-val split based on the 'in_train' column.
 
        This is a class method so it works for subclasses.
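The rewrap above only reflows the signature; the behavior the docstring describes is a split on the 'in_train' flag. A minimal sketch of that split, assuming a pandas DataFrame with a boolean "in_train" column (the dataset-construction step that netam performs afterward is elided, and split_by_in_train is an illustrative name, not the netam API):

import pandas as pd

def split_by_in_train(pcp_df: pd.DataFrame):
    # rows flagged in_train become the training set; the rest are validation
    train_df = pcp_df[pcp_df["in_train"]].reset_index(drop=True)
    val_df = pcp_df[~pcp_df["in_train"]].reset_index(drop=True)
    return train_df, val_df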
1 change: 0 additions & 1 deletion netam/hit_class.py
@@ -41,7 +41,6 @@ def parent_specific_hit_classes(parent_codon_idxs: torch.Tensor) -> torch.Tensor
     ]
 
 
-
 def apply_multihit_correction(
     parent_codon_idxs: torch.Tensor,
     codon_probs: torch.Tensor,
6 changes: 5 additions & 1 deletion netam/molevol.py
@@ -18,6 +18,7 @@
 from netam.sequences import CODON_AA_INDICATOR_MATRIX
 
 import netam.sequences as sequences
+
 # torch.autograd.set_detect_anomaly(True)
 
 
@@ -521,7 +522,10 @@ def optimize_branch_length(
         torch.nn.utils.clip_grad_norm_([log_branch_length], max_norm=5.0)
         optimizer.step()
         if torch.isnan(log_branch_length):
-            print("branch length optimization resulted in NAN, previous log branch length:", prev_log_branch_length)
+            print(
+                "branch length optimization resulted in NAN, previous log branch length:",
+                prev_log_branch_length,
+            )
             if np.isclose(prev_log_branch_length.detach().numpy(), 0):
                 log_branch_length = prev_log_branch_length
                 nan_issue = True
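For context, the print reformat above sits inside optimize_branch_length's NaN-recovery path: the log branch length is optimized by gradient steps on a log likelihood, gradients are clipped, and if a step produces NaN the previous value is restored. A minimal, hypothetical sketch of that pattern (optimize_log_branch_length and its arguments are illustrative, not the netam signature):

import torch

def optimize_log_branch_length(log_prob_fn, init=0.0, steps=100, lr=0.1):
    log_bl = torch.tensor(init, requires_grad=True)
    optimizer = torch.optim.Adam([log_bl], lr=lr)
    for _ in range(steps):
        prev = log_bl.detach().clone()
        optimizer.zero_grad()
        loss = -log_prob_fn(log_bl)  # minimize the negative log probability
        loss.backward()
        torch.nn.utils.clip_grad_norm_([log_bl], max_norm=5.0)
        optimizer.step()
        if torch.isnan(log_bl):
            # a step went bad: restore the last finite value and stop
            with torch.no_grad():
                log_bl.copy_(prev)
            break
    return log_bl.detach()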
14 changes: 8 additions & 6 deletions netam/multihit.py
@@ -287,9 +287,7 @@ def child_codon_probs_corrected(
         torch.Tensor: A (codon_count,) shaped tensor containing the corrected probabilities of each child codon.
     """
 
-    corrected_per_parent_probs = model(
-        parent_codon_idxs, uncorrected_per_parent_probs
-    )
+    corrected_per_parent_probs = model(parent_codon_idxs, uncorrected_per_parent_probs)
    return child_codon_probs_from_per_parent_probs(
        corrected_per_parent_probs, child_codon_idxs
    )
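The one-line rewrite above doesn't change the logic: the multihit model maps per-parent codon distributions to corrected ones, and the corrected probability of each observed child codon is then read out. A hedged sketch of that readout step, assuming the per-parent distributions are flattened to shape (codon_count, 64); the real netam tensors may be shaped differently:

import torch

def child_codon_probs_from_per_parent_probs(per_parent_probs, child_codon_idxs):
    # per_parent_probs: (codon_count, 64); child_codon_idxs: (codon_count,)
    # select, for each site, the probability of the codon actually observed
    return per_parent_probs[torch.arange(per_parent_probs.shape[0]), child_codon_idxs]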
@@ -374,9 +372,13 @@ def log_pcp_probability(log_branch_length):
 
             child_codon_idxs = reshape_for_codons(child_idxs)[codon_mask]
             parent_codon_idxs = reshape_for_codons(parent_idxs)[codon_mask]
-            return child_codon_probs_corrected(
-                codon_probs, parent_codon_idxs, child_codon_idxs, self.model
-            ).log().sum()
+            return (
+                child_codon_probs_corrected(
+                    codon_probs, parent_codon_idxs, child_codon_idxs, self.model
+                )
+                .log()
+                .sum()
+            )
 
         return optimize_branch_length(
             log_pcp_probability,
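The chained .log().sum() turns per-codon probabilities into a total log likelihood, the scalar that branch length optimization maximizes; the branch length enters only through the closure, so the optimizer needs nothing but a callable from log branch length to log probability. A toy usage of the optimize_log_branch_length sketch given after the molevol.py diff above (the quadratic closure is purely illustrative):

log_bl = optimize_log_branch_length(lambda lbl: -((lbl - 1.0) ** 2))
print(log_bl)  # should approach 1.0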
11 changes: 11 additions & 0 deletions tests/test_dasm.py
@@ -14,10 +14,21 @@
     DASMDataset,
     zap_predictions_along_diagonal,
 )
+import multiprocessing as mp
 
 
+def force_spawn():
+    """Force the spawn start method for multiprocessing.
+
+    This is necessary to avoid conflicts with the internal OpenMP-based thread pool in
+    PyTorch.
+    """
+    mp.set_start_method("spawn", force=True)
+
+
 @pytest.fixture(scope="module")
 def dasm_burrito(pcp_df):
     """Fixture that returns the DNSM Burrito object."""
+    force_spawn()
     pcp_df["in_train"] = True
     pcp_df.loc[pcp_df.index[-15:], "in_train"] = False
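A minimal standalone sketch (not from the commit) of why force_spawn matters: with the default "fork" start method on Linux, worker processes inherit the parent's OpenMP thread state after torch has spun up its internal thread pool, which can deadlock; "spawn" starts each worker in a fresh interpreter.

import multiprocessing as mp
import torch

def worker(x):
    return torch.tensor([x]).exp().item()

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)  # avoids fork/OpenMP conflicts
    with mp.Pool(2) as pool:
        print(pool.map(worker, [0.0, 1.0]))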
