nf-core · LaurenceKuhl · Aug 1, 2024 · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,10 @@
 repos:
-  - repo: https://github.com/psf/black
-    rev: 23.1.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.3
     hooks:
-      - id: black
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
+      - id: ruff # linter
+        args: [--fix, --exit-non-zero-on-fix] # sort imports and fix
+      - id: ruff-format # formatter
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: "v3.1.0"
     hooks:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Add module to classify samples by clonality ([#178](https://github.com/nf-core/crisprseq/pull/178))
+- Add DrugZ, a module for chemogenetic interaction ([#168](https://github.com/nf-core/crisprseq/pull/168))
 
 ### Fixed
 

diff --git a/README.md b/README.md
@@ -133,6 +133,7 @@ We thank the following people for their extensive assistance in the development
 - [@mschaffer-incyte](https://github.com/mschaffer-incyte)
 - [@SusiJo](https://github.com/SusiJo)
 - [@joannakraw](https://github.com/joannakraw)
+- [@metinyazar](https://github.com/metinyazar)
 
 ## Contributions and Support
 

diff --git a/bin/BAGEL.py b/bin/BAGEL.py
@@ -36,7 +36,6 @@
 SOFTWARE.
 """
 
-
 import sys
 import time
 
@@ -105,7 +104,7 @@ def func_linear(x, a, b):
 
 class Training:
     def __init__(self, X, n=None, cvnum=10):
-        if n == None:
+        if n is None:
             self._n = len(X)
         self._cvnum = cvnum
         self._bid = int(self._n / cvnum)
@@ -275,7 +274,7 @@ def calculate_fold_change(
         reads.drop(ctrl_labels, axis=1, inplace=True)
         ctrl_label_new = ";".join(ctrl_labels)
         reads[ctrl_label_new] = ctrl_sum
-    except:
+    except Exception:
         print(reads[ctrl_labels].sum(axis=1))
         print("Invalid input controls")
         sys.exit(1)
@@ -519,7 +518,7 @@ def calculate_bayes_factors(
                         mismatch_1bp_gene,
                     )
 
-        except:
+        except Exception:
             print("Please check align-info file")
             sys.exit(1)
 
@@ -551,7 +550,7 @@ def calculate_bayes_factors(
             print("Using column:  " + ", ".join(column_labels))
         # print "Using column:  " + ", ".join(map(str,column_list))
 
-        except:
+        except Exception:
             print("Invalid columns")
             sys.exit(1)
 
@@ -588,15 +587,15 @@ def calculate_bayes_factors(
     coreEss = []
 
     with open(essential_genes) as fin:
-        skip_header = fin.readline()
+        # skip_header = fin.readline()
         for line in fin:
             coreEss.append(line.rstrip().split("\t")[0])
     coreEss = np.array(coreEss)
     print("Number of reference essentials: " + str(len(coreEss)))
 
     nonEss = []
     with open(non_essential_genes) as fin:
-        skip_header = fin.readline()
+        # skip_header = fin.readline()
         for line in fin:
             nonEss.append(line.rstrip().split("\t")[0])
 
@@ -617,9 +616,9 @@ def calculate_bayes_factors(
                     for i in [0, 1]:
                         if linearray[i] not in network:
                             network[linearray[i]] = {}
-                        network[linearray[i]][
-                            linearray[-1 * (i - 1)]
-                        ] = 1  # save edge information
+                        network[linearray[i]][linearray[-1 * (i - 1)]] = (
+                            1  # save edge information
+                        )
                     edgecount += 1
 
         print("Number of network edges: " + str(edgecount))
@@ -667,18 +666,18 @@ def calculate_bayes_factors(
     elif train_method == 1:
         LOOPCOUNT = no_of_cross_validations  # 10-folds
 
-    if run_test_mode == True:
+    if run_test_mode:
         fp = open(output_file + ".traininfo", "w")
         fp.write("#1: Loopcount\n#2: Training set\n#3: Testset\n")
     # No resampling option
-    if no_resampling == True:
+    if no_resampling:
         print("# Caution: Resampling is disabled")
         LOOPCOUNT = 1
 
     print("Iter TrainEss TrainNon TestSet")
     sys.stdout.flush()
     for loop in range(LOOPCOUNT):
-        currentbf = {}
+        # currentbf = {}
         printstr = ""
         printstr += str(loop)
 
@@ -688,7 +687,7 @@ def calculate_bayes_factors(
         # training set
         # define essential and nonessential training sets:  arrays of indexes
         #
-        if no_resampling == True:
+        if no_resampling:
             # no resampling
             gene_train_idx = gene_idx
             gene_test_idx = gene_idx
@@ -787,7 +786,7 @@ def calculate_bayes_factors(
             slope, intercept, r_value, p_value, std_err = stats.linregress(
                 np.array(testx), np.array(testy)
             )
-        except:
+        except Exception:
             print("Regression failed. Check quality of the screen")
             sys.exit(1)
         #
@@ -801,7 +800,7 @@ def calculate_bayes_factors(
                     bayes_factor.append(slope * fc[rnatag][rep] + intercept)
                 bf[rnatag].append(bayes_factor)
 
-    if run_test_mode == True:
+    if run_test_mode:
         fp.close()
 
     num_obs = dict()
@@ -825,7 +824,7 @@ def calculate_bayes_factors(
                     bf_mean_rna_rep[rnatag][column_list[rep]] = np.mean(t[rep])
                     bf_std_rna_rep[rnatag][column_list[rep]] = np.std(t[rep])
 
-        if rna_level == False:
+        if not rna_level:
             sumofbf_list = list()
             for i in range(num_obs[g]):
                 sumofbf = 0.0
@@ -911,7 +910,7 @@ def calculate_bayes_factors(
                 % (coeff_df["Coefficient"][0], coeff_df["Coefficient"][1])
             )
 
-            if rna_level == False:
+            if not rna_level:
                 for g in gene2rna:
                     penalty = 0.0
                     for seqid in gene2rna[g]:
@@ -942,8 +941,8 @@ def calculate_bayes_factors(
     #
     #  NORMALIZE sgRNA COUNT
     #
-    if rna_level is False and flat_sgrna == True:
-        if filter_multi_target == True:
+    if rna_level is False and flat_sgrna:
+        if filter_multi_target:
             targetbf = bf_multi_corrected_gene
         else:
             targetbf = bf_mean
@@ -964,10 +963,8 @@ def calculate_bayes_factors(
     # calculate network scores
     #
 
-    if (
-        network_boost == True and rna_level == False
-    ):  # Network boost is only working for gene level
-        if run_test_mode == True:  # TEST MODE
+    if network_boost and not rna_level:  # Network boost is only working for gene level
+        if run_test_mode:  # TEST MODE
             fp = open(output_file + ".netscore", "w")
         print("\nNetwork score calculation start\n")
 
@@ -987,7 +984,7 @@ def calculate_bayes_factors(
         #
 
         for loop in range(LOOPCOUNT):
-            currentnbf = {}
+            # currentnbf = {}
             printstr = ""
             printstr += str(loop)
 
@@ -1073,7 +1070,7 @@ def calculate_bayes_factors(
 
             for g in genes_array[gene_test_idx]:
                 if g in networkscores:
-                    if run_test_mode == True:
+                    if run_test_mode:
                         fp.write(
                             "%s\t%f\t%f\n"
                             % (
@@ -1087,25 +1084,25 @@ def calculate_bayes_factors(
                     nbf = 0.0
 
                 boostedbf[g].append(bf_mean[g] + nbf)
-                if flat_sgrna == True:
+                if flat_sgrna:
                     boostedbf[g].append(bf_norm[g] + nbf)
 
-        if run_test_mode == True:
+        if run_test_mode:
             fp.close()
 
     #
     # print out results
     #
 
     # Equalizing factor (Replicates)
-    if flat_rep == True:
+    if flat_rep:
         eqf = equalise_rep_no / float(len(column_labels))
     else:
         eqf = 1
 
     # print out
     with open(output_file, "w") as fout:
-        if rna_level == True:
+        if rna_level:
             fout.write("RNA\tGENE")
             for i in range(len(column_list)):
                 fout.write(f"\t{column_labels[i]:s}")
@@ -1130,7 +1127,7 @@ def calculate_bayes_factors(
                         fout.write(f"{bf_std_rna_rep[rnatag][rep]:4.3f}\t")
 
                 # Sum BF of replicates
-                if filter_multi_target == True:
+                if filter_multi_target:
                     fout.write(
                         f"{float(bf_multi_corrected_rna[rnatag]) * eqf:4.3f}"
                     )  # eqf = equalizing factor for the number of replicates
@@ -1145,29 +1142,29 @@ def calculate_bayes_factors(
                 fout.write("\n")
         else:
             fout.write("GENE")
-            if network_boost == True:
+            if network_boost:
                 fout.write("\tBoostedBF")
                 if train_method == 0:
                     fout.write("\tSTD_BoostedBF")
             fout.write("\tBF")
             if train_method == 0:
                 fout.write("\tSTD\tNumObs")
-            if flat_sgrna == True:
+            if flat_sgrna:
                 fout.write("\tNormBF")
             fout.write("\n")
 
             for g in sorted(genes.keys()):
                 # Gene
                 fout.write(f"{g:s}")
-                if network_boost == True:
+                if network_boost:
                     boostedbf_mean = np.mean(boostedbf[g])
                     boostedbf_std = np.std(boostedbf[g])
                     fout.write(f"\t{float(boostedbf_mean) * eqf:4.3f}")
                     if train_method == 0:
                         fout.write(f"\t{float(boostedbf_std) * eqf:4.3f}")
 
                 # BF
-                if filter_multi_target == True:
+                if filter_multi_target:
                     fout.write(
                         f"\t{float(bf_multi_corrected_gene[g]) * eqf:4.3f}"
                     )  # eqf = equalizing factor for the number of replicates
@@ -1177,7 +1174,7 @@ def calculate_bayes_factors(
                 if train_method == 0:
                     fout.write(f"\t{float(bf_std[g]):4.3f}\t{num_obs[g]:d}")
                 # Normalized BF
-                if flat_sgrna == True:
+                if flat_sgrna:
                     fout.write(f"\t{float(bf_norm[g]):4.3f}")
 
                 fout.write("\n")

diff --git a/bin/drugz.py b/bin/drugz.py
@@ -34,6 +34,7 @@
 
 import argparse
 import logging as log
+
 # ------------------------------------
 # python modules
 # ------------------------------------
@@ -42,7 +43,6 @@
 import numpy as np
 import pandas as pd
 import scipy.stats as stats
-import six
 
 log.basicConfig(level=log.INFO)
 log_ = log.getLogger(__name__)
@@ -123,7 +123,7 @@ def calculate_fold_change(
     fc_replicate_id = "fc_{replicate}".format(replicate=replicate)
     fc_zscore_id = "zscore_" + fc_replicate_id
     empirical_bayes_id = "eb_std_{replicate}".format(replicate=replicate)
-    one_based_idx = replicate + 1
+    # one_based_idx = replicate + 1
 
     # Get the control and treatment sample ids for each replicate
     control_sample = control_samples[replicate]
@@ -191,9 +191,9 @@ def empirical_bayes(
 
         # If the current variation is greater than the one for previous bin then set variation equal to this
         if std_dev >= fold_change[empirical_bayes_id][i - 1]:
-            fold_change[empirical_bayes_id][
-                i : i + 25
-            ] = std_dev  # set new std in whole step size (25)
+            fold_change[empirical_bayes_id][i : i + 25] = (
+                std_dev  # set new std in whole step size (25)
+            )
         # Otherwise, set it equal to the variation of the previous bin
         # This allows variation estimate for each bin to only increase or stay the same as the previous
         else:
@@ -505,7 +505,7 @@ def drugZ_analysis(args):
     control_samples = args.control_samples.split(",")
     treatment_samples = args.drug_samples.split(",")
 
-    if args.remove_genes == None:
+    if args.remove_genes is None:
         remove_genes = []
     else:
         remove_genes = args.remove_genes.split(",")
@@ -527,8 +527,8 @@ def drugZ_analysis(args):
     fc_zscore_ids = list()
     fold_changes = list()
 
-    if (len(control_samples) != len(treatment_samples) and args.unpaired == True) or (
-        len(control_samples) == len(treatment_samples) and args.unpaired == True
+    if (len(control_samples) != len(treatment_samples) and args.unpaired) or (
+        len(control_samples) == len(treatment_samples) and args.unpaired
     ):
         log_.info("Calculating gene-level Zscores unpaired approach")
         fold_change2 = calculate_unpaired_foldchange(
@@ -561,7 +561,7 @@ def drugZ_analysis(args):
         log_.info("Writing output file unpaired results")
         write_drugZ_output(outfile=args.drugz_output_file, output=gene_normZ2)
 
-    elif len(control_samples) != len(treatment_samples) and args.unpaired == False:
+    elif len(control_samples) != len(treatment_samples) and not args.unpaired:
         log_.error(
             "Must have the same number of control and drug samples to run the paired approach"
         )
@@ -611,7 +611,7 @@ def drugZ_analysis(args):
 
         log_.info("Writing output file paired results")
         write_drugZ_output(outfile=args.drugz_output_file, output=gene_normZ)
-    if args.unpaired == True:
+    if args.unpaired:
         return gene_normZ2
     else:
         return gene_normZ

diff --git a/templates/alignment_summary.py b/templates/alignment_summary.py
@@ -6,7 +6,6 @@
 #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text.
 ############################
 
-import sys
 
 import pysam
 

diff --git a/templates/clustering_summary.py b/templates/clustering_summary.py
@@ -7,7 +7,6 @@
 ############################
 
 import gzip
-import sys
 
 import Bio
 from Bio import SeqIO