From 15408538b0bdc3c297ff89ec2a4638f7d7d0f9e1 Mon Sep 17 00:00:00 2001
From: Jakob Nybo Nissen <jakobnybonissen@gmail.com>
Date: Mon, 22 Jul 2024 13:00:42 +0200
Subject: [PATCH] Remove abundance.py script

I didn't use this script, do not want to maintain it, and generally don't trust
that it provides reliable answers.
---
 src/abundance.py | 61 ------------------------------------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 src/abundance.py

diff --git a/src/abundance.py b/src/abundance.py
deleted file mode 100644
index b5cad2f3..00000000
--- a/src/abundance.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import sys
-import os
-import argparse
-import numpy as np
-import vamb
-
-parser = argparse.ArgumentParser(
-    description="""Command-line bin abundance estimator.
-Print the median RPKM abundance for each bin in each sample to STDOUT.
-Will read the RPKM file into memory - beware.""",
-    formatter_class=argparse.RawDescriptionHelpFormatter,
-    add_help=False,
-)
-
-parser.add_argument("rpkmpath", help="Path to RPKM file")
-parser.add_argument("clusterspath", help="Path to clusters.tsv")
-parser.add_argument("headerpath", help="Path to list of headers")
-
-if len(sys.argv) == 1:
-    parser.print_help()
-    sys.exit()
-
-args = parser.parse_args()
-
-# Check files
-for infile in (args.rpkmpath, args.clusterspath, args.headerpath):
-    if not os.path.isfile(infile):
-        raise FileNotFoundError(infile)
-
-# Load in files
-with open(args.headerpath) as file:
-    indexof = {line.strip(): i for i, line in enumerate(file)}
-
-with open(args.clusterspath) as file:
-    clusters = vamb.vambtools.read_clusters(file)
-
-# Check that all clusters names are in headers:
-for cluster in clusters.values():
-    for header in cluster:
-        if header not in indexof:
-            raise KeyError("Header not found in headerlist: {}".format(header))
-
-# Load RPKM and check it
-rpkm = vamb.vambtools.read_npz(args.rpkmpath)
-nsamples = rpkm.shape[1]
-
-if len(indexof) != len(rpkm):
-    raise ValueError("Not the same number of headers as rows in RPKM file")
-
-# Now estimate abundances
-for clustername, cluster in clusters.items():
-    depths = np.empty((len(cluster), nsamples), dtype=np.float32)
-
-    for row, header in enumerate(cluster):
-        index = indexof[header]
-        depths[row] = rpkm[index]
-
-    median_depths = np.median(depths, axis=0)
-
-    print(clustername, end="\t")
-    print("\t".join([str(i) for i in median_depths]))