diff --git a/doc/how_to_run.md b/doc/how_to_run.md
index 38755cf4..a8be2f36 100644
--- a/doc/how_to_run.md
+++ b/doc/how_to_run.md
@@ -21,8 +21,11 @@ $ for sample in 1 2 3; do
       strobealign -t 8 --aemb contigs.fna.gz ${sample}.{fw,rv}.fq.gz > aemb/${sample}.tsv;
   done
 
+$ # Paste the aemb files together to make a TSV file with given header
+$ cat <(echo -e "contigname\t1\t2\t3") paste aemb/1.tsv <(cut -f 2 aemb/2.tsv) <(cut -f 2 aemb/3.tsv) > abundance.tsv
+
 $ # Run Vamb using the contigs and the directory with abundance files
-$ vamb bin default --outdir vambout --fasta contigs.fna.gz --aemb aemb
+$ vamb bin default --outdir vambout --fasta contigs.fna.gz --abundance_tsv abundance.tsv
 ```
 
 #### TaxVamb
@@ -58,14 +61,15 @@ Future runs can then instead use:
 
 #### Abundance
 The abundance may be computed from:
-* [recommended ]A directory containing TSV files obtained by mapping reads from
-  each individual sample against the contig catalogue using `strobealign --aemb`
 * A directory of BAM files generated the same way, except using any aligner that
   produces a BAM file, e.g. `minimap2`
+* A TSV file with the header being "contigname" followed by one samplename per sample,
+  and the values in the TSV file being precomputed abundances.
+  These may be derived from `paste`ing together outputs from the tool `strobealign --aemb`
 
 Thus, it can be specified as:
 ```
---aemb dir_with_aemb_files
+--abundance_tsv abundance.tsv
 ```
 or
 ```
diff --git a/test/test_parsebam.py b/test/test_parsebam.py
index 7dfe619c..30ff11e2 100644
--- a/test/test_parsebam.py
+++ b/test/test_parsebam.py
@@ -1,8 +1,8 @@
 import unittest
 import io
 import numpy as np
-import random
 import tempfile
+from pathlib import Path
 
 import vamb
 import testtools
@@ -105,59 +105,54 @@ def test_save_load(self):
         self.assertEqual(abundance2.refhash, self.abundance.refhash)
         self.assertEqual(abundance2.minid, self.abundance.minid)
 
-    def test_parse_from_aemb(self):
-        abundance = vamb.parsebam.Abundance.from_aemb(
-            testtools.AEMB_FILES, self.comp_metadata
-        )
-
-        self.assertTrue(abundance.refhash == self.comp_metadata.refhash)
-        self.assertTrue(
-            (abundance.samplenames == [str(i) for i in testtools.AEMB_FILES]).all()
-        )
-
-        manual_matrix = []
-        for file in testtools.AEMB_FILES:
-            manual_matrix.append([])
-            with open(file) as f:
-                for line in f:
-                    (_, ab) = line.split()
-                    manual_matrix[-1].append(float(ab))
-        manual_matrix = list(map(list, zip(*manual_matrix)))
-
-        self.assertTrue((np.abs(manual_matrix - abundance.matrix) < 1e-5).all())
-
-        # Good headers, but in other order
-        names = list(self.comp_metadata.identifiers)
-        random.shuffle(names)
-        files = [self.make_aemb_file(names) for i in range(2)]
-        a = vamb.parsebam.Abundance.from_aemb(
-            [f.name for f in files], self.comp_metadata
-        )
-        self.assertIsInstance(a, vamb.parsebam.Abundance)
-
-    def make_aemb_file(self, headers):
-        file = tempfile.NamedTemporaryFile(mode="w+")
-        for header in headers:
-            n = random.random() * 10
-            print(header, "\t", str(n), file=file)
-        file.seek(0)
-        return file
-
-    def test_bad_aemb(self):
-        # One header is wrong
-        names = list(self.comp_metadata.identifiers)
-        names[-4] = names[-4] + "a"
-        files = [self.make_aemb_file(names) for i in range(2)]
-        with self.assertRaises(ValueError):
-            vamb.parsebam.Abundance.from_aemb(
-                [f.name for f in files], self.comp_metadata
+    def test_parse_from_tsv(self):
+        # Check it parses
+        with open(testtools.AEMB_FILES[0]) as file:
+            lines = [s.rstrip() for s in file]
+        for path in testtools.AEMB_FILES[1:]:
+            with open(path) as file:
+                for i, existing in enumerate(file):
+                    lines[i] += "\t" + existing.split("\t")[1].rstrip()
+
+        with tempfile.NamedTemporaryFile(mode="w+") as file:
+            print("contigname\tfile1\tfile2\tfile3", file=file)
+            for line in lines:
+                print(line, file=file)
+            file.seek(0)
+            abundance = vamb.parsebam.Abundance.from_tsv(
+                Path(file.name), self.comp_metadata
             )
 
-        # Too many headers
-        names = list(self.comp_metadata.identifiers)
-        names.append(names[0])
-        files = [self.make_aemb_file(names) for i in range(2)]
-        with self.assertRaises(ValueError):
-            vamb.parsebam.Abundance.from_aemb(
-                [f.name for f in files], self.comp_metadata
-            )
+        self.assertEqual(abundance.refhash, self.comp_metadata.refhash)
+        self.assertEqual(list(abundance.samplenames), ["file1", "file2", "file3"])
+
+        # Check values are alright
+        M = np.zeros_like(abundance.matrix)
+        for row, line in enumerate(lines):
+            for col, cell in enumerate(line.split("\t")[1:]):
+                M[row, col] = float(cell)
+        self.assertTrue((np.abs((M - abundance.matrix)) < 1e-6).all())
+
+        # Bad header order errors
+        lines[5], lines[4] = lines[4], lines[5]
+
+        with tempfile.NamedTemporaryFile(mode="w+") as file:
+            print("contigname\tfile1\tfile2\tfile3", file=file)
+            for line in lines:
+                print(line, file=file)
+            file.seek(0)
+            with self.assertRaises(ValueError):
+                vamb.parsebam.Abundance.from_tsv(Path(file.name), self.comp_metadata)
+
+        # Restore
+        lines[5], lines[4] = lines[4], lines[5]
+
+        # Too many lines
+        with tempfile.NamedTemporaryFile(mode="w+") as file:
+            print("contigname\tfile1\tfile2\tfile3", file=file)
+            for line in lines:
+                print(line, file=file)
+            print(lines[-2], file=file)
+            file.seek(0)
+            with self.assertRaises(ValueError):
+                vamb.parsebam.Abundance.from_tsv(Path(file.name), self.comp_metadata)
diff --git a/test/test_reclustering.py b/test/test_reclustering.py
new file mode 100644
index 00000000..0118f74b
--- /dev/null
+++ b/test/test_reclustering.py
@@ -0,0 +1,24 @@
+import unittest
+
+# Invariants:
+# It produces disjoint clusters, a subset of the input points
+
+# For CAMI dataset, compute comp, abundance, taxonomy, markers
+# Subset to e.g. 5 genera plus a few unclassified contigs
+
+# FASTA
+# Comp
+# Abundance
+# Markers
+# Taxonomy
+# Latent
+# Refined taxonomy
+
+
+class TestKmeansReclustering(unittest.TestCase):
+    pass
+    # Make markers + lengths
+    # Make taxonomy
+    # Create latent
+
+    # Initial clustering
diff --git a/vamb/__main__.py b/vamb/__main__.py
index 2a34286a..115d3f99 100755
--- a/vamb/__main__.py
+++ b/vamb/__main__.py
@@ -153,19 +153,13 @@ def __init__(self, paths: list[Path], min_alignment_id: Optional[float]):
         self.paths = paths
 
 
-class AEMBPaths(list):
-    __slots__ = ["paths"]
+class AbundanceTSVPath:
+    __slots__ = ["path"]
 
-    def __init__(self, dir: Path):
-        if not dir.is_dir():
-            raise NotADirectoryError(dir)
-        paths = [b for b in dir.iterdir() if b.is_file() and b.suffix == ".tsv"]
-        if len(paths) == 0:
-            raise ValueError(
-                f'No `.tsv` files found in --aemb argument "{dir}". '
-                "Make sure all TSV files in the directory ends with `.tsv`."
-            )
-        self.paths = paths
+    def __init__(self, path: Path):
+        if not path.is_file():
+            raise FileNotFoundError(path)
+        self.path = path
 
 
 class AbundanceOptions:
@@ -176,7 +170,7 @@ def are_args_present(args: argparse.Namespace) -> bool:
         return (
             isinstance(args.bampaths, list)
             or isinstance(args.bamdir, Path)
-            or isinstance(args.aemb, Path)
+            or isinstance(args.abundance_tsv, Path)
             or isinstance(args.abundancepath, Path)
         )
 
@@ -185,7 +179,7 @@ def from_args(cls, args: argparse.Namespace):
         return cls(
             typeasserted(args.bampaths, (list, type(None))),
             typeasserted(args.bamdir, (Path, type(None))),
-            typeasserted(args.aemb, (Path, type(None))),
+            typeasserted(args.abundance_tsv, (Path, type(None))),
             typeasserted(args.abundancepath, (Path, type(None))),
             typeasserted(args.min_alignment_id, (float, type(None))),
         )
@@ -194,25 +188,19 @@ def __init__(
         self,
         bampaths: Optional[list[Path]],
         bamdir: Optional[Path],
-        aemb: Optional[Path],
+        abundance_tsv: Optional[Path],
         abundancepath: Optional[Path],
         min_alignment_id: Optional[float],
     ):
-        assert isinstance(bampaths, (list, type(None)))
-        assert isinstance(bamdir, (Path, type(None)))
-        assert isinstance(aemb, (Path, type(None)))
-        assert isinstance(abundancepath, (Path, type(None)))
-        assert isinstance(min_alignment_id, (float, type(None)))
-
         # Make sure only one abundance input is there
         if (
             (bampaths is not None)
             + (abundancepath is not None)
             + (bamdir is not None)
-            + (aemb is not None)
+            + (abundance_tsv is not None)
         ) != 1:
             raise argparse.ArgumentTypeError(
-                "Must specify exactly one of BAM files, BAM dir, AEMB or abundance NPZ file input"
+                "Must specify exactly one of BAM files, BAM dir, TSV file or abundance NPZ file input"
             )
 
         if abundancepath is not None:
@@ -224,8 +212,8 @@ def __init__(
                 "The --bamfiles argument is deprecated. It works, but might be removed in future versions of Vamb. Please use --bamdir instead"
             )
             self.paths = BAMPaths(bampaths, min_alignment_id)
-        elif aemb is not None:
-            self.paths = AEMBPaths(aemb)
+        elif abundance_tsv is not None:
+            self.paths = AbundanceTSVPath(abundance_tsv)
 
 
 class BasicTrainingOptions:
@@ -913,11 +901,10 @@ def calc_abundance(
         logger.info("\tOrder of columns is:")
         for i, samplename in enumerate(abundance.samplenames):
             logger.info(f"\t{i:>6}: {samplename}")
-    elif isinstance(paths, AEMBPaths):
-        paths = list(paths.paths)
-        logger.info(f"\tParsing {len(paths)} AEMB TSV files")
-        abundance = vamb.parsebam.Abundance.from_aemb(
-            paths,
+    elif isinstance(paths, AbundanceTSVPath):
+        logger.info(f'\tParsing abundance from TSV at "{paths.path}"')
+        abundance = vamb.parsebam.Abundance.from_tsv(
+            paths.path,
             comp_metadata,
         )
         abundance.save(outdir.joinpath("abundance.npz"))
@@ -1687,10 +1674,10 @@ def add_abundance_arguments(subparser: argparse.ArgumentParser):
         help="Dir with .bam files to use",
     )
     abundanceos.add_argument(
-        "--aemb",
+        "--abundance_tsv",
         metavar="",
         type=Path,
-        help="Dir with .tsv files from strobealign --aemb",
+        help='Path to TSV file of precomputed abundances with header being "contigname(\\t<samplename>)*"',
     )
     abundanceos.add_argument(
         "--abundance",
diff --git a/vamb/parsebam.py b/vamb/parsebam.py
index b4f77d93..2d213029 100644
--- a/vamb/parsebam.py
+++ b/vamb/parsebam.py
@@ -236,55 +236,53 @@ def run_pycoverm(
         return (coverage, refhash)
 
     @classmethod
-    def from_aemb(
-        cls: type[A],
-        paths: list[Path],
-        comp_metadata: CompositionMetaData,
-    ) -> A:
-        # We cannot guarantee strobealign outputs the contigs in the right order,
-        # so we must reorder them.
-        identifier_to_index: dict[str, int] = {
-            s: i for (i, s) in enumerate(comp_metadata.identifiers)
-        }
-
-        result = _np.empty((len(identifier_to_index), len(paths)), dtype=_np.float32)
-        for col_num, path in enumerate(paths):
-            column = _np.full(len(identifier_to_index), float("nan"), dtype=_np.float32)
-            with vambtools.Reader(path) as file:
-                n_lines_seen = 0
-                for line in file:
-                    line = line.rstrip()
-                    if len(line) == 0:
-                        continue
-                    n_lines_seen += 1
-                    if n_lines_seen > len(column):
-                        raise ValueError(
-                            f'Too many rows in AEMB file "{path}", expected {len(column)}'
-                        )
-                    # strobealign reserves the right to add more columns in future versions
-                    (identifier, depth_str, *_) = line.split(maxsplit=2)
-                    index = identifier_to_index.get(identifier.decode())
-                    if index is None:
-                        raise ValueError(
-                            f'Identifier "{identifier}" seen in `strobealign --aemb` output file "{path}", '
-                            "but this identifier is not seen in the FASTA file.\n"
-                            "Verify that the AEMB file was generated by mapping to the same FASTA file "
-                            "as was input to Vamb."
-                        )
-                    column[index] = float(depth_str)
-
-            isnans = _np.isnan(column)
-            if isnans.any():
-                first_missing_index = _np.nonzero(isnans)[0][0]
-                missing_identifier = comp_metadata.identifiers[first_missing_index]
+    def from_tsv(cls: type[A], path: Path, comp_metadata: CompositionMetaData) -> A:
+        seen_identifiers: list[str] = []
+        with open(path) as file:
+            try:
+                header = next(file)
+            except StopIteration:
+                err = ValueError(f'Empty abundance TSV path at "{path}"')
+                raise err from None
+            columns = header.splitlines()[0].split("\t")
+            if len(columns) < 1:
                 raise ValueError(
-                    f'Identifier "{missing_identifier}" from the input FASTA file is missing from the AEMB file "{path}".'
+                    f'Expected at least 1 column in abundance TSV file at "{path}"'
+                )
+            if columns[0] != "contigname":
+                raise ValueError('First column in header must be "contigname"')
+            samples = columns[1:]
+            n_samples = len(samples)
+            matrix = _np.empty((comp_metadata.nseqs, n_samples), dtype=_np.float32)
+            # Since we read the header already and this is zero-indexed, we need to add 2
+            for line_no_minus_two, line in enumerate(file):
+                if line_no_minus_two == comp_metadata.nseqs:
+                    raise ValueError(
+                        f'Too many rows in abundance TSV file "{path}", expected {comp_metadata.nseqs + 1}, got at least {line_no_minus_two + 1}'
+                    )
+                stripped = line.rstrip()
+                if not line:
+                    continue
+                fields = stripped.split("\t")
+                if len(fields) != n_samples + 1:
+                    raise ValueError(
+                        f'In abundance TSV file "{path}", on line {line_no_minus_two + 2}, expected {n_samples + 1} columns, found {len(fields)}'
+                    )
+                for i in range(n_samples):
+                    matrix[line_no_minus_two, i] = float(fields[i + 1])
+                seen_identifiers.append(fields[0])
+
+            if line_no_minus_two + 1 != comp_metadata.nseqs:
+                raise ValueError(
+                    f'Too few rows in abundance TSV file "{path}", expected {comp_metadata.nseqs + 1}, got {line_no_minus_two + 1}'
                 )
-            result[:, col_num] = column
 
-        return cls(
-            result,
-            [str(i) for i in paths],
-            0.0,
+        vambtools.RefHasher.verify_refhash(
+            vambtools.RefHasher.hash_refnames(seen_identifiers),
             comp_metadata.refhash,
+            "abundance TSV",
+            "composition",
+            (seen_identifiers, comp_metadata.identifiers),
         )
+
+        return cls(matrix, samples, 0.0, comp_metadata.refhash)