diff --git a/.github/workflows/cli_vamb.yml b/.github/workflows/cli_vamb.yml
index 6b86d6d9..686fb90a 100644
--- a/.github/workflows/cli_vamb.yml
+++ b/.github/workflows/cli_vamb.yml
@@ -58,6 +58,6 @@ jobs:
         cat outdir_taxometer/log.txt
     - name: Run k-means reclustering
       run: |
-        vamb recluster --outdir outdir_recluster --fasta catalogue_mock.fna.gz --rpkm abundance_mock.npz --latent_path outdir_taxvamb/vaevae_latent.npy --clusters_path outdir_taxvamb/vaevae_clusters.tsv --hmmout_path markers_mock.hmmout --algorithm kmeans --minfasta 200000
+        vamb recluster --outdir outdir_recluster --fasta catalogue_mock.fna.gz --rpkm abundance_mock.npz --latent_path outdir_taxvamb/vaevae_latent.npy --clusters_path outdir_taxvamb/vaevae_clusters_split.tsv --hmmout_path markers_mock.hmmout --algorithm kmeans --minfasta 200000
         ls -la outdir_recluster
         cat outdir_recluster/log.txt
diff --git a/test/test_parsecontigs.py b/test/test_parsecontigs.py
index 68a6bd38..753f3f80 100644
--- a/test/test_parsecontigs.py
+++ b/test/test_parsecontigs.py
@@ -2,7 +2,6 @@ import unittest
 import random
 import numpy as np
-import warnings
 
 import testtools
 from vamb.parsecontigs import Composition, CompositionMetaData
@@ -41,26 +40,9 @@ def test_unique_names(self):
             1000,
         )
 
-    # Does not warn
-    def test_nowarn(self):
-        with warnings.catch_warnings():
-            warnings.simplefilter("error", UserWarning)
-            Composition.from_file(self.large_io, minlength=250)
-
-    def test_warns_n_contigs(self):
-        with self.assertWarns(UserWarning):
-            Composition.from_file(self.io, minlength=250)
-
-    def test_warns_minlength(self):
-        with self.assertWarns(UserWarning):
-            Composition.from_file(self.large_io, minlength=275)
-
     def test_filter_minlength(self):
         minlen = 500
-
-        with self.assertWarns(UserWarning):
-            composition = Composition.from_file(self.io, minlength=450)
-
+        composition = Composition.from_file(self.io, minlength=450)
         md = composition.metadata
         hash1 = md.refhash
 
@@ -99,8 +81,7 @@ def test_minlength(self):
             Composition.from_file(self.io, minlength=3)
 
     def test_properties(self):
-        with self.assertWarns(UserWarning):
-            composition = Composition.from_file(self.io, minlength=420)
+        composition = Composition.from_file(self.io, minlength=420)
         passed = list(filter(lambda x: len(x.sequence) >= 420, self.records))
 
         self.assertEqual(composition.nseqs, len(composition.metadata.identifiers))
@@ -122,8 +103,7 @@ def test_properties(self):
 
     def test_save_load(self):
         buf = io.BytesIO()
-        with self.assertWarns(UserWarning):
-            composition_1 = Composition.from_file(self.io)
+        composition_1 = Composition.from_file(self.io)
         md1 = composition_1.metadata
         composition_1.save(buf)
         buf.seek(0)
@@ -153,10 +133,8 @@ def test_windows_newlines(self):
 
         buf1.seek(0)
         buf2.seek(0)
-        with self.assertWarns(UserWarning):
-            comp1 = Composition.from_file(buf1)
-        with self.assertWarns(UserWarning):
-            comp2 = Composition.from_file(buf2)
+        comp1 = Composition.from_file(buf1)
+        comp2 = Composition.from_file(buf2)
 
         self.assertEqual(comp1.metadata.refhash, comp2.metadata.refhash)
         self.assertTrue(np.all(comp1.matrix == comp2.matrix))
diff --git a/test/test_results.py b/test/test_results.py
index 59fce4ea..d38ccf55 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -30,9 +30,8 @@ def setUp(self):
         self.io.seek(0)
 
     def test_runs(self):
-        with self.assertRaises(UserWarning):
-            comp = vamb.parsecontigs.Composition.from_file(self.io)
-            self.assertIsInstance(comp, vamb.parsecontigs.Composition)
+        comp = vamb.parsecontigs.Composition.from_file(self.io)
+        self.assertIsInstance(comp, vamb.parsecontigs.Composition)
 
 if TEST_UNSTABLE_HASHES:
diff --git a/test/test_vambtools.py b/test/test_vambtools.py
index 837ee36b..6ab0106b 100644
--- a/test/test_vambtools.py
+++ b/test/test_vambtools.py
@@ -10,8 +10,11 @@ import numpy as np
 import string
 import torch
+import pathlib
+import shutil
 
 import vamb
+from vamb.vambtools import BinSplitter
 import testtools
 
 PARENTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -220,7 +223,7 @@ def test_bad_files(self):
         # String input
         with self.assertRaises(TypeError):
             data = ">abc\nTAG\na\nAC".splitlines()
-            list(vamb.vambtools.byte_iterfasta(data))
+            list(vamb.vambtools.byte_iterfasta(data))  # type:ignore
 
     # Various correct formats
     def test_good_files(self):
@@ -379,16 +382,48 @@ class TestBinSplit(unittest.TestCase):
         ("s12-bin2", {"s12-c0"}),
     ]
 
+    def test_inert(self):
+        self.assertEqual(
+            BinSplitter("").splitter, BinSplitter.inert_splitter().splitter
+        )
+
     def test_split(self):
-        self.assertEqual(list(vamb.vambtools.binsplit(self.before, "-")), self.after)
+        self.assertEqual(list(BinSplitter("-").binsplit(self.before)), self.after)
 
     def test_badsep(self):
         with self.assertRaises(KeyError):
-            list(vamb.vambtools.binsplit(self.before, "2"))
+            list(BinSplitter("2").binsplit(self.before))
 
     def test_badtype(self):
-        with self.assertRaises(TypeError):
-            list(vamb.vambtools.binsplit([(1, [2])], ""))
+        with self.assertRaises(Exception):
+            list(BinSplitter("x").binsplit([(1, [2])]))  # type:ignore
+
+    def test_nosplit(self):
+        self.assertEqual(
+            list(BinSplitter("").binsplit(self.before)),
+            [(k, set(s)) for (k, s) in self.before],
+        )
+
+    def test_initialize(self):
+        # Nothing happens to an inert splitter
+        b = BinSplitter.inert_splitter()
+        s = b.splitter
+        b.initialize([""])
+        self.assertEqual(s, b.splitter)
+
+        b = BinSplitter("X")
+        with self.assertRaises(ValueError):
+            b.initialize(["AXC", "S1C2"])
+
+        b = BinSplitter(None)
+        with self.assertWarns(UserWarning):
+            b.initialize(["S1C2", "ABC"])
+
+        b = BinSplitter(None)
+        b.initialize(["S1C2", "KMCPLK"])
+
+        b = BinSplitter("XYZ")
+        b.initialize(["ABXYZCD", "KLMXYZA"])
 
 
 class TestConcatenateFasta(unittest.TestCase):
@@ -459,8 +494,8 @@ def setUp(self):
         self.io.truncate(0)
         self.io.seek(0)
 
-    def linesof(self, str):
-        return list(filter(lambda x: not x.startswith("#"), str.splitlines()))
+    def linesof(self, string: str):
+        return list(filter(lambda x: not x.startswith("#"), string.splitlines()))
 
     def conforms(self, str, clusters):
         lines = self.linesof(str)
@@ -488,46 +523,30 @@ def conforms(self, str, clusters):
         self.assertEqual(read_names, printed_names)
 
     def test_not_writable(self):
-        buf = io.BufferedReader(io.BytesIO(b""))
-        with self.assertRaises(ValueError):
-            vamb.vambtools.write_clusters(buf, self.test_clusters)
-
-    def test_invalid_max_clusters(self):
-        with self.assertRaises(ValueError):
-            vamb.vambtools.write_clusters(self.io, self.test_clusters, max_clusters=0)
-
+        buf = io.BufferedReader(io.BytesIO(b""))  # type:ignore
         with self.assertRaises(ValueError):
-            vamb.vambtools.write_clusters(self.io, self.test_clusters, max_clusters=-11)
-
-    def test_header_has_newline(self):
-        with self.assertRaises(ValueError):
-            vamb.vambtools.write_clusters(self.io, self.test_clusters, header="foo\n")
+            vamb.vambtools.write_clusters(buf, self.test_clusters)  # type:ignore
 
     def test_normal(self):
-        vamb.vambtools.write_clusters(self.io, self.test_clusters, header="someheader")
-        self.assertTrue(self.io.getvalue().startswith("# someheader"))
+        vamb.vambtools.write_clusters(self.io, self.test_clusters)
         self.conforms(self.io.getvalue(), self.test_clusters)
 
     def test_max_clusters(self):
-        vamb.vambtools.write_clusters(self.io, self.test_clusters, max_clusters=2)
+        vamb.vambtools.write_clusters(self.io, self.test_clusters[:2])
         lines = self.linesof(self.io.getvalue())
         self.assertEqual(len(lines), 9)
         self.conforms(self.io.getvalue(), self.test_clusters[:2])
 
-    def test_min_size(self):
-        vamb.vambtools.write_clusters(self.io, self.test_clusters, min_size=5)
-        lines = self.linesof(self.io.getvalue())
-        self.assertEqual(len(lines), 6)
-        self.conforms(self.io.getvalue(), self.test_clusters[1:2])
-
 
 class TestWriteBins(unittest.TestCase):
     file = io.BytesIO()
     N_BINS = 10
     minsize = 5 * 175  # mean of bin size
 
-    dirname = os.path.join(
-        tempfile.gettempdir(),
-        "".join(random.choices(string.ascii_letters + string.digits, k=10)),
+    dir = pathlib.Path(
+        os.path.join(
+            tempfile.gettempdir(),
+            "".join(random.choices(string.ascii_letters + string.digits, k=10)),
+        )
     )
 
     @classmethod
@@ -552,7 +571,7 @@ def setUp(self):
 
     def tearDown(self):
         try:
-            os.rmdir(self.dirname)
+            shutil.rmtree(self.dir)
         except FileNotFoundError:
             pass
 
@@ -560,26 +579,32 @@ def test_bad_params(self):
         # Too many bins for maxbins
         with self.assertRaises(ValueError):
             vamb.vambtools.write_bins(
-                self.dirname, self.bins, self.file, maxbins=self.N_BINS - 1
+                self.dir, self.bins, self.file, maxbins=self.N_BINS - 1
             )
 
         # Negative minsize
         with self.assertRaises(ValueError):
             vamb.vambtools.write_bins(
-                self.dirname, self.bins, self.file, maxbins=self.N_BINS + 1, minsize=-1
+                self.dir, self.bins, self.file, maxbins=self.N_BINS + 1, minsize=-1
             )
 
         # Parent does not exist
         with self.assertRaises(NotADirectoryError):
             vamb.vambtools.write_bins(
-                "svogew/foo", self.bins, self.file, maxbins=self.N_BINS + 1
+                pathlib.Path("svogew/foo"),
+                self.bins,
+                self.file,
+                maxbins=self.N_BINS + 1,
             )
 
-        # Target file already exists
+        # Target is an existing file
         with self.assertRaises(FileExistsError):
             with tempfile.NamedTemporaryFile() as file:
                 vamb.vambtools.write_bins(
-                    file.name, self.bins, self.file, maxbins=self.N_BINS + 1
+                    pathlib.Path(file.name),
+                    self.bins,
+                    self.file,
+                    maxbins=self.N_BINS + 1,
                 )
 
         # One contig missing from fasta dict
@@ -587,13 +612,17 @@ def test_bad_params(self):
             bins = {k: v.copy() for k, v in self.bins.items()}
             next(iter(bins.values())).add("a_new_bin_which_does_not_exist")
             vamb.vambtools.write_bins(
-                self.dirname, bins, self.file, maxbins=self.N_BINS + 1
+                self.dir, bins, self.file, maxbins=self.N_BINS + 1
            )
 
     def test_round_trip(self):
         with tempfile.TemporaryDirectory() as dir:
             vamb.vambtools.write_bins(
-                dir, self.bins, self.file, maxbins=self.N_BINS, minsize=self.minsize
+                pathlib.Path(dir),
+                self.bins,
+                self.file,
+                maxbins=self.N_BINS,
+                minsize=self.minsize,
             )
 
             reconstructed_bins: dict[str, set[str]] = dict()
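
(A minimal sketch of the BinSplitter behavior the tests above exercise; the identifiers are made up, and the output assumes the implementation added later in this patch:)

    from vamb.vambtools import BinSplitter

    bs = BinSplitter(None)           # default: implicit separator "C"
    bs.initialize(["S1C2", "S2C7"])  # validates identifiers; warns and disables on mismatch
    print(dict(bs.binsplit([("1", {"S1C2", "S2C7"})])))
    # {'S1C1': {'S1C2'}, 'S2C1': {'S2C7'}}

    inert = BinSplitter("")          # explicit empty string: binsplitting disabled
    print(dict(inert.binsplit([("1", {"S1C2", "S2C7"})])))
    # {'1': {'S1C2', 'S2C7'}} (set order may vary)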
diff --git a/vamb/__main__.py b/vamb/__main__.py
index 4ed879db..5ed105f9 100755
--- a/vamb/__main__.py
+++ b/vamb/__main__.py
@@ -11,6 +11,7 @@ import datetime
 import time
 import random
+import itertools
 from math import isfinite
 from typing import Optional, IO, Tuple, Union
 from pathlib import Path
@@ -50,14 +51,19 @@ class CompositionPath(type(Path())):
 
 class CompositionOptions:
-    __slots__ = ["path", "min_contig_length"]
+    __slots__ = ["path", "min_contig_length", "warn_on_few_seqs"]
 
     def __init__(
-        self, fastapath: Optional[Path], npzpath: Optional[Path], min_contig_length: int
+        self,
+        fastapath: Optional[Path],
+        npzpath: Optional[Path],
+        min_contig_length: int,
+        warn_on_few_seqs: bool,
     ):
         assert isinstance(fastapath, (Path, type(None)))
         assert isinstance(npzpath, (Path, type(None)))
         assert isinstance(min_contig_length, int)
+        assert isinstance(warn_on_few_seqs, bool)
 
         if min_contig_length < 250:
             raise argparse.ArgumentTypeError(
@@ -79,6 +85,7 @@ def __init__(
             assert npzpath is not None
             self.path = CompositionPath(npzpath)
         self.min_contig_length = min_contig_length
+        self.warn_on_few_seqs = warn_on_few_seqs
 
 
 class AbundancePath(type(Path())):
@@ -238,7 +245,7 @@ class ReclusteringOptions:
         "latent_path",
         "clusters_path",
         "hmmout_path",
-        "binsplit_separator",
+        "binsplitter",
         "algorithm",
     ]
 
@@ -263,9 +270,9 @@ def __init__(
 
         self.latent_path = latent_path
         self.clusters_path = clusters_path
-        self.binsplit_separator = binsplit_separator
         self.hmmout_path = hmmout_path
         self.algorithm = algorithm
+        self.binsplitter = vamb.vambtools.BinSplitter(binsplit_separator)
 
 
 class EncoderOptions:
@@ -402,22 +409,19 @@ class ClusterOptions:
     __slots__ = [
         "window_size",
         "min_successes",
-        "min_cluster_size",
         "max_clusters",
-        "binsplit_separator",
+        "binsplitter",
     ]
 
     def __init__(
         self,
         window_size: int,
         min_successes: int,
-        min_cluster_size: int,
         max_clusters: Optional[int],
         binsplit_separator: Optional[str],
     ):
         assert isinstance(window_size, int)
         assert isinstance(min_successes, int)
-        assert isinstance(min_cluster_size, int)
         assert isinstance(max_clusters, (int, type(None)))
         assert isinstance(binsplit_separator, (str, type(None)))
 
@@ -425,10 +429,6 @@ def __init__(
             raise argparse.ArgumentTypeError("Window size must be at least 1")
         self.window_size = window_size
 
-        if min_cluster_size < 1:
-            raise argparse.ArgumentTypeError("Minimum cluster size must be at least 0")
-        self.min_cluster_size = min_cluster_size
-
         if min_successes < 1 or min_successes > window_size:
             raise argparse.ArgumentTypeError(
                 "Minimum cluster size must be in 1:windowsize"
@@ -438,12 +438,7 @@ def __init__(
         if max_clusters is not None and max_clusters < 1:
             raise argparse.ArgumentTypeError("Max clusters must be at least 1")
         self.max_clusters = max_clusters
-
-        if binsplit_separator is not None and len(binsplit_separator) == 0:
-            raise argparse.ArgumentTypeError(
-                "Binsplit separator cannot be an empty string"
-            )
-        self.binsplit_separator = binsplit_separator
+        self.binsplitter = vamb.vambtools.BinSplitter(binsplit_separator)
 
 
 class VambOptions:
@@ -514,6 +509,7 @@ def log(string: str, logfile: IO[str], indent: int = 0):
 def calc_tnf(
     options: CompositionOptions,
     outdir: Path,
+    binsplitter: vamb.vambtools.BinSplitter,
     logfile: IO[str],
 ) -> vamb.parsecontigs.Composition:
     begintime = time.time()
@@ -535,6 +531,30 @@ def calc_tnf(
         )
 
     composition.save(outdir.joinpath("composition.npz"))
+    binsplitter.initialize(composition.metadata.identifiers)
+
+    if options.warn_on_few_seqs and composition.nseqs < 20_000:
+        message = (
+            f"WARNING: Kept only {composition.nseqs} sequences from FASTA file. "
+            "We normally expect 20,000 sequences or more to prevent overfitting. "
+            "As a deep learning model, VAEs are prone to overfitting with too few sequences. "
+            "You may want to bin more samples at a time, lower the beta parameter, "
+            "or use a different binner altogether."
+        )
+        vamb.vambtools.log_and_warn(message, logfile=logfile)
+
+    # Warn the user if any contigs were filtered away for being smaller
+    # than the threshold.
+    if not np.all(composition.metadata.mask):
+        n_removed = len(composition.metadata.mask) - np.sum(composition.metadata.mask)
+        message = (
+            f"WARNING: The minimum sequence length has been set to {options.min_contig_length}, "
+            f"but {n_removed} sequences fell below this threshold and were filtered away."
+            "\nBetter results are obtained if the sequence file is filtered to the minimum "
+            "sequence length before mapping."
+        )
+        vamb.vambtools.log_and_warn(message, logfile=logfile)
 
     elapsed = round(time.time() - begintime, 2)
     print("", file=logfile)
     log(
@@ -703,18 +723,19 @@ def trainaae(
     return latent, clusters_y_dict
 
 
-def cluster(
+def cluster_and_write_files(
+    vamb_options: VambOptions,
     cluster_options: ClusterOptions,
-    clusterspath: Path,
+    base_clusters_name: str,  # e.g. /foo/bar/vae -> /foo/bar/vae_unsplit.tsv
+    bins_dir: Path,
+    fasta_catalogue: Path,
     latent: np.ndarray,
-    contignames: Sequence[str],  # of dtype object
-    lengths: Sequence[int],  # of dtype object
-    vamb_options: VambOptions,
+    sequence_names: Sequence[str],
+    sequence_lens: Sequence[int],
     logfile: IO[str],
-    cluster_prefix: str,
-) -> None:
+):
     begintime = time.time()
-
+    # Create cluster iterator
     log("\nClustering", logfile)
     log(f"Windowsize: {cluster_options.window_size}", logfile, 1)
     log(
@@ -723,127 +744,117 @@ def cluster(
         1,
     )
     log(f"Max clusters: {cluster_options.max_clusters}", logfile, 1)
-    log(f"Min cluster size: {cluster_options.min_cluster_size}", logfile, 1)
     log(f"Use CUDA for clustering: {vamb_options.cuda}", logfile, 1)
-    log(
-        "Separator: {}".format(
-            None
-            if cluster_options.binsplit_separator is None
-            else ('"' + cluster_options.binsplit_separator + '"')
-        ),
-        logfile,
-        1,
-    )
+    log(f"Binsplitter: {cluster_options.binsplitter.log_string()}", logfile, 1)
 
     cluster_generator = vamb.cluster.ClusterGenerator(
         latent,
-        lengths,  # type:ignore
+        sequence_lens,  # type:ignore
         windowsize=cluster_options.window_size,
         minsuccesses=cluster_options.min_successes,
         destroy=True,
         normalized=False,
-        # cuda=vamb_options.cuda,
-        cuda=False,  # disabled until clustering is fixed
+        cuda=vamb_options.cuda,
         rng_seed=vamb_options.seed,
     )
 
     renamed = (
-        (str(cluster_index + 1), {contignames[i] for i in members})
+        (str(cluster_index + 1), {sequence_names[i] for i in members})
         for (cluster_index, (_, members)) in enumerate(
             map(lambda x: x.as_tuple(), cluster_generator)
         )
     )
 
-    # Binsplit if given a separator
-    if cluster_options.binsplit_separator is not None:
-        maybe_split = vamb.vambtools.binsplit(
-            renamed, cluster_options.binsplit_separator
-        )
-    else:
-        maybe_split = renamed
-
-    with open(clusterspath, "w") as clustersfile:
-        clusternumber, ncontigs = vamb.vambtools.write_clusters(
-            clustersfile,
-            maybe_split,
-            max_clusters=cluster_options.max_clusters,
-            min_size=cluster_options.min_cluster_size,
-            rename=False,
-            cluster_prefix=cluster_prefix,
-        )
-
-    print("", file=logfile)
-    log(f"Clustered {ncontigs} contigs in {clusternumber} bins", logfile, 1)
-
+    # This also works correctly when max_clusters is None
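+    # (islice with a stop of None takes every element, e.g.
+    # list(itertools.islice(range(5), None)) == [0, 1, 2, 3, 4])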
+    first_clusters = itertools.islice(renamed, cluster_options.max_clusters)
+    unsplit_clusters = dict(first_clusters)
     elapsed = round(time.time() - begintime, 2)
     log(f"Clustered contigs in {elapsed} seconds.", logfile, 1)
 
+    write_clusters_and_bins(
+        vamb_options,
+        cluster_options.binsplitter,
+        base_clusters_name,
+        bins_dir,
+        fasta_catalogue,
+        unsplit_clusters,
+        sequence_names,
+        sequence_lens,
+        logfile,
+    )
 
-def write_fasta(
-    outdir: Path,
-    clusterspath: Path,
-    fastapath: Path,
-    contignames: Sequence[str],  # of object
-    contiglengths: np.ndarray,
-    minfasta: int,
+
+def write_clusters_and_bins(
+    vamb_options: VambOptions,
+    binsplitter: vamb.vambtools.BinSplitter,
+    base_clusters_name: str,  # e.g. /foo/bar/vae -> /foo/bar/vae_unsplit.tsv
+    bins_dir: Path,
+    fasta_catalogue: Path,
+    clusters: dict[str, set[str]],
+    sequence_names: Sequence[str],
+    sequence_lens: Sequence[int],
     logfile: IO[str],
-    separator: Optional[str],
-) -> None:
+):
+    # Write unsplit clusters to file
     begintime = time.time()
-
-    log("\nWriting FASTA files", logfile)
-    log("Minimum FASTA size: " + str(minfasta), logfile, 1)
-    assert len(contignames) == len(contiglengths)
-
-    lengthof = dict(zip(contignames, contiglengths))
-    filtered_clusters: dict[str, set[str]] = dict()
-
-    with open(clusterspath) as file:
-        clusters = vamb.vambtools.read_clusters(file)
-
-    for cluster, contigs in clusters.items():
-        size = sum(lengthof[contig] for contig in contigs)
-        if size >= minfasta:
-            filtered_clusters[cluster] = clusters[cluster]
-
-    del lengthof, clusters
-    keep: set[str] = set()
-    for contigs in filtered_clusters.values():
-        keep.update(set(contigs))
-
-    with vamb.vambtools.Reader(fastapath) as file:
-        vamb.vambtools.write_bins(
-            outdir.joinpath("bins"),
-            filtered_clusters,
-            file,
-            maxbins=None,
-            separator=separator,
+    unsplit_path = Path(base_clusters_name + "_unsplit.tsv")
+    with open(unsplit_path, "w") as file:
+        (n_unsplit_clusters, n_contigs) = vamb.vambtools.write_clusters(
+            file, clusters.items()
         )
 
-    ncontigs = sum(map(len, filtered_clusters.values()))
-    nfiles = len(filtered_clusters)
-    print("", file=logfile)
-    log(f"Wrote {ncontigs} contigs to {nfiles} FASTA files", logfile, 1)
+    # Open unsplit clusters and split them
+    if binsplitter.splitter is not None:
+        split_path = Path(base_clusters_name + "_split.tsv")
+        clusters = dict(binsplitter.binsplit(clusters.items()))
+        with open(split_path, "w") as file:
+            (n_split_clusters, _) = vamb.vambtools.write_clusters(
+                file, clusters.items()
+            )
+        msg = f"Clustered {n_contigs} contigs in {n_split_clusters} split bins ({n_unsplit_clusters} clusters)"
+    else:
+        msg = f"Clustered {n_contigs} contigs in {n_unsplit_clusters} unsplit bins"
+    log("\n" + msg, logfile, 1)
 
     elapsed = round(time.time() - begintime, 2)
-    log(f"Wrote FASTA in {elapsed} seconds.", logfile, 1)
+    log(f"Wrote clusters file(s) in {elapsed} seconds.", logfile, 1)
+
+    # Write bins, if necessary
+    if vamb_options.min_fasta_output_size is not None:
+        starttime = time.time()
+        filtered_clusters: dict[str, set[str]] = dict()
+        assert len(sequence_lens) == len(sequence_names)
+        sizeof = dict(zip(sequence_names, sequence_lens))
+        for binname, contigs in clusters.items():
+            if sum(sizeof[c] for c in contigs) >= vamb_options.min_fasta_output_size:
+                filtered_clusters[binname] = contigs
+
+        with vamb.vambtools.Reader(fasta_catalogue) as file:
+            vamb.vambtools.write_bins(
+                bins_dir,
+                filtered_clusters,
+                file,
+                None,
+                vamb_options.min_fasta_output_size,
+            )
+        elapsed = round(time.time() - starttime, 2)
+        n_bins = len(filtered_clusters)
+        n_contigs = sum(len(v) for v in filtered_clusters.values())
+        log(
+            f"\nWrote {n_bins} bins with {n_contigs} sequences in {elapsed} seconds.",
+            logfile,
+            1,
+        )
 
 
 def load_composition_and_abundance(
     vamb_options: VambOptions,
     comp_options: CompositionOptions,
     abundance_options: AbundanceOptions,
+    binsplitter: vamb.vambtools.BinSplitter,
     logfile: IO[str],
 ) -> Tuple[vamb.parsecontigs.Composition, vamb.parsebam.Abundance]:
-    log("Starting Vamb version " + ".".join(map(str, vamb.__version__)), logfile)
".".join(map(str, vamb.__version__)), logfile) - log("Date and time is " + str(datetime.datetime.now()), logfile, 1) - log("Random seed is " + str(vamb_options.seed), logfile, 1) - begintime = time.time() - - # Get TNFs, save as npz - composition = calc_tnf(comp_options, vamb_options.out_dir, logfile) - - # Parse BAMs, save as npz + composition = calc_tnf(comp_options, vamb_options.out_dir, binsplitter, logfile) abundance = calc_rpkm( abundance_options, vamb_options.out_dir, @@ -851,12 +862,6 @@ def load_composition_and_abundance( vamb_options.n_threads, logfile, ) - time_generating_input = round(time.time() - begintime, 2) - log( - f"\nTNF and coabundances generated in {time_generating_input} seconds.", - logfile, - 1, - ) return (composition, abundance) @@ -874,12 +879,11 @@ def run( vae_training_options = training_options.vae_options aae_training_options = training_options.aae_options - begintime = time.time() - composition, abundance = load_composition_and_abundance( vamb_options=vamb_options, comp_options=comp_options, abundance_options=abundance_options, + binsplitter=cluster_options.binsplitter, logfile=logfile, ) data_loader = vamb.encode.make_dataloader( @@ -938,119 +942,48 @@ def run( if vae_options is not None: assert latent is not None assert comp_metadata.nseqs == len(latent) - # Cluster, save tsv file - clusterspath = vamb_options.out_dir.joinpath("vae_clusters.tsv") - cluster( + cluster_and_write_files( + vamb_options, cluster_options, - clusterspath, + str(vamb_options.out_dir.joinpath("vae_clusters")), + vamb_options.out_dir.joinpath("bins"), + comp_options.path, latent, comp_metadata.identifiers, # type:ignore comp_metadata.lengths, # type:ignore - vamb_options, logfile, - "vae_", ) - log("VAE latent clustered", logfile, 1) - del latent - fin_cluster_latent = time.time() - - if vamb_options.min_fasta_output_size is not None: - path = comp_options.path - assert isinstance(path, FASTAPath) - write_fasta( - vamb_options.out_dir, - clusterspath, - path, - comp_metadata.identifiers, # type:ignore - comp_metadata.lengths, - vamb_options.min_fasta_output_size, - logfile, - separator=cluster_options.binsplit_separator, - ) - - writing_bins_time = round(time.time() - fin_cluster_latent, 2) - log(f"VAE bins written in {writing_bins_time} seconds.", logfile, 1) if aae_options is not None: assert latent_z is not None assert clusters_y_dict is not None assert comp_metadata.nseqs == len(latent_z) - # Cluster, save tsv file - clusterspath = vamb_options.out_dir.joinpath("aae_z_clusters.tsv") - - cluster( + cluster_and_write_files( + vamb_options, cluster_options, - clusterspath, + str(vamb_options.out_dir.joinpath("aae_z_clusters")), + vamb_options.out_dir.joinpath("bins"), + comp_options.path, latent_z, comp_metadata.identifiers, # type:ignore comp_metadata.lengths, # type:ignore - vamb_options, logfile, - "aae_z_", ) - - fin_cluster_latent_z = time.time() - log("AAE z latent clustered.", logfile, 1) - del latent_z - - if vamb_options.min_fasta_output_size is not None: - path = comp_options.path - assert isinstance(path, FASTAPath) - write_fasta( - vamb_options.out_dir, - clusterspath, - path, - comp_metadata.identifiers, # type:ignore - comp_metadata.lengths, - vamb_options.min_fasta_output_size, - logfile, - separator=cluster_options.binsplit_separator, - ) - time_writing_bins_z = time.time() - writing_bins_time_z = round(time_writing_bins_z - fin_cluster_latent_z, 2) - log(f"AAE z bins written in {writing_bins_time_z} seconds.", logfile, 1) - - clusterspath = 
vamb_options.out_dir.joinpath("aae_y_clusters.tsv") - # Binsplit if given a separator - if cluster_options.binsplit_separator is not None: - maybe_split = vamb.vambtools.binsplit( - clusters_y_dict.items(), cluster_options.binsplit_separator - ) - else: - maybe_split = clusters_y_dict.items() - with open(clusterspath, "w") as clustersfile: - clusternumber, ncontigs = vamb.vambtools.write_clusters( - clustersfile, - maybe_split, - max_clusters=cluster_options.max_clusters, - min_size=cluster_options.min_cluster_size, - rename=False, - cluster_prefix="aae_y_", - ) - - print("", file=logfile) - log(f"Clustered {ncontigs} contigs in {clusternumber} bins", logfile, 1) - time_start_writin_z_bins = time.time() - if vamb_options.min_fasta_output_size is not None: - path = comp_options.path - assert isinstance(path, FASTAPath) - write_fasta( - vamb_options.out_dir, - clusterspath, - path, - comp_metadata.identifiers, # type:ignore - comp_metadata.lengths, - vamb_options.min_fasta_output_size, - logfile, - separator=cluster_options.binsplit_separator, - ) - time_writing_bins_y = time.time() - writing_bins_time_y = round(time_writing_bins_y - time_start_writin_z_bins, 2) - log(f"AAE y bins written in {writing_bins_time_y} seconds.", logfile, 1) - - log(f"\nCompleted Vamb in {round(time.time() - begintime, 2)} seconds.", logfile, 0) + # We enforce this in the VAEAAEOptions constructor, see comment there + assert cluster_options.max_clusters is None + write_clusters_and_bins( + vamb_options, + cluster_options.binsplitter, + str(vamb_options.out_dir.joinpath("aae_y_clusters")), + vamb_options.out_dir.joinpath("bins"), + comp_options.path, + clusters_y_dict, + comp_metadata.identifiers, # type:ignore + comp_metadata.lengths, # type:ignore + logfile, + ) def parse_mmseqs_taxonomy( @@ -1215,12 +1148,14 @@ def extract_and_filter_data( vamb_options: VambOptions, comp_options: CompositionOptions, abundance_options: AbundanceOptions, + binsplitter: vamb.vambtools.BinSplitter, logfile: IO[str], ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: composition, abundance = load_composition_and_abundance( vamb_options=vamb_options, comp_options=comp_options, abundance_options=abundance_options, + binsplitter=binsplitter, logfile=logfile, ) @@ -1246,6 +1181,7 @@ def run_taxonomy_predictor( vamb_options=vamb_options, comp_options=comp_options, abundance_options=abundance_options, + binsplitter=vamb.vambtools.BinSplitter.inert_splitter(), logfile=logfile, ) assert taxonomy_options.taxonomy_path is not None @@ -1278,6 +1214,7 @@ def run_vaevae( vamb_options=vamb_options, comp_options=comp_options, abundance_options=abundance_options, + binsplitter=cluster_options.binsplitter, logfile=logfile, ) @@ -1402,38 +1339,17 @@ def run_vaevae( np.save(LATENT_PATH, latent_both) # Cluster, save tsv file - clusterspath = vamb_options.out_dir.joinpath("vaevae_clusters.tsv") - cluster( + cluster_and_write_files( + vamb_options, cluster_options, - clusterspath, + str(vamb_options.out_dir.joinpath("vaevae_clusters")), + vamb_options.out_dir.joinpath("bins"), + comp_options.path, latent_both, - contignames, - lengths, - vamb_options, + contignames, # type:ignore + lengths, # type:ignore logfile, - "vaevae_", ) - log("VAEVAE latent clustered", logfile, 1) - - del latent_both - fin_cluster_latent = time.time() - - if vamb_options.min_fasta_output_size is not None: - path = comp_options.path - assert isinstance(path, FASTAPath) - write_fasta( - vamb_options.out_dir, - clusterspath, - path, - contignames, - lengths, - 
-            vamb_options.min_fasta_output_size,
-            logfile,
-            separator=cluster_options.binsplit_separator,
-        )
-
-        writing_bins_time = round(time.time() - fin_cluster_latent, 2)
-        log(f"VAEVAE bins written in {writing_bins_time} seconds.", logfile, 1)
 
 
 def run_reclustering(
@@ -1448,6 +1364,7 @@ def run_reclustering(
         vamb_options=vamb_options,
         comp_options=comp_options,
         abundance_options=abundance_options,
+        binsplitter=reclustering_options.binsplitter,
         logfile=logfile,
     )
 
@@ -1475,45 +1392,21 @@ def run_reclustering(
         predictions_path=predictions_path,
     )
 
-    cluster_dict = defaultdict(set)
+    cluster_dict: defaultdict[str, set[str]] = defaultdict(set)
 
     for k, v in zip(reclustered, composition.metadata.identifiers):
         cluster_dict[k].add(v)
-    clusterspath = vamb_options.out_dir.joinpath("clusters_reclustered.tsv")
-    if reclustering_options.binsplit_separator is not None:
-        maybe_split = vamb.vambtools.binsplit(
-            cluster_dict.items(), reclustering_options.binsplit_separator
-        )
-    else:
-        maybe_split = cluster_dict.items()
-    with open(clusterspath, "w") as clustersfile:
-        clusternumber, ncontigs = vamb.vambtools.write_clusters(
-            clustersfile,
-            maybe_split,
-            rename=False,
-            cluster_prefix="recluster",
-        )
-        print("", file=logfile)
-        log(f"Clustered {ncontigs} contigs in {clusternumber} bins", logfile, 1)
-
-    fin_cluster_latent = time.time()
-
-    if vamb_options.min_fasta_output_size is not None:
-        path = comp_options.path
-        assert isinstance(path, FASTAPath)
-        write_fasta(
-            vamb_options.out_dir,
-            clusterspath,
-            path,
-            composition.metadata.identifiers,
-            composition.metadata.lengths,
-            vamb_options.min_fasta_output_size,
-            logfile,
-            separator=reclustering_options.binsplit_separator,
-        )
-
-    writing_bins_time = round(time.time() - fin_cluster_latent, 2)
-    log(f"Reclustered bins written in {writing_bins_time} seconds.", logfile, 1)
+    write_clusters_and_bins(
+        vamb_options,
+        reclustering_options.binsplitter,
+        str(vamb_options.out_dir.joinpath("clusters_reclustered")),
+        vamb_options.out_dir.joinpath("bins"),
+        comp_options.path,
+        cluster_dict,
+        composition.metadata.identifiers,  # type:ignore
+        composition.metadata.lengths,  # type:ignore
+        logfile,
+    )
 
 
 class BasicArguments(object):
@@ -1522,7 +1415,10 @@ class BasicArguments(object):
     def __init__(self, args):
         self.args = args
         self.comp_options = CompositionOptions(
-            self.args.fasta, self.args.composition, self.args.minlength
+            self.args.fasta,
+            self.args.composition,
+            self.args.minlength,
+            args.warn_on_few_seqs,
         )
         self.abundance_options = AbundanceOptions(
             self.args.bampaths,
@@ -1546,8 +1442,21 @@ def run(self):
         torch.set_num_threads(self.vamb_options.n_threads)
         try_make_dir(self.vamb_options.out_dir)
         with open(self.vamb_options.out_dir.joinpath(self.LOGS_PATH), "w") as logfile:
+            begintime = time.time()
+            log(
+                "Starting Vamb version " + ".".join(map(str, vamb.__version__)), logfile
+            )
+            log("Date and time is " + str(datetime.datetime.now()), logfile, 1)
+            log("Random seed is " + str(self.vamb_options.seed), logfile, 1)
+
             self.run_inner(logfile)
+            log(
+                f"\nCompleted Vamb in {round(time.time() - begintime, 2)} seconds.",
+                logfile,
+                0,
+            )
+
 
 class TaxometerArguments(BasicArguments):
     def __init__(self, args):
@@ -1595,7 +1504,6 @@ def __init__(self, args):
         self.cluster_options = ClusterOptions(
             args.window_size,
             args.min_successes,
-            args.min_cluster_size,
             args.max_clusters,
             args.binsplit_separator,
         )
@@ -1995,14 +1903,6 @@ def add_clustering_arguments(subparser):
         default=15,
         help="minimum success in window [15]",
     )
-    clusto.add_argument(
-        "-i",
- dest="min_cluster_size", - metavar="", - type=int, - default=1, - help="minimum cluster size [1]", - ) clusto.add_argument( "-c", dest="max_clusters", @@ -2016,8 +1916,12 @@ def add_clustering_arguments(subparser): dest="binsplit_separator", metavar="", type=str, + # This means: None is not passed, "" if passed but empty, e.g. "-o -c 5" + # otherwise a string. default=None, - help="binsplit separator [None = no split]", + const="", + nargs="?", + help="binsplit separator [C] (pass empty string to disable)", ) return subparser @@ -2307,8 +2211,10 @@ def main(): args = parser.parse_args() if args.subcommand == TAXOMETER: + args.warn_on_few_seqs = True runner = TaxometerArguments(args) elif args.subcommand == BIN: + args.warn_on_few_seqs = True if args.model_subcommand is None: vaevae_parserbin_parser.print_help() sys.exit(1) @@ -2319,6 +2225,8 @@ def main(): } runner = classes_map[args.model_subcommand](args) elif args.subcommand == RECLUSTER: + # Uniquely, the reclustering cannot overfit, so we don't need this warning + args.warn_on_few_seqs = False runner = ReclusteringArguments(args) else: # There are no more subcommands diff --git a/vamb/parsecontigs.py b/vamb/parsecontigs.py index fe08f062..5d3deb15 100644 --- a/vamb/parsecontigs.py +++ b/vamb/parsecontigs.py @@ -215,26 +215,4 @@ def from_file( _np.array(mask, dtype=bool), minlength, ) - - if len(metadata.lengths) < 20_000: - message = ( - f"WARNING: Parsed only {len(metadata.lengths)} sequences from FASTA file. " - "We normally expect 20,000 sequences or more to prevent overfitting. " - "As a deep learning model, VAEs are prone to overfitting with too few sequences. " - "You may want to bin more samples as a time, lower the beta parameter, " - "or use a different binner altogether." - ) - _vambtools.log_and_warn(message, logfile=logfile) - - # Warn the user if any contigs have been observed, which is smaller - # than the threshold. - if not _np.all(metadata.mask): - message = ( - f"WARNING: The minimum sequence length has been set to {minlength}, but a contig with " - f"length {minimum_seen_length} was seen. " - "Better results are obtained if the sequence file is filtered to the minimum " - "sequence length before mapping." - ) - _vambtools.log_and_warn(message, logfile=logfile) - return cls(metadata, tnfs_arr) diff --git a/vamb/vambtools.py b/vamb/vambtools.py index 822409b4..f9c26710 100644 --- a/vamb/vambtools.py +++ b/vamb/vambtools.py @@ -1,6 +1,5 @@ __doc__ = "Various classes and functions Vamb uses internally." 
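
(The three states of `-o` described in the comment above fall out of argparse's `nargs="?"`/`const` handling — a standalone sketch, not Vamb's actual parser:)

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("-o", dest="binsplit_separator", default=None, const="", nargs="?")
    print(p.parse_args([]).binsplit_separator)           # None -> default separator "C"
    print(p.parse_args(["-o"]).binsplit_separator)       # ""   -> binsplitting disabled
    print(p.parse_args(["-o", "X"]).binsplit_separator)  # "X"  -> explicit separator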
diff --git a/vamb/parsecontigs.py b/vamb/parsecontigs.py
index fe08f062..5d3deb15 100644
--- a/vamb/parsecontigs.py
+++ b/vamb/parsecontigs.py
@@ -215,26 +215,4 @@ def from_file(
             _np.array(mask, dtype=bool),
             minlength,
         )
-
-        if len(metadata.lengths) < 20_000:
-            message = (
-                f"WARNING: Parsed only {len(metadata.lengths)} sequences from FASTA file. "
-                "We normally expect 20,000 sequences or more to prevent overfitting. "
-                "As a deep learning model, VAEs are prone to overfitting with too few sequences. "
-                "You may want to bin more samples as a time, lower the beta parameter, "
-                "or use a different binner altogether."
-            )
-            _vambtools.log_and_warn(message, logfile=logfile)
-
-        # Warn the user if any contigs have been observed, which is smaller
-        # than the threshold.
-        if not _np.all(metadata.mask):
-            message = (
-                f"WARNING: The minimum sequence length has been set to {minlength}, but a contig with "
-                f"length {minimum_seen_length} was seen. "
-                "Better results are obtained if the sequence file is filtered to the minimum "
-                "sequence length before mapping."
-            )
-            _vambtools.log_and_warn(message, logfile=logfile)
-
         return cls(metadata, tnfs_arr)
diff --git a/vamb/vambtools.py b/vamb/vambtools.py
index 822409b4..f9c26710 100644
--- a/vamb/vambtools.py
+++ b/vamb/vambtools.py
@@ -1,6 +1,5 @@
 __doc__ = "Various classes and functions Vamb uses internally."
 
-import os as _os
 import gzip as _gzip
 import bz2 as _bz2
 import lzma as _lzma
@@ -10,12 +9,146 @@ import collections as _collections
 from itertools import zip_longest
 from hashlib import md5 as _md5
-from collections.abc import Iterable, Iterator, Generator
+from collections.abc import Iterable, Iterator
 from typing import Optional, IO, Union
 from pathlib import Path
 import warnings
 
 
+class BinSplitter:
+    """
+    The binsplitter can be either
+    * Instantiated with an explicit option, in which case `is_default` is False,
+      and `splitter` is None if the user explicitly opted out of binsplitting by
+      passing in an empty string, else a `str`, the string explicitly asked for
+    * Instantiated by default, in which case `is_default` is `True`, and `splitter`
+      is `_DEFAULT_SPLITTER`
+
+    The `initialize` function checks the validity of the binsplitter on the set of
+    identifiers:
+    * If the binsplitter sep is explicitly `None`, do nothing
+    * If the binsplitter is default and the separator is not found, warn the user,
+      then set the separator to `None` (disabling binsplitting)
+    * If the binsplitter is explicitly a string, check this string occurs in all
+      identifiers, else error early.
+    """
+
+    _DEFAULT_SPLITTER = "C"
+    __slots__ = ["is_default", "splitter"]
+
+    def __init__(self, binsplitter: Optional[str]):
+        if binsplitter is None:
+            self.is_default = True
+            self.splitter = self._DEFAULT_SPLITTER
+        else:
+            self.is_default = False
+            if len(binsplitter) == 0:
+                self.splitter = None
+            else:
+                self.splitter = binsplitter
+
+    @classmethod
+    def inert_splitter(cls):
+        return cls("")
+
+    def initialize(self, identifiers: Iterable[str]):
+        separator = self.splitter
+        if separator is None:
+            return None
+        message = (
+            'Binsplit separator (option `-o`) {imexplicit} passed as "{separator}", '
+            'but sequence identifier "{identifier}" does not contain this separator, '
+            "or contains it at the very start or end.\n"
+            "A binsplit separator X implies that every sequence identifier is formatted as\n"
+            "[sample identifier][X][sequence identifier], e.g. a binsplit separator of 'C' "
+            "means that 'S1C19' and '7C11' are valid identifiers."
+        )
+
+        if not self.is_default:
+            for identifier in identifiers:
+                (front, _, rest) = identifier.partition(separator)
+                if not front or not rest:
+                    log_and_raise(
+                        message.format(
+                            imexplicit="explicitly",
+                            separator=separator,
+                            identifier=identifier,
+                        )
+                    )
+        else:
+            for identifier in identifiers:
+                (front, _, rest) = identifier.partition(separator)
+                if not front or not rest:
+                    message += "\nSkipping binsplitting."
+                    log_and_warn(
+                        message.format(
+                            imexplicit="implicitly",
+                            separator=separator,
+                            identifier=identifier,
+                        )
+                    )
+                    self.splitter = None
+                    break
+
+    def split_bin(
+        self,
+        binname: str,
+        identifiers: Iterable[str],
+    ) -> Iterable[tuple[str, set[str]]]:
+        "Split a single bin by the prefix of the headers"
+        if self.splitter is None:
+            yield (binname, set(identifiers))
+            return None
+        else:
+            by_sample: dict[str, set[str]] = _collections.defaultdict(set)
+            for identifier in identifiers:
+                sample, _, rest = identifier.partition(self.splitter)
+
+                if not rest or not sample:
+                    raise KeyError(
+                        f"Separator '{self.splitter}' not in sequence identifier, or is at the very start or end of identifier: '{identifier}'"
+                    )
+
+                by_sample[sample].add(identifier)
+
+            for sample, splitheaders in by_sample.items():
+                newbinname = f"{sample}{self.splitter}{binname}"
+                yield newbinname, splitheaders
+
+    def binsplit(
+        self,
+        clusters: Iterable[tuple[str, Iterable[str]]],
+    ) -> Iterable[tuple[str, set[str]]]:
+        """Splits a set of clusters by the prefix of their names.
+        The separator is a string which separates the prefix from the postfix
+        of contig names. The resulting split clusters have the prefix and
+        separator prepended to them.
+
+        clusters can be any iterable of (clustername, contignames) pairs;
+        an iterator of the split (clustername, contignames) pairs is returned.
+
+        Example:
+        >>> clusters = {"bin1": {"s1-c1", "s1-c5", "s2-c1", "s2-c3", "s5-c8"}}
+        >>> dict(BinSplitter("-").binsplit(clusters.items()))
+        {'s1-bin1': {'s1-c1', 's1-c5'}, 's2-bin1': {'s2-c1', 's2-c3'}, 's5-bin1': {'s5-c8'}}
+        """
+        for binname, headers in clusters:
+            for newbinname, splitheaders in self.split_bin(binname, headers):
+                yield newbinname, splitheaders
+
+    def log_string(self) -> str:
+        if not self.is_default:
+            if self.splitter is None:
+                return "Explicitly passed as empty (no binsplitting)"
+            else:
+                return f'"{self.splitter}"'
+        else:
+            if self.splitter is None:
+                return "Defaulting to 'C', but disabled due to incompatible identifiers"
+            else:
+                return "Defaulting to 'C'"
+
+
 def showwarning_override(message, category, filename, lineno, file=None, line=None):
     print(str(message) + "\n", file=file)
 
@@ -440,67 +573,18 @@ def verify_refhash(
 
 
 def write_clusters(
-    filehandle: IO[str],
+    unsplit_io: IO[str],
     clusters: Iterable[tuple[str, set[str]]],
-    max_clusters: Optional[int] = None,
-    min_size: int = 1,
-    header: Optional[str] = None,
-    rename: bool = True,
-    cluster_prefix: str = "",
 ) -> tuple[int, int]:
-    """Writes clusters to an open filehandle.
-    Inputs:
-        filehandle: An open filehandle that can be written to
-        clusters: An iterator generated by function `clusters` or a dict
-        max_clusters: Stop printing after this many clusters [None]
-        min_size: Don't output clusters smaller than N contigs
-        header: Commented one-line header to add
-        rename: Rename clusters to "cluster_1", "cluster_2" etc.
-        cluster_prefix: prepend a tag to identify which model produced the clusters (vae,aae_l, aae_y)
-
-    Outputs:
-        clusternumber: Number of clusters written
-        ncontigs: Number of contigs written
-    """
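+    """Write clusters to an open filehandle as tab-separated lines of
+    cluster name, then contig name (one line per contig).
+
+    Returns (number of clusters written, number of contigs written).
+    """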
+    n_clusters = 0
+    n_contigs = 0
+    for cluster_name, contig_names in clusters:
+        n_clusters += 1
+        n_contigs += len(contig_names)
+        for contig_name in contig_names:
+            print(cluster_name, contig_name, sep="\t", file=unsplit_io)
 
-    if not hasattr(filehandle, "writable") or not filehandle.writable():
-        raise ValueError("Filehandle must be a writable file")
-
-    if max_clusters is not None and max_clusters < 1:
-        raise ValueError("max_clusters must None or at least 1, not {max_clusters}")
-
-    if header is not None and len(header) > 0:
-        if "\n" in header:
-            raise ValueError("Header cannot contain newline")
-
-        if header[0] != "#":
-            header = "# " + header
-
-        print(header, file=filehandle)
-
-    clusternumber = 0
-    ncontigs = 0
-
-    for clustername, contigs in clusters:
-        if len(contigs) < min_size:
-            continue
-
-        if rename:
-            clustername = cluster_prefix + "cluster_" + str(clusternumber + 1)
-        else:
-            clustername = cluster_prefix + str(clusternumber + 1)
-
-        for contig in contigs:
-            print(clustername, contig, sep="\t", file=filehandle)
-        filehandle.flush()
-
-        clusternumber += 1
-        ncontigs += len(contigs)
-
-        if clusternumber == max_clusters:
-            break
-
-    return clusternumber, ncontigs
+    return (n_clusters, n_contigs)
 
 
 def read_clusters(filehandle: Iterable[str], min_size: int = 1) -> dict[str, set[str]]:
@@ -528,20 +612,35 @@ def read_clusters(filehandle: Iterable[str], min_size: int = 1) -> dict[str, set[str]]:
     return contigsof_dict
 
 
+def check_is_creatable_file_path(path: Path) -> None:
+    if path.exists():
+        raise FileExistsError(path)
+    if not path.parent.is_dir():
+        raise NotADirectoryError(path.parent)
+
+
+def create_dir_if_not_existing(path: Path) -> None:
+    if path.is_dir():
+        return None
+    if path.is_file():
+        raise FileExistsError(path)
+    if not path.parent.is_dir():
+        raise NotADirectoryError(path.parent)
+    path.mkdir(exist_ok=True)
+
+
 def write_bins(
-    directory: Union[str, Path],
+    directory: Path,
     bins: dict[str, set[str]],
     fastaio: Iterable[bytes],
     maxbins: Optional[int] = 250,
     minsize: int = 0,
-    separator: Optional[str] = None,
 ):
     """Writes bins as FASTA files in a directory, one file per bin.
 
     Inputs:
         directory: Directory to create or put files in
-        bins: dict[str: set[str]] (can be loaded from
-        clusters.tsv using vamb.cluster.read_clusters)
+        bins: dict[str: set[str]] (can be loaded from clusters.tsv using vamb.vambtools.read_clusters)
         fastaio: bytes iterator containing FASTA file with all sequences
         maxbins: None or else raise an error if trying to make more bins than this [250]
         minsize: Minimum number of nucleotides in cluster to be output [0]
@@ -554,41 +653,26 @@ def write_bins(
    # a halt and piss people off like you wouldn't believe.
     if maxbins is not None and len(bins) > maxbins:
         raise ValueError(f"{len(bins)} bins exceed maxbins of {maxbins}")
-
-    # Check that the directory is not a non-directory file,
-    # and that its parent directory indeed exists
-    abspath = _os.path.abspath(directory)
-    parentdir = _os.path.dirname(abspath)
-
-    if parentdir != "" and not _os.path.isdir(parentdir):
-        raise NotADirectoryError(parentdir)
-
-    if _os.path.isfile(abspath):
-        raise FileExistsError(abspath)
-
     if minsize < 0:
         raise ValueError("Minsize must be nonnegative")
 
+    create_dir_if_not_existing(directory)
+
+    keep: set[str] = set()
+    for i in bins.values():
+        keep.update(i)
+
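+    # Cache only the sequences that actually occur in a bin, stored
+    # gzip-compressed (compresslevel=1 favors speed) along with their lengths.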
     byteslen_by_id: dict[str, tuple[bytes, int]] = dict()
     for entry in byte_iterfasta(fastaio):
-        byteslen_by_id[entry.identifier] = (
-            _gzip.compress(entry.format().encode(), compresslevel=1),
-            len(entry),
-        )
-
-    # Make the directory if it does not exist - if it does, do nothing
-    try:
-        _os.mkdir(directory)
-    except FileExistsError:
-        pass
+        if entry.identifier in keep:
+            byteslen_by_id[entry.identifier] = (
+                _gzip.compress(entry.format().encode(), compresslevel=1),
+                len(entry),
+            )
 
     # Now actually print all the contigs to files
     for binname, contigs in bins.items():
         size = 0
-        if separator is not None:
-            binsample = next(iter(contigs)).split(separator)[0]
-        else:
-            binsample = None
 
         for contig in contigs:
             byteslen = byteslen_by_id.get(contig)
             if byteslen is None:
@@ -600,20 +684,8 @@ def write_bins(
         if size < minsize:
             continue
 
-        # Split bin files into sample dirs
-        if binsample is not None:
-            bin_dir = _os.path.join(directory, binsample)
-            try:
-                _os.mkdir(bin_dir)
-            except FileExistsError:
-                pass
-        else:
-            bin_dir = directory
-
         # Print bin to file
-        filename = _os.path.join(bin_dir, binname + ".fna")
-
-        with open(filename, "wb") as file:
+        with open(directory.joinpath(binname + ".fna"), "wb") as file:
             for contig in contigs:
                 file.write(_gzip.decompress(byteslen_by_id[contig][0]))
                 file.write(b"\n")
@@ -712,50 +784,3 @@ def open_file_iterator(paths: Iterable[Path]) -> Iterable[Reader]:
     for path in paths:
         with Reader(path) as io:
             yield io
-
-
-def _split_bin(
-    binname: str,
-    headers: Iterable[str],
-    separator: str,
-    bysample: _collections.defaultdict[str, set[str]] = _collections.defaultdict(set),
-) -> Generator[tuple[str, set[str]], None, None]:
-    "Split a single bin by the prefix of the headers"
-
-    bysample.clear()
-    for header in headers:
-        if not isinstance(header, str):  # type: ignore
-            raise TypeError(
-                f"Can only split named sequences, not of type {type(header)}"
-            )
-
-        sample, _, identifier = header.partition(separator)
-
-        if not identifier:
-            raise KeyError(f"Separator '{separator}' not in sequence label: '{header}'")
-
-        bysample[sample].add(header)
-
-    for sample, splitheaders in bysample.items():
-        newbinname = f"{sample}{separator}{binname}"
-        yield newbinname, splitheaders
-
-
-def binsplit(
-    clusters: Iterable[tuple[str, Iterable[str]]], separator: str
-) -> Generator[tuple[str, set[str]], None, None]:
-    """Splits a set of clusters by the prefix of their names.
-    The separator is a string which separated prefix from postfix of contignames. The
-    resulting split clusters have the prefix and separator prepended to them.
-
-    clusters can be an iterator, in which case this function returns an iterator, or a dict
-    with contignames: set_of_contignames pair, in which case a dict is returned.
-
-    Example:
-    >>> clusters = {"bin1": {"s1-c1", "s1-c5", "s2-c1", "s2-c3", "s5-c8"}}
-    >>> binsplit(clusters, "-")
-    {'s2-bin1': {'s1-c1', 's1-c3'}, 's1-bin1': {'s1-c1', 's1-c5'}, 's5-bin1': {'s1-c8'}}
-    """
-    for binname, headers in clusters:
-        for newbinname, splitheaders in _split_bin(binname, headers, separator):
-            yield newbinname, splitheaders
diff --git a/workflow_avamb/avamb.snake.conda.smk b/workflow_avamb/avamb.snake.conda.smk
index 99d0b38c..c80cb046 100644
--- a/workflow_avamb/avamb.snake.conda.smk
+++ b/workflow_avamb/avamb.snake.conda.smk
@@ -332,9 +332,9 @@ rule run_avamb:
         abundance=os.path.join(OUTDIR,"abundance.npz")
     output:
         outdir_avamb=directory(os.path.join(OUTDIR,"avamb")),
-        clusters_aae_z=os.path.join(OUTDIR,"avamb/aae_z_clusters.tsv"),
-        clusters_aae_y=os.path.join(OUTDIR,"avamb/aae_y_clusters.tsv"),
-        clusters_vamb=os.path.join(OUTDIR,"avamb/vae_clusters.tsv"),
+        clusters_aae_z=os.path.join(OUTDIR,"avamb/aae_z_clusters_split.tsv"),
+        clusters_aae_y=os.path.join(OUTDIR,"avamb/aae_y_clusters_split.tsv"),
+        clusters_vamb=os.path.join(OUTDIR,"avamb/vae_clusters_split.tsv"),
         contignames=os.path.join(OUTDIR,"avamb/contignames"),
         contiglenghts=os.path.join(OUTDIR,"avamb/lengths.npz")
     params:
@@ -473,9 +473,9 @@ rule run_drep_manual_vamb_z_y:
         cluster_score_dict_path_avamb=os.path.join(OUTDIR,"tmp/cs_d_avamb.json"),
         contignames=os.path.join(OUTDIR,"avamb/contignames"),
         contiglengths=os.path.join(OUTDIR,"avamb/lengths.npz"),
-        clusters_aae_z=os.path.join(OUTDIR,"avamb/aae_z_clusters.tsv"),
-        clusters_aae_y=os.path.join(OUTDIR,"avamb/aae_y_clusters.tsv"),
-        clusters_vamb=os.path.join(OUTDIR,"avamb/vae_clusters.tsv")
+        clusters_aae_z=os.path.join(OUTDIR,"avamb/aae_z_clusters_split.tsv"),
+        clusters_aae_y=os.path.join(OUTDIR,"avamb/aae_y_clusters_split.tsv"),
+        clusters_vamb=os.path.join(OUTDIR,"avamb/vae_clusters_split.tsv")
     output:
         clusters_avamb_manual_drep=os.path.join(OUTDIR,"tmp/avamb_manual_drep_clusters.tsv")