diff --git a/.travis.yml b/.travis.yml
index 23559c2..347ac96 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,7 @@ before_install:
   - sudo ln -s /run/shm /dev/shm
 install:
-  - conda install --yes python="3.6" numpy scipy cython h5py typing pandas
+  - conda install --yes python="3.6" numpy scipy cython h5py typing numba
   - pip install sphinx_bootstrap_theme
   # Install the dev version (1.7) of sphinx that has solved a problem with f-strings
   - git clone https://github.com/sphinx-doc/sphinx.git
diff --git a/README.md b/README.md
index cca5385..76f9f08 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,10 @@
-# loompy 2
+# loompy v3.0
 
-⭐ Loompy v2.0 was released Dec. 24, 2017! ([what's new](https://github.com/linnarsson-lab/loompy/releases/tag/v2.0)?)
+⭐ Loompy v3.0 was released Sep. 24, 2019!
 
-`.loom` is an efficient file format for very large omics datasets,
-consisting of a main matrix, optional additional layers, a variable number of row and column
-annotations. Loom also supports sparse graphs. We use loom files to store single-cell gene expression
-data: the main matrix contains the actual expression values (one
-column per cell, one row per gene); row and column annotations
-contain metadata for genes and cells, such as `Name`, `Chromosome`,
-`Position` (for genes), and `Strain`, `Sex`, `Age` (for cells).
-
-![Illustration of Loom format structure](/doc/Loom-images.png)
-
-Loom files (`.loom`) are created in the [HDF5](https://en.wikipedia.org/wiki/Hierarchical_Data_Format) file format, which
-supports an internal collection of numerical multidimensional datasets.
-HDF5 is supported by many computer languages, including Java, MATLAB,
-Mathematica, Python, R, and Julia. `.loom` files are accessible from
-any language that supports HDF5.
-
-To get started, head over to [the documentation](http://linnarssonlab.org/loompy/)!
+To get started, head over to [the documentation](http://loompy.org)!
 
 Loom, loompy, and the [loom-viewer](https://github.com/linnarsson-lab/loom-viewer) are being developed by members of the [Linnarsson Lab](http://linnarssonlab.org).
diff --git a/doc/format/index.rst b/doc/format/index.rst
index 53f5ae2..b8aa3d1 100644
--- a/doc/format/index.rst
+++ b/doc/format/index.rst
@@ -6,7 +6,7 @@ Loom file format specs
 Versions
 --------
 
-This specification defines the Loom file format version ``2.0.1``.
+This specification defines the Loom file format version ``3.0.0``.
 
 .. _formatinfo:
 
@@ -91,15 +91,27 @@ Main matrix and layers
 Global attributes
 ^^^^^^^^^^^^^^^^^
 
-- There can OPTIONALLY be at least one `HDF5
-  attribute `__ on the
-  root ``/`` group, which can be any valid scalar or multidimensional datatype and should be
-  interpreted as attributes of the whole Loom file.
-- There can OPTIONALLY be an `HDF5
-  attribute `__ on the
-  root ``/`` group named ``LOOM_SPEC_VERSION``, a string value giving the
-  loom file spec version that was followed in creating the file. See top of this
-  document for the current version of the spec.
+- There MUST be an HDF5 group ``/attrs`` containing global attributes.
+- There MUST be an HDF5 dataset ``/attrs/LOOM_SPEC_VERSION`` with the value ``3.0.0``.
+
+Global attributes apply semantically to the whole file, not any specific part of it.
+Such attributes are stored in the HDF5 group ``/attrs`` and can be any valid scalar
+or multidimensional datatype.
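+
+For example (an illustrative sketch only, not part of the spec), a reader
+following the lookup order described below might resolve a global attribute
+like this::
+
+    import h5py
+
+    def read_global_attr(f: h5py.File, name: str):
+        # Loom v3.0.0 and later: global attributes live as datasets in /attrs
+        if "attrs" in f and name in f["attrs"]:
+            return f["attrs"][name][()]
+        # Legacy fallback: HDF5 attributes on the root group
+        return f.attrs[name]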
+
+As of Loom file format v3.0.0, only one global attribute is mandatory: the ``LOOM_SPEC_VERSION``
+attribute, which is a string value giving the loom file spec version that was followed in creating
+the file. See top of this document for the current version of the spec.
+
+Note: previous versions of the loom file format stored global attributes as `HDF5 attributes `__
+on the root ``/`` group. However, such attributes are size-limited, which caused problems for some
+applications. For backwards compatibility, readers compatible with Loom v3.0.0 and above MUST first look
+for global attributes under the HDF5 group ``/attrs`` (if it exists). If a requested attribute does not exist
+as a dataset under that group, the reader MUST then examine the HDF5 attributes on the root ``/`` group.
+
+When writing a global attribute, the writer MUST write only to the ``/attrs`` group if ``LOOM_SPEC_VERSION`` is
+``3.0.0`` or higher. The writer MUST write to both the ``/attrs`` group and the HDF5 attributes on the root ``/``
+group if ``LOOM_SPEC_VERSION`` is lower than ``3.0.0`` or if it does not exist. This is to preserve a consistent
+format for legacy files.
 
 
 Row and column attributes
diff --git a/loompy/__init__.py b/loompy/__init__.py
index 5d5d27b..c09c818 100644
--- a/loompy/__init__.py
+++ b/loompy/__init__.py
@@ -1,13 +1,15 @@
-from .utils import *
+from .utils import get_loom_spec_version, compare_loom_spec_version, timestamp, deprecated
 from .normalize import normalize_attr_values, materialize_attr_values
 from .attribute_manager import AttributeManager
-from .file_attribute_manager import FileAttributeManager
+from .global_attribute_manager import GlobalAttributeManager
 from .graph_manager import GraphManager
 from .layer_manager import LayerManager
 from .loom_view import LoomView
 from .loom_layer import MemoryLoomLayer, LoomLayer
 from .to_html import to_html
 from .view_manager import ViewManager
-from .loompy import connect, create, create_append, combine, create_from_cellranger, LoomConnection, new, combine_faster
+from .loompy import connect, create, create_append, combine, create_from_cellranger, LoomConnection, new, combine_faster, create_from_matrix_market
 from .loom_validator import LoomValidator
 from ._version import __version__, loom_spec_version
+from .bus_file import create_from_fastq
+from .cell_calling import call_cells
diff --git a/loompy/_version.py b/loompy/_version.py
index 26c8c01..f742e1b 100644
--- a/loompy/_version.py
+++ b/loompy/_version.py
@@ -1,2 +1,2 @@
-__version__ = '2.0.18'
-loom_spec_version = '2.0.1'
+__version__ = '3.0.0'
+loom_spec_version = '3.0.0'
diff --git a/loompy/attribute_manager.py b/loompy/attribute_manager.py
index 5968ef3..f48d898 100644
--- a/loompy/attribute_manager.py
+++ b/loompy/attribute_manager.py
@@ -1,7 +1,9 @@
 from typing import *
 import numpy as np
+import h5py
 import loompy
 from loompy import timestamp
+from .utils import compare_loom_spec_version
 
 
 class AttributeManager:
@@ -144,13 +146,17 @@ def __setattr__(self, name: str, val: np.ndarray) -> None:
 			raise KeyError("Attribute name cannot contain slash (/)")
 		else:
 			if self.ds is not None:
-				values = loompy.normalize_attr_values(val)
+				values = loompy.normalize_attr_values(val, compare_loom_spec_version(self.ds._file, "3.0.0") >= 0)
 				a = ["/row_attrs/", "/col_attrs/"][self.axis]
 				if self.ds.shape[self.axis] != 0 and values.shape[0] != self.ds.shape[self.axis]:
 					raise ValueError(f"Attribute '{name}' must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given")
 				if 
self.ds._file[a].__contains__(name): del self.ds._file[a + name] - self.ds._file[a + name] = values # TODO: for 2D arrays, use block compression along columns/rows + + if values.dtype == np.object_: + self.ds._file.create_dataset(a + name, data=values, dtype=h5py.string_dtype(encoding="utf-8")) + else: + self.ds._file[a + name] = values self.ds._file[a + name].attrs["last_modified"] = timestamp() self.ds._file[a].attrs["last_modified"] = timestamp() self.ds._file.attrs["last_modified"] = timestamp() diff --git a/loompy/bus_file.py b/loompy/bus_file.py new file mode 100644 index 0000000..ca4a324 --- /dev/null +++ b/loompy/bus_file.py @@ -0,0 +1,463 @@ +import array +import gzip +import json +import logging +import os +import sqlite3 as sqlite +import subprocess +from math import lgamma +from tempfile import TemporaryDirectory +from typing import Dict, Generator, List, Optional, Tuple + +import numpy as np +import scipy.sparse as sparse +from numba import jit + +from .cell_calling import call_cells +from .loompy import connect, create + + +# Copied from cytograph +@jit("float32(float64[:], float64[:])", nopython=True, parallel=True, nogil=True) +def multinomial_distance(p: np.ndarray, q: np.ndarray) -> float: + N = p.shape[0] + p_sum = p.sum() + q_sum = q.sum() + x = lgamma(N) + lgamma(p_sum + q_sum + N) - lgamma(p_sum + N) - lgamma(q_sum + N) + for k in range(N): + x += lgamma(p[k] + 1) + lgamma(q[k] + 1) - lgamma(1) - lgamma(p[k] + q[k] + 1) + x = np.exp(x) + return 1 - 1 / (1 + x) + + +# https://maciejkula.github.io/2015/02/22/incremental-construction-of-sparse-matrices/ +class IncrementalSparseMatrixUInt16: + def __init__(self, shape: Tuple[int, int]): + self.dtype = np.uint16 + self.shape = shape + + self.rows = array.array('i') + self.cols = array.array('i') + self.data = array.array('H') + + def append(self, i: int, j: int, v: int) -> None: + m, n = self.shape + + if (i >= m or j >= n): + raise Exception('Index out of bounds') + + self.rows.append(i) + self.cols.append(j) + self.data.append(v) + + def tocoo(self) -> sparse.coo_matrix: + rows = np.frombuffer(self.rows, dtype=np.int32) + cols = np.frombuffer(self.cols, dtype=np.int32) + data = np.frombuffer(self.data, dtype=np.uint16) + return sparse.coo_matrix((data, (rows, cols)), shape=self.shape) + + def __len__(self) -> int: + return len(self.data) + + +twobit_to_dna_table = {0: "A", 1: "C", 2: "G", 3: "T"} +dna_to_twobit_table = {"A": 0, "C": 1, "G": 2, "T": 3} + + +@jit +def twobit_to_dna(twobit: int, size: int) -> str: + result = [] + for i in range(size): + x = (twobit & (3 << 2 * i)) >> 2 * i + if x == 0: + result.append("A") + elif x == 1: + result.append("C") + elif x == 2: + result.append("G") + elif x == 3: + result.append("T") + result.reverse() + return "".join(result) + + +@jit +def dna_to_twobit(dna: str) -> int: + x = 0 + for nt in dna: + if nt == "A": + x += 0 + elif nt == "C": + x += 1 + elif nt == "G": + x += 2 + elif nt == "T": + x += 3 + x <<= 2 + x >>= 2 + return x + + +@jit +def twobit_1hamming(twobit: int, size: int) -> List[int]: + result = [] + for i in range(size): + x = (twobit >> 2 * (size - i - 1)) & 3 + for j in range(4): + if x == j: + continue + result.append(twobit & ~(3 << 2 * (size - i - 1)) | (j << 2 * (size - i - 1))) + return result + + +def ixs_thatsort_a2b(a: np.ndarray, b: np.ndarray, check_content: bool = True) -> np.ndarray: + "This is super duper magic sauce to make the order of one list to be like another" + if check_content: + assert len(np.intersect1d(a, b)) == len(a), f"The two arrays 
are not matching" + return np.argsort(a)[np.argsort(np.argsort(b))] + + +def load_sample_metadata(path: str, sample_id: str) -> Dict[str, str]: + if not os.path.exists(path): + raise ValueError(f"Samples metadata file '{path}' not found.") + if path.endswith(".db"): + # sqlite3 + with sqlite.connect(path) as db: + cursor = db.cursor() + cursor.execute("SELECT * FROM sample WHERE name = ?", (sample_id,)) + keys = [x[0] for x in cursor.description] + vals = cursor.fetchone() + if vals is not None: + return dict(zip(keys, vals)) + raise ValueError(f"SampleID '{sample_id}' was not found in the samples database.") + else: + result = {} + with open(path) as f: + headers = [x.lower() for x in f.readline()[:-1].split("\t")] + if "sampleid" not in headers and 'name' not in headers: + raise ValueError("Required column 'SampleID' or 'Name' not found in sample metadata file") + if "sampleid" in headers: + sample_metadata_key_idx = headers.index("sampleid") + else: + sample_metadata_key_idx = headers.index("name") + sample_found = False + for line in f: + items = line[:-1].split("\t") + if len(items) > sample_metadata_key_idx and items[sample_metadata_key_idx] == sample_id: + for i, item in enumerate(items): + result[headers[i]] = item + sample_found = True + if not sample_found: + raise ValueError(f"SampleID '{sample_id}' not found in sample metadata file") + return result + + +class BusFile: + def __init__(self, path: str, genes_metadata_file: str, genes_metadata_key: str, fragments2genes_file: str, equivalence_classes_file: str, fragments_file: str) -> None: + self.matrix: sparse.coo_matrix = None + logging.info("Loading gene metadata") + self.genes: Dict[str, List[str]] = {} # Keys are Accessions, values are lists of attribute values + self.gene_metadata_attributes: List[str] = [] # Attribute names + with open(genes_metadata_file) as f: + line = f.readline() + self.gene_metadata_attributes = line[:-1].split("\t") + if genes_metadata_key not in self.gene_metadata_attributes: + raise ValueError(f"Metadata key '{genes_metadata_key}' not found in gene metadata file") + key_col = self.gene_metadata_attributes.index(genes_metadata_key) + for line in f: + items = line[:-1].split("\t") + self.genes[items[key_col]] = items + self.accessions = np.array([x for x in self.genes.keys()]) + accession_idx = {acc: i for (i, acc) in enumerate(self.accessions)} + self.n_genes = len(self.accessions) + + logging.info("Loading fragments-to-gene mappings") + self.gene_for_fragment: List[str] = [] + with open(fragments2genes_file) as f: + for line in f: + transcript_id, accession = line[:-1].split("\t") + self.gene_for_fragment.append(accession) + self.gene_for_fragment = np.array(self.gene_for_fragment) + + logging.info("Indexing genes") + # Array of indices into self.accessions for each gene in gene_for_transcript + self.gene_for_fragment_idx = np.zeros(len(self.gene_for_fragment), dtype="int32") + for i in range(len(self.gene_for_fragment)): + self.gene_for_fragment_idx[i] = accession_idx[self.gene_for_fragment[i]] + + logging.info("Loading equivalence classes") + self.equivalence_classes: Dict[int, List[int]] = {} + with open(equivalence_classes_file) as f: + for line in f: + ec, trs = line[:-1].split("\t") + # Each equivalence class is a set of fragments (transcripts) + self.equivalence_classes[int(ec)] = np.array([int(x) for x in trs.split(",")]) + # But we want each equivalence class mapped to a gene (or -1 if multimapping) + logging.info("Mapping equivalence classes to genes") + self.gene_for_ec: Dict[int, int] = 
{} + for eqc in self.equivalence_classes.keys(): + gene = -1 + for tid in self.equivalence_classes[eqc]: + if gene == -1: + gene = self.gene_for_fragment_idx[tid] + continue + elif self.gene_for_fragment_idx[tid] != gene: + gene = -1 # Multimapping UMI + break + self.gene_for_ec[eqc] = gene + + logging.info("Loading fragment IDs") + self.fragments: List[str] = [] + with open(fragments_file) as f: + for line in f: + self.fragments.append(line[:-1]) + self.fragments = np.array(self.fragments) + + logging.info("Loading BUS records") + fsize = os.path.getsize(path) + with open(path, "rb") as fb: + # Read the header + magic = fb.read(4) + if magic != b"BUS\0": + raise IOError("Not a valid BUS file (four leading magic bytes are missing)") + self.version = int.from_bytes(fb.read(4), byteorder="little", signed=False) + self.barcode_length = int.from_bytes(fb.read(4), byteorder="little", signed=False) + self.umi_length = int.from_bytes(fb.read(4), byteorder="little", signed=False) + tlen = int.from_bytes(fb.read(4), byteorder="little", signed=False) + self.header = fb.read(tlen).decode("utf-8") # BUS does not specify an encoding, but let's assume UTF8 + + # Read the records + self.n_records = (fsize - tlen - 20) // 32 + self.bus = np.fromfile(fb, dtype=[ + ("barcode", np.uint64), + ("UMI", np.uint64), + ("equivalence_class", np.int32), + ("count", np.uint32), + ("flags", np.uint32), + ("padding", np.int32) + ], count=self.n_records) + self.bus_gene = np.array([self.gene_for_ec[x] for x in self.bus["equivalence_class"]], dtype=np.int32) + self.bus_valid = self.bus_gene != -1 + + logging.info("Sorting cell IDs") + self.cell_ids = np.unique(self.bus["barcode"]) + self.cell_ids.sort() + self.n_cells = len(self.cell_ids) # This will change after error correction + self.layers: Dict[str, sparse.coo_matrix] = {} # Dict of layer name -> sparse matrix + self.ambient_umis = 0 + + def correct(self, whitelist_file: str = None) -> np.ndarray: + if whitelist_file is not None: + size = 0 + whitelist = set() + with open(whitelist_file) as f: + for bc in f: + size = len(bc) - 1 # Don't count the newline + whitelist.add(dna_to_twobit(bc[:-1])) + for i, bc in enumerate(self.bus["barcode"]): + if bc in whitelist: + continue + corrected = False + for mut in twobit_1hamming(int(bc), size=size): + if mut in whitelist: + self.bus["barcode"][i] = mut + corrected = True + break + if not corrected: + self.bus_valid[i] = False + self.cell_ids = np.unique(self.bus["barcode"][self.bus_valid]) + self.cell_ids.sort() + self.cell_for_barcode_idx = {bc: i for (i, bc) in enumerate(self.cell_ids)} + self.n_cells = len(self.cell_ids) + + def deduplicate(self) -> None: + # Sort by barcode, then by UMI, then by gene + ordering = np.lexsort((self.bus_gene, self.bus["UMI"], self.bus["barcode"])) + self.bus = self.bus[ordering] + self.bus_gene = self.bus_gene[ordering] + self.bus_valid = self.bus_valid[ordering] + dupes = (self.bus["barcode"][1:] == self.bus["barcode"][:-1]) & (self.bus["UMI"][1:] == self.bus["UMI"][:-1]) & (self.bus_gene[1:] == self.bus_gene[:-1]) + self.bus_valid[1:][dupes] = False + + def count(self) -> sparse.coo_matrix: + logging.info("Counting pseudoalignments for main matrix") + genes = self.bus_gene[self.bus_valid] + cells = [self.cell_for_barcode_idx[x] for x in self.bus["barcode"][self.bus_valid]] + self.matrix = sparse.coo_matrix((np.ones_like(genes), (genes, cells)), shape=(self.n_genes, self.n_cells), dtype=np.uint16) + self.total_umis = np.array(self.matrix.sum(axis=0))[0] + return self.matrix + + def 
remove_empty_beads(self, expected_n_cells: int) -> None: + logging.info("Calling cells") + self.ambient_umis, self.ambient_pvalue = call_cells(self.matrix.tocsc(), expected_n_cells) + self.valid_cells = (self.ambient_pvalue < 0.01) | (self.total_umis > 1500) + self.matrix = self.matrix.tocsr()[:, self.valid_cells] + self.cell_ids = self.cell_ids[self.valid_cells] + self.n_cells = self.valid_cells.sum() + for name in self.layers.keys(): + self.layers[name] = self.layers[name].tocsc()[:, self.valid_cells] + logging.info(f"Found {self.n_cells} valid cells and ~{int(self.ambient_umis)} ambient UMIs.") + + def count_layer(self, layer_name: str, layer_fragments_file: str) -> sparse.coo_matrix: + fragments_idx = {f: i for i, f in enumerate(self.fragments)} + with open(layer_fragments_file) as f: + include_fragments = set([fragments_idx[x[:-1]] for x in f.readlines()]) + logging.info(f"Counting pseudoalignments for layer '{layer_name}'") + # Figure out which of the equivalence classes are relevant + include_ec = {} + for ec, tids in self.equivalence_classes.items(): + if any([tid in include_fragments for tid in tids]): + include_ec[ec] = True + else: + include_ec[ec] = False + m = IncrementalSparseMatrixUInt16((self.n_genes, self.n_cells)) + for ix in range(self.n_records): + if not self.bus_valid[ix]: + continue + gene = self.bus_gene[ix] + if include_ec[self.bus["equivalence_class"][ix]]: + m.append(gene, self.cell_for_barcode_idx[self.bus["barcode"][ix]], 1) + self.layers[layer_name] = m.tocoo() + return self.layers[layer_name] + + def save(self, out_file: str, sample_id: str, samples_metadata_file: str) -> None: + logging.info("Saving") + row_attrs = {} + # Transpose the gene metadata + for i, attr in enumerate(self.gene_metadata_attributes): + row_attrs[attr] = [v[i] for v in self.genes.values()] + + # Create cell attributes + col_attrs = { + "CellID": [sample_id + "_" + twobit_to_dna(int(cid), 16) for cid in self.cell_ids], + "TotalUMIs": self.total_umis[self.valid_cells] + } + + # Load sample metadata + metadata = load_sample_metadata(samples_metadata_file, sample_id) + global_attrs = { + **{ + "SampleID": sample_id, + "AmbientUMIs": self.ambient_umis, + "RedundantReadFraction": 1 - self.bus_valid.sum() / self.n_records, + "AmbientPValue": self.ambient_pvalue, + "BarcodeTotalUMIs": self.total_umis, + "CellBarcodes": self.valid_cells + }, **metadata} + + layers = self.layers.copy() + layers[""] = self.matrix + create(out_file, layers, row_attrs, col_attrs, file_attrs=global_attrs) + + +def execute(cmd: List[str], synchronous: bool = False) -> Generator: + if synchronous: + yield os.popen(" ".join(cmd)).read() + else: + popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) # type: ignore + for stdout_line in iter(popen.stdout.readline, ""): + yield stdout_line + popen.stdout.close() + return_code = popen.wait() + if return_code: + raise subprocess.CalledProcessError(return_code, cmd) + + +def create_from_fastq(out_file: str, sample_id: str, fastqs: List[str], index_path: str, samples_metadata_file: str, n_threads: int = 1, temp_folder: str = None, synchronous: bool = False) -> None: + """ + Args: + technology String like "10xv2" or None to read the technology from the sample metadata file + expected_n_cells Expected number of cells captured in the sample, or None to read the number from the sample metadata file + samples_metadata_file Path to tab-delimited file with one header row OR path to sqlite database with one table called "sample" + + Remarks: + 
Samples metadata table should contain these columns:
+			name			Sample name (i.e. sample id)
+			chemistry		10x chemistry version (v1, v2 or v3)
+			targetnumcells	Number of cells expected in the sample
+	"""
+	manifest_file = os.path.join(index_path, "manifest.json")
+	if not os.path.exists(manifest_file):
+		raise ValueError(f"Manifest file 'manifest.json' was missing from index at '{index_path}'")
+	for fastq in fastqs:
+		if not os.path.exists(fastq):
+			raise ValueError(f"Fastq file '{fastq}' was not found")
+	if not os.path.exists(samples_metadata_file):
+		raise ValueError("Samples metadata file not found")
+	with open(manifest_file) as f:
+		manifest = json.load(f)
+
+	metadata = load_sample_metadata(samples_metadata_file, sample_id)
+
+	if (("technology" not in metadata) and ("chemistry" not in metadata)) or ("targetnumcells" not in metadata):
+		logging.debug(f"Sample metadata contained keys: {list(metadata.keys())}")
+		raise ValueError("Samples metadata must contain columns 'targetnumcells' and either 'chemistry' or 'technology'")
+	if "technology" in metadata:
+		technology = metadata["technology"]
+	else:
+		technology = "10x" + metadata["chemistry"]
+	try:
+		expected_n_cells = int(metadata["targetnumcells"])
+	except ValueError:
+		expected_n_cells = 5000
+
+	whitelist_file: Optional[str] = os.path.join(index_path, f"{technology}_whitelist.txt")
+	if not os.path.exists(whitelist_file):  # type: ignore
+		logging.warning(f"Barcode whitelist file {whitelist_file} not found in index folder at '{index_path}'; barcode correction will be skipped.")
+		whitelist_file = None
+
+	with TemporaryDirectory() as d:
+		if temp_folder is not None:
+			d = temp_folder
+			if not os.path.exists(d):
+				os.mkdir(d)
+		cmd = ["kallisto", "bus", "-i", os.path.join(index_path, manifest["index_file"]), "-o", d, "-x", technology, "-t", str(n_threads)] + fastqs
+		logging.info(" ".join(cmd))
+		for line in execute(cmd, synchronous):
+			if line != "\n":
+				logging.info(line[:-1])
+
+		run_info: Optional[Dict[str, str]] = None
+		try:
+			with open(os.path.join(d, "run_info.json")) as f:
+				run_info = json.load(f)
+		except json.JSONDecodeError as e:
+			with open(os.path.join(d, "run_info.json")) as f:
+				for line in f:
+					logging.error(line)
+			logging.error(f"Error decoding run_info.json: {e}")
+		bus = BusFile(
+			os.path.join(d, "output.bus"),
+			os.path.join(index_path, manifest["gene_metadata_file"]),
+			manifest["gene_metadata_key"],
+			os.path.join(index_path, manifest["fragments_to_genes_file"]),
+			os.path.join(d, "matrix.ec"),
+			os.path.join(d, "transcripts.txt")
+		)
+		logging.info(f"Found {bus.n_records:,} records for {bus.n_genes:,} genes and {bus.n_cells:,} uncorrected cell barcodes.")
+		if whitelist_file is None:
+			logging.warning("Not correcting barcodes, because whitelist file was not provided.")
+		else:
+			logging.info("Correcting cell barcodes")
+			bus.correct(whitelist_file)
+			logging.info(f"Found {bus.n_cells:,} corrected cell barcodes.")
+		logging.info("Removing redundant reads using UMIs")
+		bus.deduplicate()
+		seq_sat = 1 - bus.bus_valid.sum() / bus.n_records
+		logging.info(f"{int(seq_sat * 100)}% sequencing saturation.")
+		bus.count()
+		logging.info(f"Found {bus.matrix.count_nonzero():,} UMIs.")
+		for layer, layer_def in manifest["layers"].items():
+			bus.count_layer(layer, os.path.join(index_path, layer_def))
+			logging.info(f"Found {bus.layers[layer].count_nonzero():,} UMIs.")
+		bus.remove_empty_beads(expected_n_cells)
+		logging.info(f"Creating loom file '{out_file}'")
+		bus.save(out_file, sample_id, samples_metadata_file)
+		with connect(out_file) as ds:
+			ds.attrs.Species = 
manifest["species"] + ds.attrs.Saturation = seq_sat + if run_info is not None: + ds.attrs.NumReadsProcessed = int(run_info["n_processed"]) + ds.attrs.NumPseudoaligned = int(run_info["n_pseudoaligned"]) + ds.attrs.KallistoCommand = run_info["call"] + ds.attrs.KallistoVersion = run_info["kallisto_version"] diff --git a/loompy/cell_calling.py b/loompy/cell_calling.py new file mode 100644 index 0000000..9a165d0 --- /dev/null +++ b/loompy/cell_calling.py @@ -0,0 +1,352 @@ +# Original algorithm was published by John Marioni and colleagues as EmptyDrops (Lun, A. et al. Distinguishing cells from empty droplets in droplet-based single-cell RNA sequencing data.) + +# This implementation is based on the code in cellranger v3.0 by 10x Genomics + +# Copyright 2018 10X Genomics, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import logging + +import numpy as np +import scipy.sparse as sparse +import scipy.stats as sp_stats + +# Simple Good-Turing estimator. +# Based on S implementation in +# William A. Gale & Geoffrey Sampson (1995) Good-turing frequency estimation without tears, +# Journal of Quantitative Linguistics, 2:3, 217-237, DOI: 10.1080/09296179508590051 + + +class SimpleGoodTuringError(Exception): + pass + + +def _averaging_transform(r, nr): + d = np.concatenate((np.ones(1, dtype=int), np.diff(r))) + dr = np.concatenate(( + 0.5 * (d[1:] + d[0:-1]), + np.array((d[-1],), dtype=float), + )) + return nr.astype(float) / dr + + +def _rstest(r, coef): + return r * np.power(1 + 1 / r, 1 + coef) + + +def simple_good_turing(xr, xnr): + """Make a Simple Good-Turing estimate of the frequencies. + + Args: + xr (np.array(int)): Non-zero item frequencies + xnr (np.array(int)): Non-zero frequencies of frequencies + Returns: + (rstar (np.array(float)), p0 (float)): + rstar: The adjusted non-zero frequencies + p0: The total probability of unobserved items + """ + + xr = xr.astype(float) + xnr = xnr.astype(float) + + xN = np.sum(xr * xnr) + + # Get Linear Good-Turing estimate + xnrz = _averaging_transform(xr, xnr) + slope, intercept, _, _, _ = sp_stats.linregress(np.log(xr), np.log(xnrz)) + + if slope > -1: + raise SimpleGoodTuringError("The log-log slope is > -1 (%d); the SGT estimator is not applicable to these data." 
% slope) + + xrst = _rstest(xr, slope) + xrstrel = xrst / xr + + # Get traditional Good-Turing estimate + xrtry = xr == np.concatenate((xr[1:] - 1, np.zeros(1))) + xrstarel = np.zeros(len(xr)) + xrstarel[xrtry] = (xr[xrtry] + 1) / xr[xrtry] * np.concatenate((xnr[1:], np.zeros(1)))[xrtry] / xnr[xrtry] + + # Determine when to switch from GT to LGT estimates + tursd = np.ones(len(xr)) + for i in range(len(xr)): + if xrtry[i]: + tursd[i] = float(i + 2) / xnr[i] * np.sqrt(xnr[i + 1] * (1 + xnr[i + 1] / xnr[i])) + + xrstcmbrel = np.zeros(len(xr)) + useturing = True + for r in range(len(xr)): + if not useturing: + xrstcmbrel[r] = xrstrel[r] + else: + if np.abs(xrstrel[r] - xrstarel[r]) * (1 + r) / tursd[r] > 1.65: + xrstcmbrel[r] = xrstarel[r] + else: + useturing = False + xrstcmbrel[r] = xrstrel[r] + + # Renormalize the probabilities for observed objects + sumpraw = np.sum(xrstcmbrel * xr * xnr / xN) + + xrstcmbrel = xrstcmbrel * (1 - xnr[0] / xN) / sumpraw + p0 = xnr[0] / xN + + return (xr * xrstcmbrel, p0) + + +def sgt_proportions(frequencies): + """Use Simple Good-Turing estimate to adjust for unobserved items + + Args: + frequencies (np.array(int)): Nonzero frequencies of items + Returns: + (pstar (np.array(float)), p0 (float)): + pstar: The adjusted non-zero proportions + p0: The total probability of unobserved items + """ + if len(frequencies) == 0: + raise ValueError("Input frequency vector is empty") + if np.count_nonzero(frequencies) != len(frequencies): + raise ValueError("Frequencies must be greater than zero") + + freqfreqs = np.bincount(frequencies.astype(np.int64)) + assert freqfreqs[0] == 0 + use_freqs = np.flatnonzero(freqfreqs) + + if len(use_freqs) < 10: + raise SimpleGoodTuringError("Too few non-zero frequency items (%d). Aborting SGT." % len(use_freqs)) + + rstar, p0 = simple_good_turing(use_freqs, freqfreqs[use_freqs]) + + # rstar contains the smoothed frequencies. + # Map each original frequency r to its smoothed rstar. + rstar_dict = dict(zip(use_freqs, rstar)) + + rstar_sum = np.sum(freqfreqs[use_freqs] * rstar) + rstar_i = np.fromiter((rstar_dict[f] for f in frequencies), dtype=float, count=len(frequencies)) + pstar = (1 - p0) * (rstar_i / rstar_sum) + + assert np.isclose(p0 + np.sum(pstar), 1) + return (pstar, p0) + + +def adjust_pvalue_bh(p): + """ Multiple testing correction of p-values using the Benjamini-Hochberg procedure """ + descending = np.argsort(p)[::-1] + # q = p * N / k where p = p-value, N = # tests, k = p-value rank + scale = float(len(p)) / np.arange(len(p), 0, -1) + q = np.minimum(1, np.minimum.accumulate(scale * p[descending])) + + # Return to original order + return q[np.argsort(descending)] + + +def eval_multinomial_loglikelihoods(matrix, profile_p, max_mem_gb=0.1): + """Compute the multinomial log PMF for many barcodes + Args: + matrix (scipy.sparse.csc_matrix): Matrix of UMI counts (feature x barcode) + profile_p (np.ndarray(float)): Multinomial probability vector + max_mem_gb (float): Try to bound memory usage. 
+ Returns: + log_likelihoods (np.ndarray(float)): Log-likelihood for each barcode + """ + gb_per_bc = float(matrix.shape[0] * matrix.dtype.itemsize) / (1024**3) + bcs_per_chunk = max(1, int(round(max_mem_gb / gb_per_bc))) + num_bcs = matrix.shape[1] + + loglk = np.zeros(num_bcs) + + for chunk_start in range(0, num_bcs, bcs_per_chunk): + chunk = slice(chunk_start, chunk_start + bcs_per_chunk) + matrix_chunk = matrix[:, chunk].transpose().toarray() + n = matrix_chunk.sum(1) + loglk[chunk] = sp_stats.multinomial.logpmf(matrix_chunk, n, p=profile_p) + return loglk + + +def simulate_multinomial_loglikelihoods(profile_p, umis_per_bc, num_sims=1000, jump=1000, n_sample_feature_block=1000000, verbose=False): + """Simulate draws from a multinomial distribution for various values of N. + Uses the approximation from Lun et al. ( https://www.biorxiv.org/content/biorxiv/early/2018/04/04/234872.full.pdf ) + Args: + profile_p (np.ndarray(float)): Probability of observing each feature. + umis_per_bc (np.ndarray(int)): UMI counts per barcode (multinomial N). + num_sims (int): Number of simulations per distinct N value. + jump (int): Vectorize the sampling if the gap between two distinct Ns exceeds this. + n_sample_feature_block (int): Vectorize this many feature samplings at a time. + Returns: + (distinct_ns (np.ndarray(int)), log_likelihoods (np.ndarray(float)): + distinct_ns is an array containing the distinct N values that were simulated. + log_likelihoods is a len(distinct_ns) x num_sims matrix containing the + simulated log likelihoods. + """ + distinct_n = np.flatnonzero(np.bincount(umis_per_bc.astype(np.int64))) + + loglk = np.zeros((len(distinct_n), num_sims), dtype=float) + + sampled_features = np.random.choice(len(profile_p), size=n_sample_feature_block, p=profile_p, replace=True) + k = 0 + + log_profile_p = np.log(profile_p) + + for sim_idx in range(num_sims): + curr_counts = np.ravel(sp_stats.multinomial.rvs(distinct_n[0], profile_p, size=1)) + + curr_loglk = sp_stats.multinomial.logpmf(curr_counts, distinct_n[0], p=profile_p) + + loglk[0, sim_idx] = curr_loglk + + for i in range(1, len(distinct_n)): + step = distinct_n[i] - distinct_n[i - 1] + if step >= jump: + # Instead of iterating for each n, sample the intermediate ns all at once + curr_counts += np.ravel(sp_stats.multinomial.rvs(step, profile_p, size=1)) + curr_loglk = sp_stats.multinomial.logpmf(curr_counts, distinct_n[i], p=profile_p) + assert not np.isnan(curr_loglk) + else: + # Iteratively sample between the two distinct values of n + for n in range(distinct_n[i - 1] + 1, distinct_n[i] + 1): + j = sampled_features[k] + k += 1 + if k >= n_sample_feature_block: + # Amortize this operation + sampled_features = np.random.choice(len(profile_p), size=n_sample_feature_block, p=profile_p, replace=True) + k = 0 + curr_counts[j] += 1 + curr_loglk += log_profile_p[j] + np.log(float(n) / curr_counts[j]) + + loglk[i, sim_idx] = curr_loglk + + return distinct_n, loglk + + +def compute_ambient_pvalues(umis_per_bc, obs_loglk, sim_n, sim_loglk): + """Compute p-values for observed multinomial log-likelihoods + Args: + umis_per_bc (nd.array(int)): UMI counts per barcode + obs_loglk (nd.array(float)): Observed log-likelihoods of each barcode deriving from an ambient profile + sim_n (nd.array(int)): Multinomial N for simulated log-likelihoods + sim_loglk (nd.array(float)): Simulated log-likelihoods of shape (len(sim_n), num_simulations) + Returns: + pvalues (nd.array(float)): p-values + """ + assert len(umis_per_bc) == len(obs_loglk) + assert 
sim_loglk.shape[0] == len(sim_n) + + # Find the index of the simulated N for each barcode + sim_n_idx = np.searchsorted(sim_n, umis_per_bc) + num_sims = sim_loglk.shape[1] + + num_barcodes = len(umis_per_bc) + + pvalues = np.zeros(num_barcodes) + + for i in range(num_barcodes): + num_lower_loglk = np.sum(sim_loglk[sim_n_idx[i], :] < obs_loglk[i]) + pvalues[i] = float(1 + num_lower_loglk) / (1 + num_sims) + return pvalues + + +def estimate_profile_sgt(matrix, barcode_indices, nz_feat): + """ Estimate a gene expression profile by Simple Good Turing. + Args: + raw_mat (sparse matrix): Sparse matrix of all counts + barcode_indices (np.array(int)): Barcode indices to use + nz_feat (np.array(int)): Indices of features that are non-zero at least once + Returns: + profile (np.array(float)): Estimated probabilities of length len(nz_feat). + """ + # Initial profile estimate + prof_mat = matrix[:, barcode_indices] + + profile = np.ravel(prof_mat[nz_feat, :].sum(axis=1)) + zero_feat = np.flatnonzero(profile == 0) + + # Simple Good Turing estimate + p_smoothed, p0 = sgt_proportions(profile[np.flatnonzero(profile)]) + + # Distribute p0 equally among the zero elements. + p0_i = p0 / len(zero_feat) + + profile_p = np.repeat(p0_i, len(nz_feat)) + profile_p[np.flatnonzero(profile)] = p_smoothed + + assert np.isclose(profile_p.sum(), 1.0) + return profile_p + + +# Construct a background expression profile from barcodes with <= T UMIs +def est_background_profile_sgt(matrix, use_bcs): + """ Estimate a gene expression profile on a given subset of barcodes. + Use Good-Turing to smooth the estimated profile. + Args: + matrix (scipy.sparse.csc_matrix): Sparse matrix of all counts + use_bcs (np.array(int)): Indices of barcodes to use (col indices into matrix) + Returns: + profile (use_features, np.array(float)): Estimated probabilities of length use_features. 
+ """ + # Use features that are nonzero anywhere in the data + use_feats = np.flatnonzero(np.asarray(matrix.sum(1))) + + # Estimate background profile + bg_profile_p = estimate_profile_sgt(matrix, use_bcs, use_feats) + + return (use_feats, bg_profile_p) + + +# Sten Linnarsson's version (Aug 2019) +def call_cells(matrix: sparse.csr_matrix, expected_n_cells: int = 5000) -> np.ndarray: + """ + Determine likely true cells among the barcodes by contrasting with the ambient RNA profile + + Args: + matrix: expression matrix + expected_n_cells: expected number of true cells in the sample + + Returns: + calls: vector of bools indicating true cell barcodes + """ + n_barcodes = matrix.shape[1] + expected_n_cells = min(expected_n_cells, n_barcodes // 5) + total_umis = np.array(matrix.sum(axis=0))[0] # total UMIs per barcode + # upper limit of UMIs for barcodes considered ambient, calculated as greatest UMI count after removing twice the expected number of cells + max_ambient_umis = np.percentile(total_umis, 100 * (n_barcodes - expected_n_cells * 2) / n_barcodes) + # median number of UMIs among the top expected_n_cells barcodes + median_initial_umis = np.median(total_umis[total_umis > np.percentile(total_umis, 100 * (n_barcodes - expected_n_cells) / n_barcodes)]) + min_cell_umis = int(max(500, median_initial_umis * 0.1)) # 10% of median, but at least 500 UMIs + + # Ambient RNA beads, covering the range 20 to max_amient_umis + ambient_bcs = (total_umis < max_ambient_umis) & (total_umis > 20) + if ambient_bcs.sum() == 0: + # No beads were ambient, because cells had very low UMIs + logging.warning("No ambient RNA beads were found; maybe sample had too few cells?") + return max_ambient_umis, np.ones_like(total_umis) + try: + eval_features, ambient_profile_p = est_background_profile_sgt(matrix, ambient_bcs) + except SimpleGoodTuringError as e: + logging.error(e) + return max_ambient_umis, np.ones_like(total_umis) + + # Evaluate candidate barcodes + eval_bcs = total_umis > min_cell_umis + eval_mat = matrix[eval_features, :][:, eval_bcs] + + # Compute observed log-likelihood of barcodes being generated from ambient RNA + obs_loglk = eval_multinomial_loglikelihoods(eval_mat, ambient_profile_p) + + # Simulate log likelihoods + distinct_ns, sim_loglk = simulate_multinomial_loglikelihoods(ambient_profile_p, total_umis[eval_bcs], num_sims=1000, verbose=True) + + # Compute p-values + pvalues = compute_ambient_pvalues(total_umis[eval_bcs], obs_loglk, distinct_ns, sim_loglk) + pvalues_adj = adjust_pvalue_bh(pvalues) + pvalues_adj_all = np.ones_like(total_umis) + pvalues_adj_all[eval_bcs] = pvalues_adj + return max_ambient_umis, pvalues_adj_all diff --git a/loompy/color.py b/loompy/color.py deleted file mode 100644 index 61c8482..0000000 --- a/loompy/color.py +++ /dev/null @@ -1,367 +0,0 @@ -import colorsys -import json -import os -import random -import sys -from typing import * - -import matplotlib.colors as colors -import matplotlib.pyplot as plt -import numpy as np -from sklearn.preprocessing import LabelEncoder - -# See https://graphicdesign.stackexchange.com/questions/3682/where-can-i-find-a-large-palette-set-of-contrasting-colors-for-coloring-many-d - -# Adapted from https://github.com/kevinwuhoo/randomcolor-py - -# Copyright (c) 2016 Kevin Wu - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -_hues = { - "blue": { - "hue_range": [179, 257], - "lower_bounds": [ - [20, 100], - [30, 86], - [40, 80], - [50, 74], - [60, 60], - [70, 52], - [80, 44], - [90, 39], - [100, 35] - ] - }, - "green": { - "hue_range": [63, 178], - "lower_bounds": [ - [30, 100], - [40, 90], - [50, 85], - [60, 81], - [70, 74], - [80, 64], - [90, 50], - [100, 40] - ] - }, - "monochrome": { - "hue_range": [0, 0], - "lower_bounds": [ - [0, 0], - [100, 0] - ] - }, - "orange": { - "hue_range": [19, 46], - "lower_bounds": [ - [20, 100], - [30, 93], - [40, 88], - [50, 86], - [60, 85], - [70, 70], - [100, 70] - ] - }, - "pink": { - "hue_range": [283, 334], - "lower_bounds": [ - [20, 100], - [30, 90], - [40, 86], - [60, 84], - [80, 80], - [90, 75], - [100, 73] - ] - }, - "purple": { - "hue_range": [258, 282], - "lower_bounds": [ - [20, 100], - [30, 87], - [40, 79], - [50, 70], - [60, 65], - [70, 59], - [80, 52], - [90, 45], - [100, 42] - ] - }, - "red": { - "hue_range": [-26, 18], - "lower_bounds": [ - [20, 100], - [30, 92], - [40, 89], - [50, 85], - [60, 78], - [70, 70], - [80, 60], - [90, 55], - [100, 50] - ] - }, - "yellow": { - "hue_range": [47, 62], - "lower_bounds": [ - [25, 100], - [40, 94], - [50, 89], - [60, 86], - [70, 84], - [80, 82], - [90, 80], - [100, 75] - ] - } -} - - -class RandomColor: - def __init__(self, seed=None): - # Load color dictionary and populate the color dictionary - self.colormap = _hues - self.seed = seed if seed else random.randint(0, sys.maxsize) - self.random = random.Random(self.seed) - - for color_name, color_attrs in self.colormap.items(): - lower_bounds = color_attrs['lower_bounds'] - s_min = lower_bounds[0][0] - s_max = lower_bounds[len(lower_bounds) - 1][0] - - b_min = lower_bounds[len(lower_bounds) - 1][1] - b_max = lower_bounds[0][1] - - self.colormap[color_name]['saturation_range'] = [s_min, s_max] - self.colormap[color_name]['brightness_range'] = [b_min, b_max] - - def generate(self, hue=None, luminosity=None, count=1, format_='hex'): - colors = [] - for _ in range(count): - # First we pick a hue (H) - H = self.pick_hue(hue) - - # Then use H to determine saturation (S) - S = self.pick_saturation(H, hue, luminosity) - - # Then use S and H to determine brightness (B). 
- B = self.pick_brightness(H, S, luminosity) - - # Then we return the HSB color in the desired format - colors.append(self.set_format([H, S, B], format_)) - - return colors - - def pick_hue(self, hue): - hue_range = self.get_hue_range(hue) - hue = self.random_within(hue_range) - - # Instead of storing red as two seperate ranges, - # we group them, using negative numbers - if (hue < 0): - hue += 360 - - return hue - - def pick_saturation(self, hue, hue_name, luminosity): - - if luminosity == 'random': - return self.random_within([0, 100]) - - if hue_name == 'monochrome': - return 0 - - saturation_range = self.get_saturation_range(hue) - - s_min = saturation_range[0] - s_max = saturation_range[1] - - if luminosity == 'bright': - s_min = 55 - elif luminosity == 'dark': - s_min = s_max - 10 - elif luminosity == 'light': - s_max = 55 - - return self.random_within([s_min, s_max]) - - def pick_brightness(self, H, S, luminosity): - b_min = self.get_minimum_brightness(H, S) - b_max = 100 - - if luminosity == 'dark': - b_max = b_min + 20 - elif luminosity == 'light': - b_min = (b_max + b_min) / 2 - elif luminosity == 'random': - b_min = 0 - b_max = 100 - - return self.random_within([b_min, b_max]) - - def set_format(self, hsv, format_): - if 'hsv' in format_: - color = hsv - elif 'rgb' in format_: - color = self.hsv_to_rgb(hsv) - elif 'hex' in format_: - r, g, b = self.hsv_to_rgb(hsv) - return '#%02x%02x%02x' % (r, g, b) - else: - return "unrecognized format" - - if "Array" in format_ or format_ == 'hex': - return color - else: - prefix = format_[:3] - color_values = [str(x) for x in color] - return "%s(%s)" % (prefix, ', '.join(color_values)) - - def get_minimum_brightness(self, H, S): - lower_bounds = self.get_color_info(H)['lower_bounds'] - - for i in range(len(lower_bounds) - 1): - s1 = lower_bounds[i][0] - v1 = lower_bounds[i][1] - - s2 = lower_bounds[i + 1][0] - v2 = lower_bounds[i + 1][1] - - if s1 <= S <= s2: - m = (v2 - v1) / (s2 - s1) - b = v1 - m * s1 - - return m * S + b - - return 0 - - def get_hue_range(self, color_input): - if color_input and color_input.isdigit(): - number = int(color_input) - - if 0 < number < 360: - return [number, number] - - elif color_input and color_input in self.colormap: - color = self.colormap[color_input] - if 'hue_range' in color: - return color['hue_range'] - - else: - return [0, 360] - - def get_saturation_range(self, hue): - return self.get_color_info(hue)['saturation_range'] - - def get_color_info(self, hue): - # Maps red colors to make picking hue easier - if 334 <= hue <= 360: - hue -= 360 - - for color_name, color in self.colormap.items(): - if color['hue_range'] and color['hue_range'][0] <= hue <= color['hue_range'][1]: - return self.colormap[color_name] - - # this should probably raise an exception - return 'Color not found' - - def random_within(self, r): - return self.random.randint(int(r[0]), int(r[1])) - - @classmethod - def hsv_to_rgb(cls, hsv): - h, s, v = hsv - h = 1 if h == 0 else h - h = 359 if h == 360 else h - - h = float(h) / 360 - s = float(s) / 100 - v = float(v) / 100 - - rgb = colorsys.hsv_to_rgb(h, s, v) - return [int(c * 255) for c in rgb] - - -zviridis = colors.LinearSegmentedColormap.from_list("zviridis", [(0.9, 0.9, 0.9, 1)] + list(plt.cm.viridis(np.arange(1000) / 1000)), N=1001) - -_color_alphabet = [ - '#efa2fe', - '#0075db', - '#983f00', - '#4c005c', - '#005c31', - '#2bcd48', - '#fecb98', - '#808080', - '#93feb4', - '#8e7c00', - '#9ccb00', - '#c10087', - '#003380', - '#fea305', - '#fea7ba', - '#426600', - '#fe0010', - 
'#5ef0f1', - '#00988e', - '#dffe66', - '#740afe', - '#980000', - '#fefe80', - '#fefe00', - '#fe5005' -] - - -def cat_colors(N: int = 1, *, hue: str = None, luminosity: str = None, bgvalue: int = None, loop: bool = False, seed: str = "cat") -> Union[List[Any], colors.LinearSegmentedColormap]: - """ - Return a colormap suitable for N categorical values, optimized to be both aesthetically pleasing and perceptually distinct. - - Args: - N The number of colors requested. - hue Controls the hue of the generated color. You can pass a string representing a color name: "red", "orange", "yellow", "green", "blue", "purple", "pink" and "monochrome" are currently supported. If you pass a hexidecimal color string such as "#00FFFF", its hue value will be used to generate colors. - luminosity Controls the luminosity of the generated color: "bright", "light" or "dark". - bgvalue If not None, then the corresponding index color will be set to light gray - loop If True, loop the color alphabet instead of generating random colors - seed If not None, use as the random seed (default: "cat") - - Returns: - A set of colors in the requested format, either a list of values or a matplotlib LinearSegmentedColormap (when format="cmap") - If N <= 25 and hue and luminosity are both None, a subset of the optimally perceptually distinct "color alphabet" is returned. - Else, a pleasing set of random colors is returned. - - Colors are designed to be displayed on a white background. - """ - c: List[str] = [] - if N <= 25 and hue is None and luminosity is None: - c = _color_alphabet[:N] - elif not loop: - c = RandomColor(seed=seed).generate(count=N, hue=hue, luminosity=luminosity, format_="hex") - else: - n = N - while n > 0: - c += _color_alphabet[:n] - n -= 25 - if bgvalue is not None: - c[bgvalue] = "#aaaaaa" - return colors.LinearSegmentedColormap.from_list("", c, N) diff --git a/loompy/commands.py b/loompy/commands.py new file mode 100644 index 0000000..05a674f --- /dev/null +++ b/loompy/commands.py @@ -0,0 +1,34 @@ +import logging +import os +import sys +from typing import List + +import click + +from ._version import __version__ +from .bus_file import create_from_fastq + + +@click.group() +@click.option('--show-message/--hide-message', default=True) +@click.option('--verbosity', default="info", type=click.Choice(['error', 'warning', 'info', 'debug'])) +def cli(show_message: bool = True, verbosity: str = "info") -> None: + level = {"error": 40, "warning": 30, "info": 20, "debug": 10}[verbosity] + logging.basicConfig(stream=sys.stdout, format='%(asctime)s - %(levelname)s - %(message)s', level=level) + logging.captureWarnings(True) + + if show_message: + print(f"Loompy v{__version__} by Linnarsson Lab 🌸 (http://linnarssonlab.org & http://loompy.org)") + print() + + +@cli.command() +@click.argument('loomfile', required=True) +@click.argument('sampleid', required=True) +@click.argument('indexdir', required=True) +@click.argument('metadatafile', required=True) +@click.argument('fastqs', required=True, nargs=-1) +@click.option('--threads', default=os.cpu_count(), help="Number of threads to use") +def fromfq(loomfile: str, sampleid: str, indexdir: str, metadatafile: str, threads: int, fastqs: List[str]) -> None: + logging.info(f"Using {threads} threads.") + create_from_fastq(loomfile, sampleid, list(fastqs), indexdir, metadatafile, threads) diff --git a/loompy/file_attribute_manager.py b/loompy/file_attribute_manager.py deleted file mode 100644 index 323e905..0000000 --- a/loompy/file_attribute_manager.py +++ /dev/null @@ -1,86 
+0,0 @@ -from typing import * -import scipy.sparse as sparse -import numpy as np -import loompy -import warnings -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - import h5py - - -class FileAttributeManager(object): - def __init__(self, f: h5py.File) -> None: - setattr(self, "!f", f) - storage: Dict[str, str] = {} - setattr(self, "!storage", storage) - for key, val in f.attrs.items(): - materialized = loompy.materialize_attr_values(val) - self.__dict__["storage"][key] = materialized - - def keys(self) -> List[str]: - return list(self.__dict__["storage"].keys()) - - def items(self) -> Iterable[Tuple[str, sparse.coo_matrix]]: - for key in self.keys(): - yield (key, self[key]) - - def __len__(self) -> int: - return len(self.keys()) - - def __contains__(self, name: str) -> bool: - return name in self.keys() - - def __iter__(self) -> Iterator[str]: - for key in self.keys(): - yield key - - def __getitem__(self, thing: Any) -> np.ndarray: - return self.__getattr__(thing) - - def __getattr__(self, name: str) -> np.ndarray: - try: - return self.__dict__["storage"][name] - except KeyError: - if self.f is not None: - if name in self.f.attrs: - val = self.f.attrs[name] - materialized = loompy.materialize_attr_values(val) - self.__dict__["storage"][name] = materialized - return materialized - raise AttributeError(f"'{type(self)}' object has no attribute '{name}'") - - def __setitem__(self, name: str, val: Any) -> None: - return self.__setattr__(name, val) - - def __setattr__(self, name: str, val: Any) -> None: - if name.startswith("!"): - super(FileAttributeManager, self).__setattr__(name[1:], val) - elif "/" in name: - raise KeyError("Attribute name cannot contain slash (/)") - else: - if self.f is not None: - normalized = loompy.normalize_attr_values(val) - self.f.attrs[name] = normalized - self.f.flush() - val = self.f.attrs[name] - # Read it back in to ensure it's synced and normalized - normalized = loompy.materialize_attr_values(val) - self.__dict__["storage"][name] = normalized - - def __delattr__(self, name: str) -> None: - if name.startswith("!"): - super(FileAttributeManager, self).__delattr__(name[1:]) - else: - if self.f is not None: - del self.f.attrs[name] - del self.__dict__["storage"][name] - - def get(self, name: str, default: Any = None) -> np.ndarray: - """ - Return the value for a named attribute if it exists, else default. - If default is not given, it defaults to None, so that this method never raises a KeyError. 
- """ - if name in self: - return self[name] - else: - return default diff --git a/loompy/global_attribute_manager.py b/loompy/global_attribute_manager.py new file mode 100644 index 0000000..1d703ee --- /dev/null +++ b/loompy/global_attribute_manager.py @@ -0,0 +1,118 @@ +from typing import Dict, List, Iterable, Tuple, Any, Iterator +import scipy.sparse as sparse +import numpy as np +import loompy +import warnings +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + import h5py +from .utils import compare_loom_spec_version + + +class GlobalAttributeManager(object): + def __init__(self, f: h5py.File) -> None: + setattr(self, "!f", f) + storage: Dict[str, str] = {} + setattr(self, "!storage", storage) + if "attrs" not in self.f: + for key, val in f.attrs.items(): + materialized = loompy.materialize_attr_values(val) + self.__dict__["storage"][key] = materialized + else: + for key, val in f["attrs"].items(): + materialized = loompy.materialize_attr_values(val[()]) + self.__dict__["storage"][key] = materialized + + def keys(self) -> List[str]: + return list(self.__dict__["storage"].keys()) + + def items(self) -> Iterable[Tuple[str, sparse.coo_matrix]]: + for key in self.keys(): + yield (key, self[key]) + + def __len__(self) -> int: + return len(self.keys()) + + def __contains__(self, name: str) -> bool: + return name in self.keys() + + def __iter__(self) -> Iterator[str]: + for key in self.keys(): + yield key + + def __getitem__(self, thing: Any) -> np.ndarray: + return self.__getattr__(thing) + + def __getattr__(self, name: str) -> np.ndarray: + try: + return self.__dict__["storage"][name] + except KeyError: + if self.f is not None: + if loompy.compare_loom_spec_version(self.f, "3.0.0") < 0: + if name in self.f.attrs: + val = self.f.attrs[name] + else: + raise AttributeError(f"File has no global attribute '{name}'") + else: + if name in self.f["attrs"]: + val = self.f["attrs"][name] + else: + raise AttributeError(f"File has no global attribute '{name}'") + materialized = loompy.materialize_attr_values(val) + self.__dict__["storage"][name] = materialized + return materialized + + def __setitem__(self, name: str, val: Any) -> None: + return self.__setattr__(name, val) + + def __setattr__(self, name: str, val: Any) -> None: + if name.startswith("!"): + super(GlobalAttributeManager, self).__setattr__(name[1:], val) + elif "/" in name: + raise KeyError("Attribute name cannot contain slash (/)") + else: + if self.f is not None: + if loompy.compare_loom_spec_version(self.f, "3.0.0") < 0 and "attrs" not in self.f["/"]: + normalized = loompy.normalize_attr_values(val, False) + self.f.attrs[name] = normalized + self.f.flush() + val = self.f.attrs[name] + # Read it back in to ensure it's synced and normalized + normalized = loompy.materialize_attr_values(val) + self.__dict__["storage"][name] = normalized + else: + normalized = loompy.normalize_attr_values(val, True) + if name in self.f["attrs"]: + del self.f["attrs"][name] + if not np.isscalar(normalized) and normalized.dtype == np.object_: + self.ds._file.create_dataset("attrs/" + name, data=normalized, dtype=h5py.string_dtype(encoding="utf-8")) + else: + self.f["attrs"][name] = normalized + self.f.flush() + val = self.f["attrs"][name][()] + # Read it back in to ensure it's synced and normalized + normalized = loompy.materialize_attr_values(val) + self.__dict__["storage"][name] = normalized + + def __delattr__(self, name: str) -> None: + if name.startswith("!"): + super(GlobalAttributeManager, self).__delattr__(name[1:]) + else: + if 
self.f is not None: + if loompy.compare_loom_spec_version(self.f, "3.0.0") < 0: + if name in self.f.attrs: + del self.f.attrs[name] + else: + if name in self.f["attrs"]: + del self.f["attrs"][name] + del self.__dict__["storage"][name] + + def get(self, name: str, default: Any = None) -> np.ndarray: + """ + Return the value for a named attribute if it exists, else default. + If default is not given, it defaults to None, so that this method never raises a KeyError. + """ + if name in self: + return self[name] + else: + return default diff --git a/loompy/layer_manager.py b/loompy/layer_manager.py index 5c45354..8327efc 100644 --- a/loompy/layer_manager.py +++ b/loompy/layer_manager.py @@ -146,7 +146,7 @@ def __setattr__(self, name: str, val: np.ndarray) -> None: # Fill the matrix with sparse data if sparse.issparse(val): - m = val.tocsr() + m = val.tocsc() window = 6400 ix = 0 while ix < val.shape[1]: diff --git a/loompy/loom_validator.py b/loompy/loom_validator.py index ea6ce29..9d9cfa4 100644 --- a/loompy/loom_validator.py +++ b/loompy/loom_validator.py @@ -6,10 +6,10 @@ class LoomValidator: - def __init__(self, version: str = "2.0.1") -> None: + def __init__(self, version: str = "3.0.0") -> None: """ Args: - version: The Loom file format version to validate against ("2.0.1" or "old") + version: The Loom file format version to validate against ("3.0.0", "2.0.1", "old") Remarks: "old" version will accept files that lack the "row_graphs" and "col_graphs" groups diff --git a/loompy/loom_view.py b/loompy/loom_view.py index e1ec081..905e1ae 100644 --- a/loompy/loom_view.py +++ b/loompy/loom_view.py @@ -7,7 +7,7 @@ class LoomView: """ An in-memory loom dataset """ - def __init__(self, layers: loompy.LayerManager, row_attrs: loompy.AttributeManager, col_attrs: loompy.AttributeManager, row_graphs: loompy.GraphManager, col_graphs: loompy.GraphManager, *, filename: str = "", file_attrs: loompy.FileAttributeManager = None) -> None: + def __init__(self, layers: loompy.LayerManager, row_attrs: loompy.AttributeManager, col_attrs: loompy.AttributeManager, row_graphs: loompy.GraphManager, col_graphs: loompy.GraphManager, *, filename: str = "", file_attrs: loompy.GlobalAttributeManager = None) -> None: self.filename = filename self.view = loompy.ViewManager(self) self.layers = layers diff --git a/loompy/loompy.py b/loompy/loompy.py index 81ad760..a7a21f4 100755 --- a/loompy/loompy.py +++ b/loompy/loompy.py @@ -25,9 +25,8 @@ import gzip import logging import os.path -import time from shutil import copyfile -from typing import * +from typing import Tuple, Union, Any, Dict, List, Iterable, Callable, Optional import h5py import numpy as np @@ -92,7 +91,7 @@ def __init__(self, filename: str, mode: str = 'r+', *, validate: bool = True, sp self.view = loompy.ViewManager(self) #: Create a view of the file by slicing this attribute, like ``ds.view[:100, :100]`` self.ra = loompy.AttributeManager(self, axis=0) #: Row attributes, dict-like with np.ndarray values self.ca = loompy.AttributeManager(self, axis=1) #: Column attributes, dict-like with np.ndarray values - self.attrs = loompy.FileAttributeManager(self._file) #: Global attributes + self.attrs = loompy.GlobalAttributeManager(self._file) #: Global attributes self.row_graphs = loompy.GraphManager(self, axis=0) #: Row graphs, dict-like with values that are :class:`scipy.sparse.coo_matrix` objects self.col_graphs = loompy.GraphManager(self, axis=1) #: Column graphs, dict-like with values that are :class:`scipy.sparse.coo_matrix` objects @@ -467,7 +466,7 @@ def 
 			ca = {key: v[selection] for key, v in other.col_attrs.items()}
 			if ordering is not None:
 				vals = {key: val[ordering, :] for key, val in vals.items()}
-			self.add_columns(vals, ca, fill_values=fill_values)
+			self.add_columns(vals, ca, fill_values=fill_values)  # type: ignore
 
 		if include_graphs:
 			for gname in self.col_graphs.keys():
@@ -614,7 +613,7 @@ def scan(self, *, items: np.ndarray = None, axis: int = None, layers: Iterable =
 					temp = temp[ordering, :]
 					if selection is not None:
 						temp = temp[:, selection]
-					vals[layer] = loompy.MemoryLoomLayer(layer, temp) 
+					vals[layer] = loompy.MemoryLoomLayer(layer, temp)
 				lm = loompy.LayerManager(None)
 				for key, layer in vals.items():
 					lm[key] = loompy.MemoryLoomLayer(key, layer)
@@ -659,7 +658,7 @@ def scan(self, *, items: np.ndarray = None, axis: int = None, layers: Iterable =
 					ix += rows_per_chunk
 					continue
 
-				if selection.shape[0] ==rows_per_chunk:
+				if selection.shape[0] == rows_per_chunk:
 					selection = None  # Meaning, select all rows
 
 				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
@@ -722,7 +721,6 @@ def batch_scan(self, cells: np.ndarray = None, genes: np.ndarray = None, axis: i
 					ix += cols_per_chunk
 					continue
 
-
 				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
 				vals = self.layers[layer][:, ix:ix + cols_per_chunk]
 				vals = vals[genes, :]
@@ -851,7 +849,6 @@ def permute(self, ordering: np.ndarray, axis: int) -> None:
 			self.col_attrs._permute(ordering)
 			self.col_graphs._permute(ordering)
 
-
 	def aggregate(self, out_file: str = None, select: np.ndarray = None, group_by: Union[str, np.ndarray] = "Clusters", aggr_by: str = "mean", aggr_ca_by: Dict[str, str] = None) -> np.ndarray:
 		"""
 		Aggregate the Loom file by applying aggregation functions to the main matrix as well as to the column attributes
@@ -984,6 +981,7 @@ def new(filename: str, *, file_attrs: Optional[Dict[str, str]] = None) -> LoomCo
 	# Create the file (empty).
 	# Yes, this might cause an exception, which we prefer to send to the caller
 	f = h5py.File(name=filename, mode='w')
+	f.create_group('/attrs')  # v3.0.0
 	f.create_group('/layers')
 	f.create_group('/row_attrs')
 	f.create_group('/col_attrs')
@@ -994,9 +992,11 @@ def new(filename: str, *, file_attrs: Optional[Dict[str, str]] = None) -> LoomCo
 	ds = connect(filename, validate=False)
 	for vals in file_attrs:
-		ds.attrs[vals] = file_attrs[vals]
+		if file_attrs[vals] is None:
+			ds.attrs[vals] = "None"
+		else:
+			ds.attrs[vals] = file_attrs[vals]
 
 	# store creation date
-	currentTime = time.localtime(time.time())
 	ds.attrs['CreationDate'] = timestamp()
 	ds.attrs["LOOM_SPEC_VERSION"] = loompy.loom_spec_version
 	return ds
@@ -1047,15 +1047,18 @@ def create(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loomp
 		if layer.shape != shape:  # type: ignore
 			raise ValueError(f"Layer '{name}' is not the same shape as the main matrix")
 	for name, ra in row_attrs.items():
-		if ra.shape[0] != shape[0]:
-			raise ValueError(f"Row attribute '{name}' is not the same length ({ra.shape[0]}) as number of rows in main matrix ({shape[0]})")
+		if len(ra) != shape[0]:
+			raise ValueError(f"Row attribute '{name}' is not the same length ({len(ra)}) as number of rows in main matrix ({shape[0]})")
 	for name, ca in col_attrs.items():
-		if ca.shape[0] != shape[1]:
-			raise ValueError(f"Column attribute '{name}' is not the same length ({ca.shape[0]}) as number of columns in main matrix ({shape[1]})")
+		if len(ca) != shape[1]:
+			raise ValueError(f"Column attribute '{name}' is not the same length ({len(ca)}) as number of columns in main matrix ({shape[1]})")
 
 	try:
 		with new(filename, file_attrs=file_attrs) as ds:
+			ds.layer[""] = layers[""]
 			for key, vals in layers.items():
+				if key == "":
+					continue
 				ds.layer[key] = vals
 
 			for key, vals in row_attrs.items():
@@ -1065,7 +1068,6 @@ def create(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loomp
 			ds.ca[key] = vals
 
 	except ValueError as ve:
-		#ds.close(suppress_warning=True)  # ds does not exist here
 		if os.path.exists(filename):
 			os.remove(filename)
 		raise ve
@@ -1094,16 +1096,16 @@ def create_from_cellranger(indir: str, outdir: str = None, genome: str = None) -
 		if genome is None:
 			genome = [f for f in os.listdir(matrix_folder) if not f.startswith(".")][0]
 		matrix_folder = os.path.join(matrix_folder, genome)
-		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx")).astype("float32").todense()
+		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx")).todense()
 		genelines = open(os.path.join(matrix_folder, "genes.tsv"), "r").readlines()
 		bclines = open(os.path.join(matrix_folder, "barcodes.tsv"), "r").readlines()
-	else: # cellranger V3 file locations
+	else:  # cellranger V3 file locations
 		if genome is None:
-			genome = "" # Genome is not visible from V3 folder
+			genome = ""  # Genome is not visible from V3 folder
 		matrix_folder = os.path.join(indir, 'outs', 'filtered_feature_bc_matrix')
-		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx.gz")).astype("float32").todense()
-		genelines = [ l.decode() for l in gzip.open(os.path.join(matrix_folder, "features.tsv.gz"), "r").readlines() ]
-		bclines = [ l.decode() for l in gzip.open(os.path.join(matrix_folder, "barcodes.tsv.gz"), "r").readlines() ]
+		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx.gz")).todense()
+		genelines = [l.decode() for l in gzip.open(os.path.join(matrix_folder, "features.tsv.gz"), "r").readlines()]
+		bclines = [l.decode() for l in gzip.open(os.path.join(matrix_folder, "barcodes.tsv.gz"), "r").readlines()]
 
 	accession = np.array([x.split("\t")[0] for x in genelines]).astype("str")
 	gene = np.array([x.split("\t")[1].strip() for x in genelines]).astype("str")
@@ -1131,6 +1133,90 @@ def create_from_cellranger(indir: str, outdir: str = None, genome: str = None) -
 	return path
+
+
+def create_from_matrix_market(out_file: str, sample_id: str, layer_paths: Dict[str, str], row_metadata_path: str, column_metadata_path: str, delim: str = "\t", skip_row_headers: bool = False, skip_column_headers: bool = False, file_attrs: Dict[str, str] = None, matrix_transposed: bool = False) -> None:
+	"""
+	Create a .loom file from .mtx matrix market format
+
+	Args:
+		out_file: path to the newly created .loom file (will be overwritten if it exists)
+		sample_id: string to use as prefix for cell IDs
+		layer_paths: dict mapping layer names to paths to the corresponding matrix file (usually with .mtx extension)
+		row_metadata_path: path to the row (usually genes) metadata file
+		column_metadata_path: path to the column (usually cells) metadata file
+		delim: delimiter used for metadata (default: "\t")
+		skip_row_headers: if true, skip first line in rows metadata file
+		skip_column_headers: if true, skip first line in columns metadata file
+		file_attrs: dict of global file attributes, or None
+		matrix_transposed: if true, the main matrix is transposed
+
+	Remarks:
+		layer_paths should typically map the empty string to a matrix market file: {"": "path/to/filename.mtx"}.
+		To create a multilayer loom file, map multiple named layers: {"": "path/to/layer1.mtx", "layer2": "path/to/layer2.mtx"}.
+		Note: the created file MUST have a main layer named "". If no such layer is given, BUT all given layers are the same
+		datatype, then a main layer will be created as the sum of the other layers. For example, {"spliced": "spliced.mtx", "unspliced": "unspliced.mtx"}
+		will create three layers, "", "spliced", and "unspliced", where "" is the sum of the other two.
+	"""
+	layers: Dict[str, Union[np.ndarray, scipy.sparse.coo_matrix]] = {}
+
+	for name, path in layer_paths.items():
+		matrix = mmread(path)
+		if matrix_transposed:
+			matrix = matrix.T
+		layers[name] = matrix
+	if "" not in layers:
+		main_matrix = None
+		for name, matrix in layers.items():
+			if main_matrix is None:
+				main_matrix = matrix.copy()
+			else:
+				main_matrix = main_matrix + matrix
+		layers[""] = main_matrix
+
+	genelines = open(row_metadata_path, "r").readlines()
+	bclines = open(column_metadata_path, "r").readlines()
+	if skip_row_headers:
+		genelines = genelines[1:]
+	if skip_column_headers:
+		bclines = bclines[1:]
+
+	accession = np.array([x.split(delim)[0] for x in genelines]).astype("str")
+	if len(genelines[0].split(delim)) > 1:
+		gene = np.array([x.split(delim)[1].strip() for x in genelines]).astype("str")
+		row_attrs = {"Accession": accession, "Gene": gene}
+	else:
+		row_attrs = {"Accession": accession}
+
+	cellids = np.array([sample_id + ":" + x.strip() for x in bclines]).astype("str")
+	col_attrs = {"CellID": cellids}
+
+	create(out_file, layers[""], row_attrs, col_attrs, file_attrs=file_attrs)
+
+	if len(layers) > 1:
+		with loompy.connect(out_file) as ds:
+			for name, layer in layers.items():
+				if name == "":
+					continue
+				ds[name] = layer
+
+
+def create_from_kallistobus(out_file: str, in_dir: str, tr2g_file: str, whitelist_file: str, file_attrs: Dict[str, str] = None, layers: Dict[str, str] = None) -> None:
+	"""
+	Create a loom file from a kallisto-bus output folder.
+
+	Args:
+		out_file			Full path to the loom file to be created
+		in_dir				Full path to the kallisto-bus directory (containing output.bus, matrix.ec and transcripts.txt)
+		tr2g_file			Full path to the transcripts-to-genes mapping file
+		whitelist_file		Full path to the barcode whitelist file (e.g. 10xv2_whitelist.txt)
+		file_attrs			Optional dictionary of global attributes
+		layers				Dict of {layer_name: capture_file_path} to define extra layers
+	"""
+
+	raise NotImplementedError("create_from_kallistobus() is not yet implemented")
+
+	# def import(file: str, key: str)
+	# def demote()
+
 
 def combine(files: List[str], output_file: str, key: str = None, file_attrs: Dict[str, str] = None, batch_size: int = 1000, convert_attrs: bool = False) -> None:
 	"""
 	Combine two or more loom files and save as a new loom file
diff --git a/loompy/normalize.py b/loompy/normalize.py
index ed4a938..4b9c0cc 100644
--- a/loompy/normalize.py
+++ b/loompy/normalize.py
@@ -47,7 +47,7 @@ def normalize_attr_array(a: Any) -> np.ndarray:
 		raise ValueError("Argument must be a list, tuple, numpy matrix, numpy ndarray or sparse matrix.")
 
 
-def normalize_attr_values(a: Any) -> np.ndarray:
+def normalize_attr_values(a: Any, use_object_strings: bool = False) -> np.ndarray:
 	"""
 	Take all kinds of input values and validate/normalize them.
 
@@ -70,7 +70,10 @@ def normalize_attr_values(a: Any) -> np.ndarray:
 	if np.issubdtype(arr.dtype, np.integer) or np.issubdtype(arr.dtype, np.floating):
 		pass  # We allow all these types
 	elif np.issubdtype(arr.dtype, np.character) or np.issubdtype(arr.dtype, np.object_):
-		arr = normalize_attr_strings(arr)
+		if use_object_strings:
+			arr = np.array([str(elm) for elm in a], dtype=object)
+		else:
+			arr = normalize_attr_strings(arr)
 	elif np.issubdtype(arr.dtype, np.bool_):
 		arr = arr.astype('ubyte')
 	if scalar:
diff --git a/loompy/utils.py b/loompy/utils.py
index 2f31ed3..9d3df9f 100644
--- a/loompy/utils.py
+++ b/loompy/utils.py
@@ -1,7 +1,8 @@
 import logging
-from typing import *
 from inspect import currentframe, getouterframes
 import datetime
+from h5py import File as HDF5File
+from .normalize import materialize_attr_values
 
 
 def deprecated(message: str) -> None:
@@ -12,3 +13,21 @@ def deprecated(message: str) -> None:
 
 def timestamp() -> str:
	return datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S.%fZ")
+
+
+def get_loom_spec_version(f: HDF5File) -> str:
+	if "attrs" in f and "LOOM_SPEC_VERSION" in f["/attrs"]:
+		return materialize_attr_values(f["/attrs"]["LOOM_SPEC_VERSION"][()])
+	if "LOOM_SPEC_VERSION" in f.attrs:
+		return materialize_attr_values(f.attrs["LOOM_SPEC_VERSION"])
+	return "0.0.0"
+
+
+def compare_loom_spec_version(f: HDF5File, v: str) -> int:
+	vf = int("".join(get_loom_spec_version(f).split(".")))  # assumes single-digit version components, e.g. "3.0.0" -> 300
+	vc = int("".join(v.split(".")))
+	if vf == vc:
+		return 0
+	if vf > vc:
+		return 1
+	return -1
diff --git a/setup.cfg b/setup.cfg
index 09a2b36..1be5f4c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,13 +1,9 @@
 [pycodestyle]
-ignore = W191, W293, E501, E128
+ignore = W191, W293, W0312, W0401, E501, E701, E117
 max-line-length = 160
-
 [pep8]
-ignore = W191, W293, E501, E128
+ignore = W191, W293, W0312, W0401, E501, E701, E117
 max-line-length = 160
-
-[egg_info]
-tag_build = 
-tag_date = 0
-tag_svn_revision = 0
-
+[flake8]
+ignore = W191, W293, W0312, W0401, E501, E701, E117, E251
+max-line-length = 160
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6150ad5..076d87e 100644
--- a/setup.py
+++ b/setup.py
@@ -2,11 +2,7 @@
 
 # First update the version in loompy/_version.py, then:
 
-# cd ~/code (the directory where loompy resides)
-# cp -R loompy loompy-3.5
-# cd loompy-3.5/loompy
-# for f in *.py; do py-backwards -i $f -o $f -t 3.5; done
-# cd ..
+# cd ~/code/loompy (the directory where loompy resides)
 # rm -r dist (otherwise twine will upload the oldest build!)
 # python setup.py sdist
 # twine upload dist/*
@@ -22,8 +18,11 @@
 	version=__version__,
 	packages=find_packages(),
 	python_requires='>=3.6',
-	install_requires=['h5py', 'numpy', 'scipy', 'setuptools', 'pandas'],
-	extras_require=dict(colors=['matplotlib']),
+	install_requires=['h5py', 'numpy', 'scipy', 'setuptools', 'numba', 'click'],
+	entry_points='''
+		[console_scripts]
+		loompy=loompy.commands:cli
+	''',
 	# metadata for upload to PyPI
 	author="Linnarsson Lab",
 	author_email="sten.linnarsson@ki.se",
diff --git a/test.loom b/test.loom
new file mode 100644
index 0000000..20d4eb6
Binary files /dev/null and b/test.loom differ
diff --git a/tests/debug.py b/tests/debug.py
new file mode 100644
index 0000000..3b2572c
--- /dev/null
+++ b/tests/debug.py
@@ -0,0 +1,29 @@
+import sys
+import logging
+import loompy
+
+# Create logger
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+# Create STDERR handler
+handler = logging.StreamHandler(sys.stderr)
+# handler.setLevel(logging.DEBUG)
+
+# Create formatter and add it to the handler
+formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
+handler.setFormatter(formatter)
+
+# Set STDERR handler as the only handler
+logger.handlers = [handler]
+
+d = "/Users/stelin/kallisto_GRCh38/"
+
+fastqs = [
+	d + "HLFGJBCXY/10X_17_029_S2_L002_R1_001.fastq.gz",
+	d + "HLFGJBCXY/10X_17_029_S2_L002_R2_001.fastq.gz"
+	# d + "HL73JBCXY/10X_17_029_S2_L002_R1_001.fastq.gz",
+	# d + "HL73JBCXY/10X_17_029_S2_L002_R2_001.fastq.gz"
+]
+# loompy.create_from_fastq(d + "10X_17_029.loom", "10X_17_029", fastqs, d + "human_GRCh38_gencode.v31", d + "samples.tab", n_threads=6)
+loompy.create_from_fastq(d + "10X_17_029.loom", "10X_17_029", fastqs, d + "human_GRCh38_gencode.v31", "/Users/stelin/sqlite3_chromium.db", n_threads=6)
diff --git a/tests/test_file_attribute_manager.py b/tests/test_file_attribute_manager.py
index fd36206..b5e5fd8 100644
--- a/tests/test_file_attribute_manager.py
+++ b/tests/test_file_attribute_manager.py
@@ -5,10 +5,10 @@
 import h5py
 import numpy as np
 
-from loompy import FileAttributeManager
+from loompy import GlobalAttributeManager
 
 
-class FileAttributeManagerTests(TestCase):
+class GlobalAttributeManagerTests(TestCase):
 	VALUE_IN_FILE = np.arange(3)
 
 	def setUp(self):
@@ -23,7 +23,7 @@ def tearDown(self):
 		os.remove(self.filename)
 
 	def test_get(self):
-		m = FileAttributeManager(self.file)
+		m = GlobalAttributeManager(self.file)
 		default = np.arange(2, 5)
 		val = m.get("arr")
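
A minimal usage sketch of the reworked global attributes API introduced in this patch; the file path and attribute names are hypothetical:

import loompy

with loompy.connect("mydata.loom") as ds:  # hypothetical file
	ds.attrs["Title"] = "My dataset"  # stored as a dataset under /attrs in a v3.0.0 file
	print(ds.attrs.LOOM_SPEC_VERSION)  # attribute-style access
	print(ds.attrs["Title"])  # dict-style access
	print(ds.attrs.get("Missing", "n/a"))  # get() with a default never raises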
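
The version helpers in utils.py order versions by joining the dotted components into an integer ("3.0.0" becomes 300; a file with no recorded version reads as "0.0.0"), which assumes single-digit components. A sketch of the reader-side branching this enables, with a hypothetical file path:

import h5py
from loompy import compare_loom_spec_version

with h5py.File("mydata.loom", "r") as f:  # hypothetical file
	if compare_loom_spec_version(f, "3.0.0") >= 0:
		print("v3 file: global attributes live under /attrs")
	else:
		print("legacy file: global attributes are HDF5 attrs on the root group")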
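
And a sketch of calling the new create_from_matrix_market entry point; all paths are hypothetical, and the two-layer form exercises the layer-summing behaviour described in its docstring:

import loompy

loompy.create_from_matrix_market(
	"sample.loom",  # out_file (hypothetical)
	"sample1",  # sample_id, used as a prefix for each CellID
	{"spliced": "spliced.mtx", "unspliced": "unspliced.mtx"},  # main layer "" is created as their sum
	"genes.tsv",  # row metadata: accession, optionally followed by gene symbol
	"barcodes.tsv",  # column metadata: one barcode per line
)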