Add minSize option which moves small languages into other result (#35)

Nixinova · web-flow · commit 6798cfe2b2a7 · 2025-02-08T17:26:17.000+13:00
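Implements the `minSize` option proposed in #32. Languages whose size falls below the user-specified threshold (given with a unit: b, kb, mb, or % of the total) are removed from the displayed results and combined into a single `Other` entry; languages at or above the threshold are shown as before. This is CLI-only and doesn't affect the outputted schema.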
diff --git a/src/cli.ts b/src/cli.ts
@@ -22,6 +22,7 @@ program
 	.option('-j|--json [bool]', 'Display the output as JSON', false)
 	.option('-t|--tree <traversal>', 'Which part of the output JSON to display (dot-delimited)')
 	.option('-F|--listFiles [bool]', 'Whether to list every matching file under the language results', false)
+	.option('-m|--minSize <size>', 'Minimum file size to show language results for (must have a unit: b, kb, mb, %)')
 	.option('-q|--quick [bool]', 'Skip complex language analysis (alias for -{A|I|H|S}=false)', false)
 	.option('-o|--offline [bool]', 'Use packaged data files instead of fetching latest from GitHub', false)
 	.option('-L|--calculateLines [bool]', 'Calculate lines of code totals', true)
@@ -61,12 +62,41 @@ if (args.analyze) (async () => {
 	const { files, languages, unknown } = data;
 	// Print output
 	if (!args.json) {
-		const sortedEntries = Object.entries(languages.results).sort((a, b) => a[1].bytes < b[1].bytes ? +1 : -1);
+		// Ignore languages with a bytes/% size less than the declared min size
+		if (args.minSize) {
+			const totalSize = languages.bytes;
+			const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2
+			const minSizeUnit = args.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb'
+			const conversionFactors: Record<string, (n: number) => number> = {
+				'b': n => n,
+				'kb': n => n * 1e3,
+				'mb': n => n * 1e6,
+				'%': n => n * totalSize / 100,
+			};
+			const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt);
+			const other = { bytes: 0, lines: { total: 0, content: 0, code: 0 } };
+			// Apply specified minimums: delete language results that do not reach the threshold
+			for (const [lang, data] of Object.entries(languages.results)) {
+				if (data.bytes < minBytesSize) {
+					// Add to 'other' count
+					other.bytes += data.bytes;
+					other.lines.total += data.lines.total;
+					other.lines.content += data.lines.content;
+					other.lines.code += data.lines.code;
+					// Remove language result
+					delete languages.results[lang];
+				}
+			}
+			languages.results['Other'] = { ...other, type: null! };
+		}
+
+		const sortedEntries = Object.entries(languages.results).sort((a, b) => (a[1].bytes < b[1].bytes ? +1 : -1));
 		const totalBytes = languages.bytes;
 		console.log(`\n Analysed ${files.bytes.toLocaleString()} B from ${files.count} files with linguist-js`);
 		console.log(`\n Language analysis results: \n`);
 		let count = 0;
 		if (sortedEntries.length === 0) console.log(`  None`);
+
 		// Collate files per language
 		const filesPerLanguage: Record<string, string[]> = {};
 		if (args.listFiles) {