Skip to content

Commit daaf868

Browse files
authored
Add alternatives key for listing other possible language matches (#19)
Files that have no definite match are classified using the fallback (first-listed) language; each such file is now also listed under `files.alternatives`, together with the other languages it could be. Resolves #15
1 parent 9728955 commit daaf868

File tree

9 files changed

+53
-18
lines changed

9 files changed

+53
-18
lines changed

changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## Next
4+
- Added `files.alternatives` key, which lists the other possible language matches for files that do not have a definite match.
5+
36
## 2.5.6
47
*2023-06-28*
58
- Changed fetching of data files to fallback to using the packaged files if the fetch request fails ([#21](https://github.com/Nixinova/LinguistJS/issues/21)).

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"scripts": {
1414
"download-files": "npx tsx@3 build/download-files",
1515
"pre-publish": "npm run download-files && npm test && npm run test:perf",
16-
"test:perf": "tsc && node test/perf",
16+
"perf": "tsc && node test/perf",
1717
"test": "tsc && node test/folder && echo --- && node test/unit"
1818
},
1919
"files": [

readme.md

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,29 +38,35 @@ As an example, take the following file structure:
3838
| | index.ts 2kB
3939
| readme.md 3kB
4040
| no-lang 10B
41+
| x.pluginspec 10B
4142
```
4243

4344
Running LinguistJS on this folder will return the following JSON:
4445

4546
```json
4647
{
4748
"files": {
48-
"count": 4,
49-
"bytes": 6010,
49+
"count": 5,
50+
"bytes": 6020,
5051
"results": {
5152
"/src/index.ts": "TypeScript",
5253
"/src/cli.js": "JavaScript",
5354
"/readme.md": "Markdown",
5455
"/no-lang": null,
55-
}
56+
"/x.pluginspec": "Ruby",
57+
},
58+
"alternatives": {
59+
"/x.pluginspec": ["XML"],
60+
},
5661
},
5762
"languages": {
5863
"count": 4,
59-
"bytes": 6000,
64+
"bytes": 6010,
6065
"results": {
61-
"JavaScript": { "type": "programming", "bytes": 1000, "color": "#f1e05a" },
62-
"TypeScript": { "type": "programming", "bytes": 2000, "color": "#2b7489" },
63-
"Markdown": { "type": "prose", "bytes": 3000, "color": "#083fa1" },
66+
"JavaScript": { "type": "programming", "bytes": 1000, "color": "#f1e05a" },
67+
"Markdown": { "type": "prose", "bytes": 3000, "color": "#083fa1" },
68+
"Ruby": { "type": "programming", "bytes": 10, "color": "#701516" },
69+
"TypeScript": { "type": "programming", "bytes": 2000, "color": "#2b7489" },
6470
},
6571
},
6672
"unknown": {

src/index.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
3333
const extensions: Record<T.FilePath, string> = {};
3434
const overrides: Record<T.FilePath, T.LanguageResult> = {};
3535
const results: T.Results = {
36-
files: { count: 0, bytes: 0, results: {} },
36+
files: { count: 0, bytes: 0, results: {}, alternatives: {} },
3737
languages: { count: 0, bytes: 0, results: {} },
3838
unknown: { count: 0, bytes: 0, extensions: {}, filenames: {} },
3939
};
@@ -157,8 +157,11 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
157157
fileAssociations[file] = [];
158158
extensions[file] = '';
159159
}
160-
const parent = !opts.childLanguages && result && langData[result].group || false;
161-
fileAssociations[file].push(parent || result);
160+
// Set parent to result group if it is present
161+
// Falls back to `result` itself if either `opts.childLanguages` is set or if there is no group
162+
const finalResult = !opts.childLanguages && result && langData[result].group || result;
163+
if (!fileAssociations[file].includes(finalResult))
164+
fileAssociations[file].push(finalResult);
162165
extensions[file] = paths.extname(file).toLowerCase();
163166
};
164167
const overridesArray = Object.entries(overrides);
@@ -279,12 +282,14 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
279282
if (Array.isArray(heuristic.language)) {
280283
heuristic.language = heuristic.language[0];
281284
}
285+
282286
// Make sure the results includes this language
283287
const languageGroup = langData[heuristic.language]?.group;
284288
const matchesLang = fileAssociations[file].includes(heuristic.language);
285289
const matchesParent = languageGroup && fileAssociations[file].includes(languageGroup);
286290
if (!matchesLang && !matchesParent)
287291
continue;
292+
288293
// Normalise heuristic data
289294
const patterns: string[] = [];
290295
const normalise = (contents: string | string[]) => patterns.push(...[contents].flat());
@@ -296,17 +301,29 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
296301
if (data.named_pattern) normalise(heuristicsData.named_patterns[data.named_pattern]);
297302
}
298303
}
304+
299305
// Check file contents and apply heuristic patterns
300306
const fileContent = opts.fileContent?.length ? opts.fileContent[files.indexOf(file)] : await readFile(file).catch(() => null);
307+
// Skip if file read errors
301308
if (fileContent === null) continue;
309+
// Apply heuristics
302310
if (!patterns.length || patterns.some(pattern => pcre(pattern).test(fileContent))) {
303311
results.files.results[file] = heuristic.language;
304312
break;
305313
}
306314
}
307315
}
308316
// If no heuristics, assign a language
309-
results.files.results[file] ??= fileAssociations[file][0];
317+
if (!results.files.results[file]) {
318+
const possibleLangs = fileAssociations[file];
319+
// Assign first language as a default option
320+
const defaultLang = possibleLangs[0];
321+
const alternativeLangs = possibleLangs.slice(1)
322+
results.files.results[file] = defaultLang;
323+
// List alternative languages if there are any
324+
if (alternativeLangs.length > 0)
325+
results.files.alternatives[file] = alternativeLangs;
326+
}
310327
}
311328

312329
// Skip specified categories

src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export interface Results {
2929
bytes: Bytes
3030
/** Note: Results use slashes as delimiters even on Windows. */
3131
results: Record<FilePath, LanguageResult>
32+
alternatives: Record<FilePath, LanguageResult[]>
3233
}
3334
languages: {
3435
count: Integer

test/expected.json

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
11
{
22
"files": {
3-
"count": 8,
4-
"bytes": 47,
3+
"count": 9,
4+
"bytes": 61,
55
"results": {
66
"~/al.al": "Perl",
7+
"~/alternatives.asc": "AGS Script",
78
"~/file.txt": "JavaScript",
89
"~/folder/sub.txt": "Text",
910
"~/hashbang": "JavaScript",
1011
"~/modeline.txt": "C++",
1112
"~/Pipfile": "TOML",
1213
"~/unknown": null
14+
},
15+
"alternatives": {
16+
"~/alternatives.asc": [ "AsciiDoc", "Public Key" ]
1317
}
1418
},
1519
"languages": {
16-
"count": 5,
17-
"bytes": 38,
20+
"count": 6,
21+
"bytes": 52,
1822
"results": {
1923
"Perl": { "type": "programming", "bytes": 0, "color": "#0298c3" },
24+
"AGS Script": { "type": "programming", "bytes": 14, "color": "#B9D9FF" },
2025
"JavaScript": { "type": "programming", "bytes": 23, "color": "#f1e05a" },
2126
"Text": { "type": "prose", "bytes": 0 },
2227
"C++": { "type": "programming", "bytes": 15, "color": "#f34b7d" },

test/perf.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ async function perfTest() {
55
const amount = +process.argv[2] || 75;
66
for (let i = 0; i < amount; i++) {
77
let t1 = +new Date();
8-
await linguist('.');
8+
await linguist('.', { offline: true });
99
let t2 = +new Date();
1010
time += t2 - t1;
1111
}
1212
const unit = 'ms';
1313
const total = time;
1414
const average = total / amount;
15-
const EXPECTED_MAX = 75; // 2.3
15+
const EXPECTED_MAX = 100; // 2.6
1616
console.log('\n<Performance test results>');
1717
console.log('Total:', total, unit, `(n=${amount})`);
1818
console.log('Average:', average, unit);

test/samples/alternatives.asc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Alternatives

test/unit.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ async function test([filename, fileContent = ''], [type, testVal]) {
1313
'files': actual.files.results[filename],
1414
'size': actual.files.bytes,
1515
'count': actual.files.count,
16+
'alternatives_count': Object.entries(actual.files.alternatives).length,
1617
}[type];
1718
const result = testContent === testVal;
1819
i = `${+i + 1}`.padStart(2, '0');
@@ -36,6 +37,7 @@ async function unitTest() {
3637
await test(['x.cpp'], ['files', 'C++']);
3738
await test(['x.c'], ['files', 'C']);
3839
await test(['x.R'], ['files', 'R']);
40+
await test(['.m'], ['alternatives_count', 1])
3941
desc('filenames');
4042
await test(['Dockerfile'], ['files', 'Dockerfile']);
4143
await test(['CMakeLists.txt'], ['files', 'CMake']);

0 commit comments

Comments
 (0)