diff --git a/src/components/processContent.js b/src/components/processContent.js index 7943568..297dd5f 100644 --- a/src/components/processContent.js +++ b/src/components/processContent.js @@ -20,10 +20,6 @@ module.exports = (data, fileInfo) => { const calculations = calculateConfidenceScore(data, fileInfo); - if (data.testFile) { - return calculations; - } - if (fileInfo.confidence.encoding) { fileInfo.confidence.language = calculations; } else { diff --git a/src/components/processing-content/calculateConfidenceScore.js b/src/components/processing-content/calculateConfidenceScore.js index 037e888..51d456d 100644 --- a/src/components/processing-content/calculateConfidenceScore.js +++ b/src/components/processing-content/calculateConfidenceScore.js @@ -6,7 +6,6 @@ module.exports = (data, fileInfo) => { const totalCharacters = data.content.replace(charRegex, "").length; const langArr = data.languageArr; const pos = data.pos; - const testFile = data.testFile; const secondLanguage = langArr.reduce((acc, val) => { if (acc.name === fileInfo.language) return val; @@ -57,24 +56,5 @@ module.exports = (data, fileInfo) => { ); } - // If the test script is running - if (testFile) { - return { - name: testFile.substr(testFile.lastIndexOf("/") + 1), - path: testFile, - encoding: fileInfo.encoding, - language: fileInfo.language, - languageConfidence: confidenceScore, - ratio: Number(languageRatio.toFixed(2)), - count: langArr[pos].count, - totalCharacters: totalCharacters, - characterWordRatio: characterWordRatio.toFixed(6), - secondLanguage: { - name: secondLanguage.name, - count: secondLanguage.count, - }, - }; - } - return confidenceScore; }; diff --git a/src/index-node.js b/src/index-node.js index 47ac8e8..24247c7 100644 --- a/src/index-node.js +++ b/src/index-node.js @@ -3,7 +3,7 @@ const checkUTF = require("./components/checkUTF.js"); const processContent = require("./components/processContent.js"); const checkByteOrderMark = require("./components/checkByteOrderMark.js"); -module.exports = (file, test) => { +module.exports = (file) => { return new Promise((resolve, reject) => { const fileInfo = { encoding: null, @@ -14,7 +14,6 @@ module.exports = (file, test) => { }, }; const data = {}; - data.testFile = test ? file : null; // Reading the first four bytes and checking if they coincide with one of the predefined byte order marks. const readStream = fs.createReadStream(file, { start: 0, end: 3 }); diff --git a/testing/language-encoding.test.js b/testing/language-encoding.test.js index 11d48b0..3863cd1 100644 --- a/testing/language-encoding.test.js +++ b/testing/language-encoding.test.js @@ -12,11 +12,44 @@ const folderPath = "/home/gignu/Documents/Subtitle Database/Language Folders/"; const testFiles = getFiles(folderPath); testFiles.forEach((file) => { - languageEncoding(file, true) + languageEncoding(file) .then((fileInfo) => { - if (fileInfo.ratio <= 0.85) { + // language = language + const testFileArray = file.split("/"); + const expectedLanguage = testFileArray[testFileArray.length - 2] + .toLowerCase() + .replace(" ", "-"); + + if ( + fileInfo.language !== expectedLanguage && + expectedLanguage !== "japanese" + ) + testFailed("language"); + + // confidence >= 0.95 + if ( + fileInfo.confidence.encoding < 0.95 && + expectedLanguage !== "japanese" + ) + testFailed("confidence"); + + function testFailed(issue) { console.log("Test case failed:"); + + switch (issue) { + case "language": + console.log("Expected language:", expectedLanguage); + console.log("Detected language:", fileInfo.language); + case "confidence": + console.log("Confidence score too low!"); + console.log( + "fileInfo.confidence.encoding:", + fileInfo.confidence.encoding + ); + } + console.log(fileInfo); + console.log("file:", file); process.exit(1); } })