Skip to content

Commit

Permalink
I simplified the test cases. The test is now making sure that the lan…
Browse files Browse the repository at this point in the history
…guage is detected correctly for each file and instead of checking the language ratio we're now checking the confidence score
  • Loading branch information
gignupg committed Apr 20, 2021
1 parent 380a478 commit ec7d4b8
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 28 deletions.
4 changes: 0 additions & 4 deletions src/components/processContent.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ module.exports = (data, fileInfo) => {

const calculations = calculateConfidenceScore(data, fileInfo);

if (data.testFile) {
return calculations;
}

if (fileInfo.confidence.encoding) {
fileInfo.confidence.language = calculations;
} else {
Expand Down
20 changes: 0 additions & 20 deletions src/components/processing-content/calculateConfidenceScore.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ module.exports = (data, fileInfo) => {
const totalCharacters = data.content.replace(charRegex, "").length;
const langArr = data.languageArr;
const pos = data.pos;
const testFile = data.testFile;

const secondLanguage = langArr.reduce((acc, val) => {
if (acc.name === fileInfo.language) return val;
Expand Down Expand Up @@ -57,24 +56,5 @@ module.exports = (data, fileInfo) => {
);
}

// If the test script is running
if (testFile) {
return {
name: testFile.substr(testFile.lastIndexOf("/") + 1),
path: testFile,
encoding: fileInfo.encoding,
language: fileInfo.language,
languageConfidence: confidenceScore,
ratio: Number(languageRatio.toFixed(2)),
count: langArr[pos].count,
totalCharacters: totalCharacters,
characterWordRatio: characterWordRatio.toFixed(6),
secondLanguage: {
name: secondLanguage.name,
count: secondLanguage.count,
},
};
}

return confidenceScore;
};
3 changes: 1 addition & 2 deletions src/index-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const checkUTF = require("./components/checkUTF.js");
const processContent = require("./components/processContent.js");
const checkByteOrderMark = require("./components/checkByteOrderMark.js");

module.exports = (file, test) => {
module.exports = (file) => {
return new Promise((resolve, reject) => {
const fileInfo = {
encoding: null,
Expand All @@ -14,7 +14,6 @@ module.exports = (file, test) => {
},
};
const data = {};
data.testFile = test ? file : null;

// Reading the first four bytes and checking if they coincide with one of the predefined byte order marks.
const readStream = fs.createReadStream(file, { start: 0, end: 3 });
Expand Down
37 changes: 35 additions & 2 deletions testing/language-encoding.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,44 @@ const folderPath = "/home/gignu/Documents/Subtitle Database/Language Folders/";
const testFiles = getFiles(folderPath);

testFiles.forEach((file) => {
languageEncoding(file, true)
languageEncoding(file)
.then((fileInfo) => {
if (fileInfo.ratio <= 0.85) {
// The detected language must equal the expected language (derived from the file's parent folder name)
const testFileArray = file.split("/");
const expectedLanguage = testFileArray[testFileArray.length - 2]
.toLowerCase()
.replace(" ", "-");

if (
fileInfo.language !== expectedLanguage &&
expectedLanguage !== "japanese"
)
testFailed("language");

// confidence >= 0.95
if (
fileInfo.confidence.encoding < 0.95 &&
expectedLanguage !== "japanese"
)
testFailed("confidence");

/**
 * Reports a failed test case on the console and terminates the process.
 *
 * Reads `expectedLanguage`, `fileInfo` and `file` from the enclosing scope.
 *
 * @param {string} issue - Which check failed: "language" or "confidence".
 */
function testFailed(issue) {
  console.log("Test case failed:");

  switch (issue) {
    case "language":
      console.log("Expected language:", expectedLanguage);
      console.log("Detected language:", fileInfo.language);
      // Without this break, execution fell through and also reported a
      // confidence failure for what is purely a language mismatch.
      break;
    case "confidence":
      console.log("Confidence score too low!");
      console.log(
        "fileInfo.confidence.encoding:",
        fileInfo.confidence.encoding
      );
      break;
  }

  // Dump the full detection result and the offending file, then exit
  // with status code 1.
  console.log(fileInfo);
  console.log("file:", file);
  process.exit(1);
}
})
Expand Down

0 comments on commit ec7d4b8

Please sign in to comment.