Skip to content

Commit

Permalink
Merge pull request #5 from wanhuz/main
Browse files Browse the repository at this point in the history
Main feature changes
  • Loading branch information
wanhuz committed May 17, 2021
2 parents bd2e752 + bd0ccca commit 827c93d
Show file tree
Hide file tree
Showing 21 changed files with 600 additions and 118 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
build/
vs2019/.vs/
.git/
vs2019/x86/
vs2019/x64/

# Prerequisites
*.d
Expand Down
Binary file added res/NotoSansMonoCJKjp-Regular.otf
Binary file not shown.
23 changes: 16 additions & 7 deletions src/capturekey/maincontroller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#include "../settings/config.h"




MainController::MainController() {
DictLoader* dictloader = new DictLoader();
dict = new Dict();
Expand All @@ -23,7 +21,6 @@ MainController::MainController() {
Config::getInstance().setFrame(frame);
dictloader->setDict(dict);
dictloader->start();


connect(capturekeypress, SIGNAL(OCRkeyStateChanged()), this, SLOT(captureOCR()));
connect(capturekeypress, SIGNAL(TextkeyStateChanged()), this, SLOT(captureTextGeneric()));
Expand Down Expand Up @@ -63,23 +60,35 @@ void MainController::startCaptureKeyTextGeneric() {

}

QVector<QStringList> MainController::searchDict(QString searchStr) {
return dict->search(searchStr);
/*Look up searchStr in the dictionary, then return the matches in the order produced by Dict::sort*/
QVector<entry> MainController::searchDict(QString searchStr) {
	const QVector<entry> matches = dict->search(searchStr);
	return dict->sort(matches, searchStr);
}

void MainController::captureOCR() {
frame->setBoxSize();
QPixmap screenshot = frame->shootScreenshot();
QTimer *timer = new QTimer();
timer->start(2500);
frame->show();
frame->activateWindow();
Pix* pix = Util::qPixMap2PIX(&screenshot);
Pix *pix = Util::qPixMap2PIX(&screenshot);
QString text = ocr->recognize(pix);

//Dumb word processing to only remove space get correct result, make proper processing function later
text = text.simplified();
text = text.replace(" ", "");

emit OcrResult(text);

connect(timer, &QTimer::timeout, frame, &Frame::hide);
connect(timer, &QTimer::timeout, timer, &QTimer::deleteLater);
}


/*Capture digital text by sending CTRL+C as input to Microsoft Windows.
It is a hacky solution, but it is the most optimal way to get text from different application types in Microsoft Windows.
Slot connected to CaptureKey's TextkeyStateChanged() signal in the constructor.*/
void MainController::captureTextGeneric() {
// Delegates entirely to Util::sendKeyInput() (presumably where the CTRL+C keystroke is issued - confirm in Util); nothing is returned or emitted here.
Util::sendKeyInput();
}
Expand Down
4 changes: 2 additions & 2 deletions src/capturekey/maincontroller.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "../capturekey/capturekey.h"
#include "../dict/dict.h"
#include <qclipboard.h>

#include "../data/entry.h"

class MainController : public QObject
{
Expand All @@ -17,7 +17,7 @@ class MainController : public QObject
void startCaptureKeyOCR();
void startCaptureKeyTextGeneric();
void stopCaptureKey();
QVector<QStringList> searchDict(QString searchStr);
QVector<entry> searchDict(QString searchStr);

private:
Frame* frame;
Expand Down
25 changes: 25 additions & 0 deletions src/data/entry.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once
#include <qstring.h>

/*Data type for dictionary entry. Each entry will have a kanji, readings, glossary and how common it is in real-life (frequency)*/
class entry {
private:
QString kanji;
QString reading;
QString gloss;
int freq;

public:
entry(const QString kanji, const QString reading, const QString gloss, const int freq) {
this->kanji = kanji;
this->reading = reading;
this->gloss = gloss;
this->freq = freq;
}

QString getKanji() { return this->kanji; }
QString getReading() { return this->reading; }
QString getGloss() { return this->gloss; }
int getFreq() { return this->freq; }
void setFreq(int freq) { if (freq < 101 && freq > -1) { this->freq = freq; } }
};
192 changes: 142 additions & 50 deletions src/dict/dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,88 +5,180 @@
#include <qstring.h>
#include <QBuffer>
#include <qeventloop.h>
#include <qdir.h>
#include <algorithm>
#include "../data/entry.h"



Dict::Dict() {
QStringList meaning_list;
QStringList kanji_list;
QStringList reading_list;
dictlist.append(kanji_list);
dictlist.append(reading_list);
dictlist.append(meaning_list);
// Hard mapping of word frequency line in Japanese Dictionary file to number
QString line;

/*Insert word frequency nf01 to nf48. lower is more common*/
for (int i = 1; i < 49; i++) {

if (i < 10) {
line = "<ke_pri>nf";
line = line + "0" + QString::number(i);
line = line + "</ke_pri>";
freqMap.insert(line, i);
}
else {
line = "<ke_pri>nf";
line = line + QString::number(i);
line = line + "</ke_pri>";
freqMap.insert(line, i);
}

}

freqMap.insert("<re_pri>news1</re_pri>", 49);
freqMap.insert("<re_pri>news2</re_pri>", 50);
freqMap.insert("<re_pri>ichi1</re_pri>", 51);
freqMap.insert("<re_pri>ichi2</re_pri>", 52);
freqMap.insert("<re_pri>spec1</re_pri>", 53);
freqMap.insert("<re_pri>spec2</re_pri>", 54);
freqMap.insert("<re_pri>gai1</re_pri>", 55);
freqMap.insert("<re_pri>gai2</re_pri>", 56);
freqMap.insert("<ke_pri>news1</ke_pri>", 57);
freqMap.insert("<ke_pri>news2</ke_pri>", 58);
freqMap.insert("<ke_pri>ichi1</ke_pri>", 59);
freqMap.insert("<ke_pri>ichi2</ke_pri>", 60);
freqMap.insert("<ke_pri>spec1</ke_pri>", 61);
freqMap.insert("<ke_pri>spec2</ke_pri>", 62);
freqMap.insert("<ke_pri>gai1</ke_pri>", 63);
freqMap.insert("<ke_pri>gai2</ke_pri>", 64);
}

/*Load file into QList*/
void Dict::load() {
QFile dictFile("../res/JMdict_e");
QString dictPath = QDir::currentPath();
//dictPath = dictPath + "/res/JMdict_e"; //Deployment
dictPath = "C://Users//WanHuz//Documents//Shanachan//res//JMdict_e"; //Debug Mode
QFile dictFile(dictPath);
if (!dictFile.open(QIODevice::ReadOnly | QIODevice::Text))
{
qDebug() << "Could not open dictionary file " + dictPath;
return;
}

QByteArray data = dictFile.readAll();
this->parse(&data);

dictFile.close();
}

void Dict::parse(QByteArray* data) {
QBuffer buffer(data);
buffer.open(QIODevice::ReadOnly);
QTextStream out(&buffer);
out.setCodec("UTF-8");
int rowCount = 0;

QString meanings = "";
QString kanji;
QString readings = "";
QString line;


//This code below takes ~40 seconds without thread
const int MAX_PRIORITY = 100;
const QString CLRTXT = "";
QVector<QString> kanji;
kanji.append(CLRTXT);
QString meanings = CLRTXT;
QString readings = CLRTXT;
QString line = CLRTXT;
int wordFreq = MAX_PRIORITY;

//This code below takes ~15s using different threads; ~40s without thread
while (!out.atEnd()) {
line = out.readLine();

if (line.contains("<keb>")) {
line.remove(QRegExp("<...>|<\/...>"));
kanji = line;

if (line.startsWith("<keb>")) {
line.remove(0,5);
line.remove("</keb>");
kanji.append(line);
}
else if (line.contains("<gloss>")) {
line.remove(QRegExp("<.....>"));
line.replace("</gloss>", "; ");
else if (line.startsWith("<gloss>")) {
line.remove(0, 7);
line.remove("</gloss>");
line = line + " / ";
meanings = meanings + line;
}
else if (line.contains("<reb>")) {
line.remove(QRegExp("<...>|<\/...>"));
else if (line.startsWith("<reb>")) {
line.remove(0, 5);
line.remove("</reb>");
readings = readings + line + "\n";
}
else if (line.contains("</entry>")) {
//Memory allocation here is 200MB~ give or take
dictlist[1].append(readings);
dictlist[0].append(kanji);
dictlist[2].append(meanings);
rowCount++;
meanings = "";
kanji = "";
readings = "";
else if ( (line.contains("<re_pri>")) || (line.contains("<ke_pri>")) ) {
int tempFreq = freqMap.value(line);

//If line is not in Map, skip
if (!tempFreq) { continue; }

if (tempFreq < wordFreq) {
wordFreq = tempFreq;

}


}
else if (line.startsWith("</ent")) {
//Memory allocation here is 130MB~ give or take
readings = readings.trimmed();
meanings.chop(2);

if (kanji.size() > 1) { kanji.removeFirst(); }

for (int i = 0; i < kanji.size(); i++) {
entry tempEntry = entry(kanji.at(i), readings, meanings, wordFreq);
dictlist.append(tempEntry);
}

meanings = CLRTXT;
kanji.clear();
kanji.append(CLRTXT);
readings = CLRTXT;
wordFreq = MAX_PRIORITY;
}

}

}

QVector<QStringList> Dict::search(QString searchString) {
QVector<QStringList> searchResult;
QStringList search_meaning_list;
QStringList search_kanji_list;
QStringList search_reading_list;
searchResult.append(search_kanji_list);
searchResult.append(search_reading_list);
searchResult.append(search_meaning_list);

for (int i = 0; i < dictlist[0].size(); i++) {
if (dictlist[0][i].contains(searchString)) {
searchResult[0].append(dictlist[0].at(i));
searchResult[1].append(dictlist[1].at(i));
searchResult[2].append(dictlist[2].at(i));
/*Search the loaded dictionary and return every entry that matches searchString.
An entry matches when its kanji or its reading starts with searchString, or when its
glossary contains searchString as a whole word. Results are returned in dictionary order.*/
QVector<entry> Dict::search(QString searchString) {
	QVector<entry> searchResult;

	// The search text comes from the user/OCR, so escape it before splicing it into the pattern:
	// characters such as ( [ * + ? would otherwise be read as regex syntax and produce an
	// invalid or unintended pattern instead of a literal whole-word match.
	const QRegExp wordBound("\\b(" + QRegExp::escape(searchString) + ")\\b");

	// Linear scan over the whole dictionary on every call; this is the responsiveness bottleneck.
	for (int i = 0; i < dictlist.size(); i++) {
		entry& candidate = dictlist[i];

		if (candidate.getKanji().startsWith(searchString) ||
			candidate.getReading().startsWith(searchString) ||
			candidate.getGloss().contains(wordBound)) {
			// Appending stores a copy, so the dictionary's own entry is left untouched.
			searchResult.append(candidate);
		}
	}

	return searchResult;
}
}

/*Order search results by ascending frequency value, so the entries with the smallest value come first.
An entry whose kanji or reading equals targetString exactly has its frequency set to 0 beforehand,
which moves exact matches to the front. Works on its own copy of the results and returns that copy.*/
QVector<entry> Dict::sort(QVector<entry> searchedWord, QString targetString) {
	// Pass 1: promote exact matches.
	for (entry& candidate : searchedWord) {
		const bool exactMatch = candidate.getKanji() == targetString
			|| candidate.getReading() == targetString;
		if (exactMatch) {
			candidate.setFreq(0);
		}
	}

	// Pass 2: sort by the (possibly adjusted) frequency value.
	auto byFrequency = [](entry lhs, entry rhs) {
		return lhs.getFreq() < rhs.getFreq();
	};
	std::sort(searchedWord.begin(), searchedWord.end(), byFrequency);

	return searchedWord;
}



10 changes: 7 additions & 3 deletions src/dict/dict.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
#pragma once
#include <qstring.h>
#include <qvector.h>
#include <qmap.h>
#include "../data/entry.h"

class Dict {

public:
Dict();
void load();
QVector<QStringList> search(QString searchString);
QVector<entry> search(QString searchString);
QVector<entry> sort(QVector<entry> searchedWord, QString targetString);

private:
void parse(QByteArray* line);
QVector<QStringList> dictlist;

QVector<entry> dictlist;
QMap<QString, int> freqMap;
};
1 change: 1 addition & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <QtWidgets/QApplication>
#include "ui/MainWindow.h"
#include "ui/popup.h"


int main(int argc, char *argv[])
Expand Down
Loading

0 comments on commit 827c93d

Please sign in to comment.