-
Notifications
You must be signed in to change notification settings - Fork 0
/
Jaccard.cpp
84 lines (66 loc) · 2.51 KB
/
Jaccard.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <fstream>
#include <iostream>
#include <iomanip>
#include <omp.h>
#include "Jaccard.h"
#include "enron.h"
#include "App.h"
// Constructing the object immediately runs the pairwise comparison: all the
// work happens in match(), which is invoked exactly once per instance.
Jaccard::Jaccard()
{
    this->match();
}
// Storage for the shared instance. As a static data member without an
// initialiser it is zero-initialised, i.e. null until get() creates the object.
Jaccard *Jaccard::singleton;

// Returns the shared Jaccard instance, creating it on the first call.
// The instance is allocated with new and never released by this function.
Jaccard *Jaccard::get()
{
    if (Jaccard::singleton == nullptr) {
        Jaccard::singleton = new Jaccard();
    }
    return Jaccard::singleton;
}
void Jaccard::match() {
enron& entity = (*enron::get());
set<int> union_set;
pair<int, float> min(0, 0);
array<measure, NB_SCORES> scores{};
int i, j, intersection, inserted_values = 0;
float score, completion;
int nb_measures_done = 1;
cout << fixed << setprecision(2);
#pragma omp parallel for private(i, j, intersection, union_set, score) shared(entity, min, scores, nb_measures_done, completion) reduction(+: inserted_values) num_threads(NB_THREADS)
for (i = 0; i < NB_MAILS; i++) {
for (j = i + 1; j < NB_MAILS; j++) {
const set<int> &setA = (*entity.get_mails())[i];
const set<int> &setB = (*entity.get_mails())[j];
if (setA.size() / THRESHOLD > setB.size() && setB.size() > setA.size() * THRESHOLD ) {
intersection = intersection_of(setA, setB);
if (intersection > 0) {
union_set = setA;
union_set.insert(setB.begin(), setB.end());
score = (float) intersection / union_set.size();
if (inserted_values < NB_SCORES) {
scores[inserted_values++].set(i, j, score);
} else {
if (score > min.second) {
#pragma omp critical
{
scores[min.first].set(i, j, score);
update_min(min, scores);
}
}
}
}
}
if (omp_get_thread_num() == 0) {
completion = 100 * ((float)nb_measures_done++ / NB_MEASURES_PER_THREAD);
cout << completion << "% completed : " << nb_measures_done << " measures done (1st thread)\n" ;
}
}
}
log(scores);
}
/**
 * Writes the kept measures to "Jaccard_Measures.txt" under the directory
 * returned by get_enron_path(), one line per entry of `scores`:
 *
 *     #<mail_id_A + 1> - #<mail_id_B + 1> <score>
 *
 * If the file cannot be opened, a message is printed on stderr and nothing is
 * written, instead of silently discarding the results.
 *
 * @param scores table of measures to dump; it is not modified.
 */
void Jaccard::log(array<measure, NB_SCORES> & scores) {
    const auto output_path = get_enron_path().append("/Jaccard_Measures.txt");
    ofstream output_file(output_path);
    if (!output_file.is_open()) {
        std::cerr << "Unable to open " << output_path << " for writing\n";
        return;
    }
    // Iterate by reference: no need to copy each measure just to print it.
    for (const measure &m : scores) {
        output_file << ("#" + to_string(m.mail_id_A + 1) + " - #" + to_string(m.mail_id_B + 1) + " "
                        + to_string(m.score) + "\n");
    }
    // The stream is flushed and closed by its destructor.
}