-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTextFpMining.h
63 lines (47 loc) · 1.94 KB
/
TextFpMining.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
//
// TextFpMining.h
// cs412_text-fp-mining
//
// Created by Yvette Luo on 10/24/13.
// Copyright (c) 2013 Yvette Luo. All rights reserved.
//
#ifndef __cs412_text_fp_mining__TextFpMining__
#define __cs412_text_fp_mining__TextFpMining__
#include <iostream>
#include <map>
#include <string>
// Extent of every dimension of the fixed-size static tables declared in
// TextFpMining: D1d has MAX entries and D2d has MAX x MAX entries.
// `static` only spells out the internal linkage that a namespace-scope
// const object already has, so each including translation unit keeps its
// own copy exactly as before.
static const int MAX = 5;
/// Driver object for a text frequent-pattern mining pipeline.
///
/// An instance stores four input file names and a topic count. Run() is the
/// only public operation; the private member functions are the individual
/// pipeline steps, numbered as in their trailing comments. Steps 5 and 7 are
/// mentioned in comments only: no member function is declared for them here.
///
/// Everything except the constructor is declared but not defined in this
/// header; the definitions are expected in a separate source file.
class TextFpMining {
public:
    /// Stores copies of the four file names and the topic count. Performs no
    /// I/O and does not touch any of the static members.
    ///
    /// The strings are taken by reference-to-const because they are only
    /// copied: this lets callers pass temporaries, string literals, and const
    /// strings, while existing callers that pass named std::string objects
    /// keep compiling unchanged.
    ///
    /// @param _dataFile   name of the data file (presumably a path -- confirm
    ///                    against the implementation of Run()).
    /// @param _dictFile   name of the dictionary file.
    /// @param _titleFile  name of the title file.
    /// @param _ldaFile    name of the LDA file.
    /// @param _topicNum   number of topics; stored in the per-instance
    ///                    member topicNum.
    TextFpMining(const std::string& _dataFile,
                 const std::string& _dictFile,
                 const std::string& _titleFile,
                 const std::string& _ldaFile,
                 int _topicNum)
        // Initializers are listed in member declaration order, which is the
        // order in which they actually run.
        : dataFile(_dataFile),
          dictFile(_dictFile),
          titleFile(_titleFile),
          ldaFile(_ldaFile),
          topicNum(_topicNum) {
    }

    /// Public entry point of the pipeline (defined outside this header).
    void Run();

    // ---- Class-wide state shared by all instances --------------------------
    // These are declarations only: each static member needs exactly one
    // out-of-class definition in a source file.

    /// Maps a string to an int (presumably term -> index, the inverse of
    /// reverseDict -- confirm against the implementation).
    static std::map<std::string,int> dict;

    /// For later index->term mapping.
    static std::map<int, std::string> reverseDict;

    /// NOTE(review): separate from the per-instance member topicNum below;
    /// the constructor does not assign it. Confirm where it is set and
    /// whether both are needed.
    static int TopicNum;

    /// MAX x MAX table of ints; its meaning is not evident from this header.
    static int D2d[MAX][MAX];

    /// MAX-element table of ints; its meaning is not evident from this header.
    static int D1d[MAX];

private:
    // Step 2: Preprocessing
    void Preprocess();

    // Step 2.1: Generate a Dictionary
    // NOTE: the parameter name `dict` shadows the static member `dict`
    // inside the function body.
    void GenerateDict(std::map<std::string,int>& dict);

    // Step 2.2: Tokenize Plain Text by Dictionary
    // NOTE: the parameter name `dict` shadows the static member `dict`
    // inside the function body.
    void Tokenize(std::map<std::string, int>& dict);

    // Step 3: Partitioning
    void OrganizeTerms();

    // Step 4: Mining Frequent Patterns for Each Topic
    void MineFrequentPatterns();

    // Step 5: Mining Maximal/Closed Patterns (no member function declared)

    // Step 6: Re-rank by Purity of Patterns
    void Rank();

    // Step 7: Bonus (improve ranking quality) (no member function declared)

    // Per-instance configuration, copied from the constructor arguments.
    std::string dataFile;
    std::string dictFile;
    std::string titleFile;
    std::string ldaFile;
    int topicNum;
};
#endif /* defined(__cs412_text_fp_mining__TextFpMining__) */