-
Notifications
You must be signed in to change notification settings - Fork 0
/
kvstore.cpp
283 lines (243 loc) · 8.37 KB
/
kvstore.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#include "kvstore.h"

#include <stdexcept>
/**
 * Opens the store rooted at `_storagePath` and rebuilds the SSTable cache
 * from the files that are already on disk.
 *
 * Level directories are visited in order ("level-0", "level-1", ...) and the
 * walk stops at the first level directory that does not exist. For every file
 * found a cache entry keyed by its full path is created, and `timeStamp` /
 * `fileNums` are advanced past the values seen on disk.
 *
 * @param _storagePath directory that holds the "level-N" sub-directories.
 *
 * NOTE: `stoi(file)` throws if a file whose name does not start with a number
 * is present in a level directory.
 */
KVStore::KVStore(const string &_storagePath)
    : KVStoreAPI(_storagePath),
      storagePath(_storagePath),
      timeStamp(1),
      fileNums(0) {
  ios_base::sync_with_stdio(false);  // turn off sync to accelerate

  // build cache
  int level = 0;
  string dir = storagePath + "/level-0";
  while (utils::dirExists(dir)) {
    vector<string> files;
    utils::scanDir(dir, files);
    for (const auto &file : files) {
      auto fileName = dir + "/" + file;
      auto fileCache = new SSTableCache(fileName);
      cache.emplace(fileName, fileCache);

      // The next table written must carry a stamp strictly greater than every
      // stamp on disk. `>=` (not `>`) is required: timeStamp starts at 1, so
      // with `>` a store whose newest table is stamped 1 would reopen with
      // timeStamp still 1 and hand out the same stamp twice.
      const auto &header = fileCache->getHeader();
      if (header.timeStamp >= timeStamp) timeStamp = header.timeStamp + 1;

      // Tables are named "<fileNums>.sst" when written; stoi reads the
      // leading number, and we continue after the largest one to avoid
      // duplicate file names.
      fileNums = max(stoi(file) + 1, fileNums);
    }
    dir = storagePath + "/level-" + to_string(++level);
  }
}
void KVStore::put(uint64_t key, const string &value) {
// if overflow, convert memTable to a SSTable and do compaction
if (overflow(memTable.getLength() + 1,
memTable.getValueSize() + value.size())) {
if (!utils::dirExists(storagePath + "/level-0"))
utils::mkdir((storagePath + "/level-0").c_str());
auto fileName = storagePath + "/level-0/" + to_string(fileNums++) + ".sst";
SSTable::toSSTable(memTable, fileName, timeStamp);
cache.emplace(fileName, new SSTableCache(memTable, timeStamp, fileName));
timeStamp++;
memTable.reset();
compaction(0);
}
memTable.put(key, value);
}
string KVStore::get(uint64_t key) {
auto value = memTable.get(key);
if (value == "~DELETED~") return "";
if (!value.empty()) return value;
auto i = cache.cbegin();
int level = 0;
while (i != cache.cend()) {
string ans;
uint64_t maxTime = 0;
while (i != cache.cend() && getLevel(i->first) == level) {
value = i->second->get(key);
if (!value.empty() && i->second->getHeader().timeStamp >= maxTime) {
ans = value;
maxTime = i->second->getHeader().timeStamp;
}
i++;
}
if (ans == "~DELETED~") return "";
if (!ans.empty()) return ans;
level++;
}
return "";
}
bool KVStore::del(uint64_t key) {
auto value = get(key);
if (value.empty()) return false;
memTable.remove(key);
memTable.put(key, "~DELETED~");
return true;
}
/**
 * Returns the store to an empty state: counters are zeroed, the memTable is
 * reset, every file in each consecutive "level-N" directory is removed along
 * with the directory itself, and all cache entries are freed.
 *
 * The directory walk starts at level 0 and ends at the first level directory
 * that does not exist.
 */
void KVStore::reset() {
  timeStamp = 0;
  fileNums = 0;
  memTable.reset();

  // Remove all SSTables, one level directory after another.
  int level = 0;
  for (string dir = storagePath + "/level-0"; utils::dirExists(dir);
       dir = storagePath + "/level-" + to_string(++level)) {
    vector<string> names;
    utils::scanDir(dir, names);
    for (const auto &name : names) {
      const string victim = dir + "/" + name;
      utils::rmfile(victim.c_str());
    }
    utils::rmdir(dir.c_str());
  }

  // Free the cached tables, then forget them.
  for (auto it = cache.begin(); it != cache.end(); ++it) delete it->second;
  cache.clear();
}
/**
 * Reports whether a table with `length` entries and `valueSize` bytes of
 * values reaches the 2 MiB size limit.
 *
 * The estimate is 32 + 10240 fixed bytes, 12 bytes for each of `length + 1`
 * index slots, plus the value bytes.
 * NOTE(review): presumably 32 = header, 10240 = bloom filter and
 * 12 = key + offset per entry; confirm against the SSTable layout.
 *
 * @param length    number of key/value pairs.
 * @param valueSize total size of the values in bytes.
 * @return true when the estimated size is at least 2 * 1024 * 1024.
 */
bool KVStore::overflow(unsigned long length, unsigned long valueSize) {
  const unsigned long fixedBytes = 32 + 10240;
  const unsigned long indexBytes = 12 * (length + 1);
  const unsigned long limitBytes = 2 * 1024 * 1024;
  return fixedBytes + indexBytes + valueSize >= limitBytes;
}
void KVStore::compaction(int level) {
string dir = storagePath + "/level-" + to_string(level);
if (!utils::dirExists(dir)) return;
vector<string> dirFiles;
int maxFileNum = pow2(level + 1), fileNum = utils::scanDir(dir, dirFiles);
if (fileNum <= maxFileNum) return;
for (string &name : dirFiles) name.insert(0, dir + '/');
// get the files which need compaction
vector<string> compactionFiles;
if (level == 0) {
compactionFiles = dirFiles;
} else {
// find the files with smallest timeStamp
using nameHeader = pair<string, SSTableHeader>;
auto cmp = [](const nameHeader &left, const nameHeader &right) {
const auto &lHeader = left.second;
const auto &rHeader = right.second;
return (lHeader.timeStamp > rHeader.timeStamp ||
(lHeader.timeStamp == rHeader.timeStamp &&
lHeader.minKey > rHeader.minKey));
};
priority_queue<nameHeader, vector<nameHeader>, decltype(cmp)> q(cmp);
int k = fileNum - maxFileNum;
for (const auto &file : dirFiles)
q.emplace(file, cache.at(file)->getHeader());
while (k--) {
auto top = q.top();
q.pop();
compactionFiles.push_back(top.first);
}
}
// prepare minKey, maxKey, maxTimeStamp for overlap detection
uint64_t minKey = UINT64_MAX, maxKey = 0, maxTimeStamp = 0;
for (const auto &file : compactionFiles) {
const auto &header = cache.at(file)->getHeader();
maxKey = max(header.maxKey, maxKey);
minKey = min(header.minKey, minKey);
maxTimeStamp = max(header.timeStamp, maxTimeStamp);
}
// get SSTables in next level whose keys intersects with compaction Files
string nextDir = storagePath + "/level-" + to_string(level + 1);
if (!utils::dirExists(nextDir)) utils::mkdir(nextDir.c_str());
dirFiles.clear();
utils::scanDir(nextDir, dirFiles);
for (string &name : dirFiles) name.insert(0, nextDir + '/');
for (const auto &file : dirFiles) {
const auto &header = cache.at(file)->getHeader();
if ((minKey <= header.minKey && header.minKey <= maxKey) ||
(minKey <= header.maxKey && header.maxKey <= maxKey)) {
compactionFiles.push_back(file);
maxTimeStamp = max(maxTimeStamp, header.timeStamp);
}
}
const auto k = compactionFiles.size();
unsigned long pairNum = 0;
// check if it is the last level
bool lastLevel = false;
if (!utils::dirExists(storagePath + "/level-" + to_string(level + 2)))
lastLevel = true;
// read dictionaries from dics
vector<SSTableDic> dics(compactionFiles.size());
for (uint64_t i = 0; i < k; i++) {
// read dic
SSTable::readDic(compactionFiles[i], dics[i]);
utils::rmfile(compactionFiles[i].c_str());
pairNum += dics[i].size();
}
// merge the dics, convert to SSTables
vector<uint64_t> indices(k);
SSTableDic mergedDic;
uint64_t lastTimeStamp = 0;
while (pairNum--) {
// find the next pair to merge
minKey = UINT64_MAX;
auto minDic = 0;
for (uint64_t i = 0; i < k; i++) {
if (indices[i] < dics[i].size() &&
dics[i].at(indices[i]).first < minKey) {
minKey = dics[i].at(indices[i]).first;
minDic = i;
}
}
const auto &pair = dics[minDic].at(indices[minDic]++);
const auto &header = cache.at(compactionFiles[minDic])->getHeader();
// duplicate keys, select the one with the largest timeStamp
if (!mergedDic.empty() && pair.first == mergedDic.back().first) {
if (header.timeStamp < lastTimeStamp)
continue;
else
mergedDic.pop_back();
}
mergedDic.emplace_back(pair.first, pair.second);
lastTimeStamp = header.timeStamp;
}
// split to SSTable
SSTableDic t; // TODO: can be optimized out
uint64_t valueSize = 0, length = 0;
for (const auto &pair : mergedDic) {
// last level should not contain deleted value
if (lastLevel && pair.second == "~DELETED~") continue;
if (overflow(length + 1, valueSize + pair.second.size())) {
auto fileName = nextDir + "/" + to_string(fileNums++) + ".sst";
SSTable::toSSTable(t, fileName, maxTimeStamp);
cache.emplace(fileName, new SSTableCache(t, maxTimeStamp, fileName));
valueSize = 0;
length = 0;
t.clear();
}
length++;
valueSize += pair.second.size();
t.push_back(pair);
}
// delete cache
for (uint64_t i = 0; i < k; i++) {
// delete cache
assert(cache.count(compactionFiles[i]));
auto c = cache.at(compactionFiles[i]);
delete c;
cache.erase(compactionFiles[i]);
}
// convert the remaining key value pairs to a SSTable
if (!t.empty()) {
auto fileName = nextDir + "/" + to_string(fileNums++) + ".sst";
SSTable::toSSTable(t, fileName, maxTimeStamp);
cache.emplace(fileName, new SSTableCache(t, maxTimeStamp, fileName));
}
// compact next level
compaction(level + 1);
}
/**
 * Computes 2 raised to the power `n` by repeated doubling.
 *
 * @param n exponent; values <= 0 yield 1.
 * @return 2^n as an int (no overflow protection for large `n`).
 */
int KVStore::pow2(int n) {
  int power = 1;
  for (; n > 0; --n) power += power;  // doubling, once per unit of n
  return power;
}
/**
 * Persists any pairs still held in the memTable and releases the cache.
 *
 * A non-empty memTable is written as a new SSTable in "level-0" (the
 * directory is created when missing), registered in the cache, and followed
 * by a compaction starting at level 0. Afterwards every cached table object
 * is deleted.
 */
KVStore::~KVStore() {
  const bool hasPending = memTable.getLength() > 0;
  if (hasPending) {
    string levelZero = storagePath + "/level-0";
    if (!utils::dirExists(levelZero)) utils::mkdir(levelZero.c_str());

    auto sstPath = levelZero + "/" + to_string(fileNums++) + ".sst";
    SSTable::toSSTable(memTable, sstPath, timeStamp);
    cache.emplace(sstPath, new SSTableCache(memTable, timeStamp, sstPath));
    compaction(0);
  }

  // The cache owns its table objects through raw pointers.
  for (auto it = cache.begin(); it != cache.end(); ++it) delete it->second;
}
/**
 * Extracts the level number from an SSTable path of the form
 * ".../level-<N>/<file>".
 *
 * The LAST "level-" in the path is used, so a storage directory whose own
 * name contains "level-" no longer hides the real level component.
 *
 * @param path path that contains a "level-<N>" component.
 * @return the number that follows the last "level-".
 * @throws std::invalid_argument if the path has no "level-" component, or
 *         (from stoi) if no digits follow it.
 * @throws std::out_of_range (from stoi) if the number does not fit an int.
 */
int KVStore::getLevel(const string &path) {
  static const char kMarker[] = "level-";
  const auto pos = path.rfind(kMarker);
  if (pos == string::npos)
    throw std::invalid_argument(
        "KVStore::getLevel: no \"level-\" component in path: " + path);
  // sizeof includes the terminating '\0', hence the - 1.
  return stoi(path.substr(pos + sizeof(kMarker) - 1));
}