Skip to content

Commit 6f0bbf4

Browse files
Merge pull request #147 from OpenTriply/add-offset-hdtSearch
Add offset flag in hdtSearch
2 parents 4dcf52b + a4378be commit 6f0bbf4

File tree

5 files changed

+47
-8
lines changed

5 files changed

+47
-8
lines changed

libhdt/include/Iterator.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,9 @@ class IteratorTripleString {
255255
}
256256
virtual void goToStart() {
257257
}
258+
virtual bool canGoTo(){
259+
return false;
260+
}
258261
virtual size_t estimatedNumResults() {
259262
return 0;
260263
}

libhdt/src/hdt/TripleIDStringIterator.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ TripleString *TripleIDStringIterator::previous() {
4141
void TripleIDStringIterator::goToStart() {
4242
iterator->goToStart();
4343
}
44-
44+
bool TripleIDStringIterator::canGoTo() {
45+
return iterator->canGoTo();
46+
}
4547
size_t TripleIDStringIterator::estimatedNumResults() {
4648
return iterator->estimatedNumResults();
4749
}

libhdt/src/hdt/TripleIDStringIterator.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class TripleIDStringIterator : public IteratorTripleString {
2626
TripleString *next();
2727
bool hasPrevious();
2828
TripleString *previous();
29+
bool canGoTo();
2930
void goToStart();
3031
size_t estimatedNumResults();
3132
ResultEstimationType numResultEstimation();

libhdt/src/triples/BitmapTriplesIterators.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,18 +455,23 @@ void MiddleWaveletIterator::skip(unsigned int pos) {
455455
//goTo(predicateOcurrence+pos);
456456

457457
int numJumps = 0;
458+
unsigned int posLeft = pos;
458459
while ((numJumps<pos)&&(posZ<maxZ)){
459-
if((posZ+pos)>nextZ) {
460+
if((posZ+posLeft)>nextZ) {
460461
numJumps += (nextZ-posZ)+1; // count current jump
461462
predicateOcurrence++; // jump to the next occurrence
463+
posLeft = pos-numJumps; // set remaining offset
462464
if (predicateOcurrence<=numOcurrences){
463465
posY = predicateIndex->getAppearance(patY, predicateOcurrence);
464466
posZ = prevZ = adjZ.find(posY);
465467
nextZ = adjZ.last(posY);
466468
}
469+
else {
470+
throw std::runtime_error("Cannot goTo on this pattern.");
471+
}
467472

468473
} else {
469-
posZ=(posZ+pos);
474+
posZ=(posZ+posLeft);
470475
numJumps=pos;
471476
}
472477
}

libhdt/tools/hdtSearch.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <getopt.h>
3939
#include <string.h>
4040
#include <string>
41+
#include <cstdint>
4142
#include <iostream>
4243
#include <fstream>
4344
#include "../src/util/StopWatch.hpp"
@@ -57,12 +58,13 @@ void help() {
5758
cout << "\t-h\t\t\tThis help" << endl;
5859
cout << "\t-q\t<query>\t\tLaunch query and exit." << endl;
5960
cout << "\t-o\t<output>\tSave query output to file." << endl;
61+
cout << "\t-f\t<offset>\tLimit the result list starting after the offset." << endl;
6062
cout << "\t-m\t\t\tDo not show results, just measure query time." << endl;
61-
cout << "\t-V\tPrints the HDT version number." << endl;
63+
cout << "\t-V\t\t\tPrints the HDT version number." << endl;
6264
//cout << "\t-v\tVerbose output" << endl;
6365
}
6466

65-
void iterate(HDT *hdt, char *query, ostream &out, bool measure) {
67+
void iterate(HDT *hdt, char *query, ostream &out, bool measure, uint32_t offset) {
6668
TripleString tripleString;
6769
tripleString.read(query);
6870

@@ -89,6 +91,26 @@ void iterate(HDT *hdt, char *query, ostream &out, bool measure) {
8991
IteratorTripleString *it = hdt->search(subj, pred, obj);
9092

9193
StopWatch st;
94+
95+
// Go to the right offset.
96+
if(it->canGoTo()) {
97+
try {
98+
it->skip(offset);
99+
offset = 0;
100+
}
101+
catch (const runtime_error error) {
102+
/*invalid offset*/
103+
interruptSignal = 1;
104+
}
105+
}
106+
else {
107+
while(offset && it->hasNext()) {
108+
it->next();
109+
offset--;
110+
}
111+
}
112+
113+
// Get results.
92114
unsigned int numTriples=0;
93115
while(it->hasNext() && interruptSignal==0) {
94116
TripleString *ts = it->next();
@@ -109,9 +131,11 @@ void iterate(HDT *hdt, char *query, ostream &out, bool measure) {
109131
int main(int argc, char **argv) {
110132
int c;
111133
string query, inputFile, outputFile;
134+
stringstream sstream;
135+
uint32_t offset = 0;
112136
bool measure = false;
113137

114-
while( (c = getopt(argc,argv,"hq:o:m:V"))!=-1) {
138+
while( (c = getopt(argc,argv,"hq:o:f:mV"))!=-1) {
115139
switch(c) {
116140
case 'h':
117141
help();
@@ -122,6 +146,10 @@ int main(int argc, char **argv) {
122146
case 'o':
123147
outputFile = optarg;
124148
break;
149+
case 'f':
150+
sstream << optarg;
151+
if(!(sstream >> offset)) offset=0;
152+
break;
125153
case 'm':
126154
measure = true;
127155
break;
@@ -160,7 +188,7 @@ int main(int argc, char **argv) {
160188

161189
if(query!="") {
162190
// Supplied query, search and exit.
163-
iterate(hdt, (char*)query.c_str(), *out, measure);
191+
iterate(hdt, (char*)query.c_str(), *out, measure, offset);
164192
} else {
165193
// No supplied query, show terminal.
166194
char line[1024*10];
@@ -179,7 +207,7 @@ int main(int argc, char **argv) {
179207
continue;
180208
}
181209

182-
iterate(hdt, line, *out, measure);
210+
iterate(hdt, line, *out, measure, offset);
183211

184212
cerr << ">> ";
185213
}

0 commit comments

Comments
 (0)