Skip to content

Commit 6b638e0

Browse files
committed
GH-5343 speed-up size calculation in LmdbStore
1 parent db88aa5 commit 6b638e0

File tree

5 files changed

+286
-2
lines changed

5 files changed

+286
-2
lines changed

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,67 @@ CloseableIteration<? extends Statement> createStatementIterator(
403403
}
404404
}
405405

406+
/**
407+
* Returns the number of statements that match the specified pattern.
408+
*
409+
* @param subj The subject of the pattern, or <tt>null</tt> to indicate a wildcard.
410+
* @param pred The predicate of the pattern, or <tt>null</tt> to indicate a wildcard.
411+
* @param obj The object of the pattern, or <tt>null</tt> to indicate a wildcard.
412+
* @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is optional. If
413+
* no contexts are supplied the method operates on the entire repository.
414+
* @return The number of statements that match the specified pattern.
415+
* @throws SailException If an error occurred while determining the size.
416+
*/
417+
public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts)
418+
throws SailException {
419+
try {
420+
long totalSize = 0;
421+
422+
long subjID = LmdbValue.UNKNOWN_ID;
423+
if (subj != null) {
424+
subjID = valueStore.getId(subj);
425+
if (subjID == LmdbValue.UNKNOWN_ID) {
426+
return 0;
427+
}
428+
}
429+
430+
long predID = LmdbValue.UNKNOWN_ID;
431+
if (pred != null) {
432+
predID = valueStore.getId(pred);
433+
if (predID == LmdbValue.UNKNOWN_ID) {
434+
return 0;
435+
}
436+
}
437+
438+
long objID = LmdbValue.UNKNOWN_ID;
439+
if (obj != null) {
440+
objID = valueStore.getId(obj);
441+
if (objID == LmdbValue.UNKNOWN_ID) {
442+
return 0;
443+
}
444+
}
445+
446+
// Handle the case where no contexts are specified (query all contexts)
447+
if (contexts.length == 0) {
448+
totalSize = tripleStore.cardinalityExact(subjID, predID, objID, LmdbValue.UNKNOWN_ID);
449+
} else {
450+
for (Resource context : contexts) {
451+
long contextID = LmdbValue.UNKNOWN_ID;
452+
if (context != null) {
453+
contextID = valueStore.getId(context);
454+
if (contextID == LmdbValue.UNKNOWN_ID) {
455+
return 0;
456+
}
457+
}
458+
totalSize += tripleStore.cardinalityExact(subjID, predID, objID, contextID);
459+
}
460+
}
461+
return totalSize;
462+
} catch (final IOException e) {
463+
throw new SailException(e);
464+
}
465+
}
466+
406467
private final class LmdbSailSource extends BackingSailSource {
407468

408469
private final boolean explicit;

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.eclipse.rdf4j.query.algebra.TupleExpr;
2424
import org.eclipse.rdf4j.sail.SailException;
2525
import org.eclipse.rdf4j.sail.SailReadOnlyException;
26-
import org.eclipse.rdf4j.sail.base.SailSourceConnection;
26+
import org.eclipse.rdf4j.sail.base.*;
2727
import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent;
2828
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
2929

@@ -163,6 +163,12 @@ public Statement next() throws SailException {
163163
};
164164
}
165165

166+
@Override
167+
protected long sizeInternal(final Resource... contexts) throws SailException {
168+
final LmdbSailStore sailStore = lmdbStore.getBackingStore();
169+
return sailStore.size(null, null, null, contexts);
170+
}
171+
166172
/**
167173
* Ensures that all components of the value are initialized from the underlying database.
168174
*

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
8686
import org.eclipse.rdf4j.sail.lmdb.Varint.GroupMatcher;
8787
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
88+
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
8889
import org.lwjgl.PointerBuffer;
8990
import org.lwjgl.system.MemoryStack;
9091
import org.lwjgl.util.lmdb.MDBEnvInfo;
@@ -656,6 +657,56 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
656657
});
657658
}
658659

660+
/**
661+
* Returns the exact total size of the triple pattern with the given subject, predicate, object and context. If the
662+
* subject, predicate, object or context is not specified (i.e., set to {@link LmdbValue#UNKNOWN_ID}), it will
663+
* return the size of the entire database from the mdb_stat. Otherwise, it will iterate over all matching triples
664+
* and count them.
665+
*
666+
* @param subj Subject ID or {@link LmdbValue#UNKNOWN_ID} if not specified
667+
* @param pred Predicate ID or {@link LmdbValue#UNKNOWN_ID} if not specified
668+
* @param obj Object ID or {@link LmdbValue#UNKNOWN_ID} if not specified
669+
* @param context Context ID or {@link LmdbValue#UNKNOWN_ID} if not specified
670+
* @return The exact size of the triple pattern
671+
*/
672+
protected long cardinalityExact(final long subj, final long pred, final long obj, final long context)
673+
throws IOException {
674+
675+
// get size of entire db
676+
if (subj == LmdbValue.UNKNOWN_ID && pred == LmdbValue.UNKNOWN_ID && obj == LmdbValue.UNKNOWN_ID
677+
&& context == LmdbValue.UNKNOWN_ID) {
678+
return txnManager.doWith((stack, txn) -> {
679+
long cardinality = 0;
680+
final TripleIndex index = getBestIndex(subj, pred, obj, context);
681+
for (boolean explicit : new boolean[] { true, false }) {
682+
int dbi = index.getDB(explicit);
683+
MDBStat stat = MDBStat.mallocStack(stack);
684+
mdb_stat(txn, dbi, stat);
685+
cardinality += stat.ms_entries();
686+
}
687+
return cardinality;
688+
});
689+
}
690+
691+
try (TxnManager.Txn txn = txnManager.createReadTxn()) {
692+
final RecordIterator explicitIter = getTriples(txn, subj, pred, obj, context, true);
693+
final RecordIterator implicitIter = getTriples(txn, subj, pred, obj, context, false);
694+
long size = 0;
695+
try {
696+
for (long[] quad = explicitIter.next(); quad != null; quad = explicitIter.next()) {
697+
size++;
698+
}
699+
for (long[] quad = implicitIter.next(); quad != null; quad = implicitIter.next()) {
700+
size++;
701+
}
702+
} finally {
703+
explicitIter.close();
704+
implicitIter.close();
705+
}
706+
return size;
707+
}
708+
}
709+
659710
protected double cardinality(long subj, long pred, long obj, long context) throws IOException {
660711
TripleIndex index = getBestIndex(subj, pred, obj, context);
661712

@@ -673,7 +724,6 @@ protected double cardinality(long subj, long pred, long obj, long context) throw
673724
return cardinality;
674725
});
675726
}
676-
677727
return txnManager.doWith((stack, txn) -> {
678728
final Statistics s = pool.getStatistics();
679729
try {
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package org.eclipse.rdf4j.sail.lmdb;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import java.io.File;
6+
import java.util.Random;
7+
8+
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
9+
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
10+
import org.junit.jupiter.api.AfterEach;
11+
import org.junit.jupiter.api.BeforeEach;
12+
import org.junit.jupiter.api.Test;
13+
import org.junit.jupiter.api.io.TempDir;
14+
15+
public class CardinalityExactTest {
16+
private static final int NUM_RESOURCES = 1000;
17+
private static final int MIN_TRIPLES_PER_RESOURCE = 20;
18+
private static final int MAX_TRIPLES_PER_RESOURCE = 100;
19+
private final int[] contextIds = new int[] { 1, 2, 3 };
20+
private final int[] objectIds = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
21+
@TempDir
22+
File tempFolder;
23+
24+
protected TripleStore tripleStore;
25+
26+
@BeforeEach
27+
public void before() throws Exception {
28+
File dataDir = new File(tempFolder, "triplestore");
29+
dataDir.mkdir();
30+
tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"));
31+
}
32+
33+
private long countTriples(RecordIterator iterator) {
34+
long count = 0;
35+
while (iterator.next() != null) {
36+
count++;
37+
}
38+
return count;
39+
}
40+
41+
private long randomObjectId(Random random) {
42+
return objectIds[random.nextInt(objectIds.length)];
43+
}
44+
45+
private long randomContextId(Random random) {
46+
return contextIds[random.nextInt(contextIds.length)];
47+
}
48+
49+
@Test
50+
public void testCardinalityExact() throws Exception {
51+
Random random = new Random();
52+
53+
tripleStore.startTransaction();
54+
55+
for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) {
56+
int tripleCount = MIN_TRIPLES_PER_RESOURCE + random.nextInt(MAX_TRIPLES_PER_RESOURCE);
57+
for (int i = 0; i < tripleCount; i++) {
58+
long objectId = randomObjectId(random);
59+
long randomContextId = randomContextId(random);
60+
tripleStore.storeTriple(resourceId, 2, objectId, randomContextId, true);
61+
62+
int predicateId = 2 + random.nextInt(1000) + 1;
63+
tripleStore.storeTriple(resourceId, predicateId, randomObjectId(random), randomContextId, true);
64+
}
65+
}
66+
67+
tripleStore.commit();
68+
69+
try (TxnManager.Txn txn = tripleStore.getTxnManager().createReadTxn()) {
70+
for (final long contextId : contextIds) {
71+
long actual = tripleStore.cardinalityExact(LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, contextId);
72+
long expected = countTriples(
73+
tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, contextId, false))
74+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID,
75+
contextId, true));
76+
assertEquals(expected, actual, "Exact size does not match counted triples.");
77+
78+
}
79+
80+
for (final long objectId : objectIds) {
81+
long actual = tripleStore.cardinalityExact(LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
82+
LmdbValue.UNKNOWN_ID);
83+
long expected = countTriples(
84+
tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
85+
LmdbValue.UNKNOWN_ID, false))
86+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
87+
LmdbValue.UNKNOWN_ID, true));
88+
assertEquals(expected, actual, "Exact size does not match counted triples.");
89+
}
90+
91+
for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) {
92+
long exactSize = tripleStore.cardinalityExact(resourceId, 2, LmdbValue.UNKNOWN_ID, 1);
93+
long count = countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, false))
94+
+ countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true));
95+
assertEquals(count, exactSize, "Exact size does not match counted triples.");
96+
}
97+
98+
for (int resourceId = 1; resourceId <= 50; resourceId++) {
99+
long targetObjectId = randomObjectId(random);
100+
long targetContextId = randomContextId(random);
101+
long generalSize = tripleStore.cardinalityExact(LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
102+
targetObjectId,
103+
targetContextId);
104+
long generalExplicitCount = countTriples(
105+
tripleStore.getTriples(
106+
txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId,
107+
true));
108+
long generalImplicitCount = countTriples(
109+
tripleStore.getTriples(
110+
txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId,
111+
false));
112+
assertEquals(
113+
generalImplicitCount + generalExplicitCount, generalSize,
114+
"Exact size does not match counted triples."
115+
);
116+
}
117+
118+
long totalSize = tripleStore.cardinalityExact(LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
119+
LmdbValue.UNKNOWN_ID,
120+
LmdbValue.UNKNOWN_ID);
121+
long totalCount = countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
122+
LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, false))
123+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
124+
LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, true));
125+
assertEquals(totalCount, totalSize, "Total size does not match counted triples.");
126+
}
127+
}
128+
129+
@AfterEach
130+
public void after() throws Exception {
131+
tripleStore.close();
132+
}
133+
}

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import static org.junit.jupiter.api.Assertions.assertEquals;
1414

1515
import java.io.File;
16+
import java.util.Random;
1617

1718
import org.eclipse.rdf4j.common.iteration.Iterations;
1819
import org.eclipse.rdf4j.common.transaction.IsolationLevel;
@@ -22,6 +23,7 @@
2223
import org.eclipse.rdf4j.repository.sail.SailRepository;
2324
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
2425
import org.eclipse.rdf4j.testsuite.repository.RepositoryConnectionTest;
26+
import org.junit.jupiter.api.Test;
2527
import org.junit.jupiter.params.ParameterizedTest;
2628
import org.junit.jupiter.params.provider.MethodSource;
2729

@@ -64,4 +66,36 @@ public void testSES715(IsolationLevel level) {
6466
testCon2.close();
6567
}
6668

69+
@Test
70+
public void testSize() {
71+
// load 1000 triples in two different contexts
72+
testCon2 = testRepository.getConnection();
73+
74+
testCon.begin();
75+
ValueFactory vf = testCon.getValueFactory();
76+
IRI context1 = vf.createIRI("http://my.context.1");
77+
IRI context2 = vf.createIRI("http://my.context.2");
78+
IRI predicate = vf.createIRI("http://my.predicate");
79+
IRI object = vf.createIRI("http://my.object");
80+
Random random = new Random();
81+
int context1Size = random.nextInt(5000);
82+
int context2Size = random.nextInt(5000);
83+
for (int j = 0; j < context1Size; j++) {
84+
testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context1);
85+
}
86+
for (int j = 0; j < context2Size; j++) {
87+
testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context2);
88+
}
89+
// should be 0 before commit
90+
assertEquals(0, testCon.size(context1));
91+
assertEquals(0, testCon.size(context2));
92+
assertEquals(0, testCon2.size());
93+
testCon.commit();
94+
95+
assertEquals(context1Size, testCon2.size(context1));
96+
assertEquals(context2Size, testCon2.size(context2));
97+
assertEquals(context1Size + context2Size, testCon2.size());
98+
99+
testCon2.close();
100+
}
67101
}

0 commit comments

Comments
 (0)