Skip to content

Commit c8c107a

Browse files
committed
GH-5343 speed-up size calculation in LmdbStore
1 parent db88aa5 commit c8c107a

File tree

9 files changed

+335
-10
lines changed

9 files changed

+335
-10
lines changed

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,9 @@ public Set<StatementOrder> getSupportedOrders(Resource subj, IRI pred, Value obj
9393
public Comparator<Value> getComparator() {
9494
return delegate.getComparator();
9595
}
96+
97+
@Override
98+
public long size(Resource subj, IRI pred, Value obj, Resource... contexts) {
99+
return delegate.size(subj, pred, obj, contexts);
100+
}
96101
}

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,8 @@ default Comparator<Value> getComparator() {
130130
return null;
131131
}
132132

133+
@Experimental
134+
default long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) {
135+
throw new SailException("Size not supported by " + this.getClass().getSimpleName());
136+
}
133137
}

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.base;
1212

13-
import java.util.ArrayList;
14-
import java.util.Arrays;
15-
import java.util.HashSet;
16-
import java.util.Iterator;
17-
import java.util.List;
18-
import java.util.Map;
13+
import java.util.*;
1914
import java.util.Map.Entry;
20-
import java.util.NoSuchElementException;
21-
import java.util.Set;
2215
import java.util.function.Function;
2316

2417
import org.eclipse.rdf4j.common.iteration.AbstractCloseableIteration;
@@ -383,4 +376,15 @@ private boolean isDeprecated(Triple triple, List<Statement> deprecatedStatements
383376
}
384377
return true;
385378
}
379+
380+
@Override
381+
public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) {
382+
// Fast path: no approved or deprecated
383+
if (!changes.hasApproved() && !changes.hasDeprecated()) {
384+
return derivedFrom.size(subj, pred, obj, contexts);
385+
}
386+
387+
// fall back to iterating all statements, still fast because it skips LMDB lazy evaluation
388+
return getStatements(subj, pred, obj, contexts).stream().count();
389+
}
386390
}

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.base;
1212

13+
import static org.eclipse.rdf4j.sail.base.SailSourceConnection.IncludeInferred.explicitOnly;
14+
1315
import java.util.Comparator;
1416
import java.util.HashMap;
1517
import java.util.Map;
@@ -1033,4 +1035,10 @@ private boolean hasStatement(SailDataset dataset, Resource subj, IRI pred, Value
10331035
}
10341036
}
10351037

1038+
protected long calculateSize(final boolean includeInferred, final Resource... contexts) throws SailException {
1039+
try (SailSource branch = branch(IncludeInferred.fromBoolean(includeInferred))) {
1040+
return branch.dataset(getIsolationLevel()).size(null, null, null, contexts);
1041+
}
1042+
}
1043+
10361044
}

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,68 @@ CloseableIteration<? extends Statement> createStatementIterator(
403403
}
404404
}
405405

406+
/**
407+
* Returns the number of statements that match the specified pattern.
408+
*
409+
* @param subj The subject of the pattern, or <tt>null</tt> to indicate a wildcard.
410+
* @param pred The predicate of the pattern, or <tt>null</tt> to indicate a wildcard.
411+
* @param obj The object of the pattern, or <tt>null</tt> to indicate a wildcard.
412+
* @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is optional. If
413+
* no contexts are supplied the method operates on the entire repository.
414+
* @return The number of statements that match the specified pattern.
415+
* @throws SailException If an error occurred while determining the size.
416+
*/
417+
private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, final Value obj,
418+
final Resource... contexts)
419+
throws SailException {
420+
try {
421+
long totalSize = 0;
422+
423+
long subjID = LmdbValue.UNKNOWN_ID;
424+
if (subj != null) {
425+
subjID = valueStore.getId(subj);
426+
if (subjID == LmdbValue.UNKNOWN_ID) {
427+
return 0;
428+
}
429+
}
430+
431+
long predID = LmdbValue.UNKNOWN_ID;
432+
if (pred != null) {
433+
predID = valueStore.getId(pred);
434+
if (predID == LmdbValue.UNKNOWN_ID) {
435+
return 0;
436+
}
437+
}
438+
439+
long objID = LmdbValue.UNKNOWN_ID;
440+
if (obj != null) {
441+
objID = valueStore.getId(obj);
442+
if (objID == LmdbValue.UNKNOWN_ID) {
443+
return 0;
444+
}
445+
}
446+
447+
// Handle the case where no contexts are specified (query all contexts)
448+
if (contexts.length == 0) {
449+
totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID);
450+
} else {
451+
for (Resource context : contexts) {
452+
long contextID = LmdbValue.UNKNOWN_ID;
453+
if (context != null) {
454+
contextID = valueStore.getId(context);
455+
if (contextID == LmdbValue.UNKNOWN_ID) {
456+
return 0;
457+
}
458+
}
459+
totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextID);
460+
}
461+
}
462+
return totalSize;
463+
} catch (final IOException e) {
464+
throw new SailException(e);
465+
}
466+
}
467+
406468
private final class LmdbSailSource extends BackingSailSource {
407469

408470
private final boolean explicit;
@@ -955,5 +1017,15 @@ public Set<StatementOrder> getSupportedOrders(Resource subj, IRI pred, Value obj
9551017
public Comparator<Value> getComparator() {
9561018
return null;
9571019
}
1020+
1021+
@Override
1022+
public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts)
1023+
throws SailException {
1024+
try {
1025+
return LmdbSailStore.this.size(txn, subj, pred, obj, contexts);
1026+
} catch (final Exception e) {
1027+
throw new SailException(e);
1028+
}
1029+
}
9581030
}
9591031
}

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.eclipse.rdf4j.query.algebra.TupleExpr;
2424
import org.eclipse.rdf4j.sail.SailException;
2525
import org.eclipse.rdf4j.sail.SailReadOnlyException;
26-
import org.eclipse.rdf4j.sail.base.SailSourceConnection;
26+
import org.eclipse.rdf4j.sail.base.*;
2727
import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent;
2828
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
2929

@@ -163,6 +163,11 @@ public Statement next() throws SailException {
163163
};
164164
}
165165

166+
@Override
167+
protected long sizeInternal(final Resource... contexts) throws SailException {
168+
return super.calculateSize(false, contexts);
169+
}
170+
166171
/**
167172
* Ensures that all components of the value are initialized from the underlying database.
168173
*

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
8686
import org.eclipse.rdf4j.sail.lmdb.Varint.GroupMatcher;
8787
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
88+
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
8889
import org.lwjgl.PointerBuffer;
8990
import org.lwjgl.system.MemoryStack;
9091
import org.lwjgl.util.lmdb.MDBEnvInfo;
@@ -656,6 +657,54 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
656657
});
657658
}
658659

660+
/**
661+
* Returns the exact total size of the triple pattern with the given subject, predicate, object and context. If the
662+
* subject, predicate, object or context is not specified (i.e., set to {@link LmdbValue#UNKNOWN_ID}), it will
663+
* return the size of the entire database from the mdb_stat. Otherwise, it will iterate over all matching triples
664+
* and count them.
665+
*
666+
* @param subj Subject ID or {@link LmdbValue#UNKNOWN_ID} if not specified
667+
* @param pred Predicate ID or {@link LmdbValue#UNKNOWN_ID} if not specified
668+
* @param obj Object ID or {@link LmdbValue#UNKNOWN_ID} if not specified
669+
* @param context Context ID or {@link LmdbValue#UNKNOWN_ID} if not specified
670+
* @return The exact size of the triple pattern
671+
*/
672+
protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final long pred, final long obj,
673+
final long context)
674+
throws IOException {
675+
676+
// get size of entire db
677+
if (subj == LmdbValue.UNKNOWN_ID && pred == LmdbValue.UNKNOWN_ID && obj == LmdbValue.UNKNOWN_ID
678+
&& context == LmdbValue.UNKNOWN_ID) {
679+
MemoryStack stack = stackPush();
680+
long cardinality = 0;
681+
final TripleIndex index = getBestIndex(subj, pred, obj, context);
682+
for (boolean explicit : new boolean[] { true, false }) {
683+
int dbi = index.getDB(explicit);
684+
MDBStat stat = MDBStat.mallocStack(stack);
685+
mdb_stat(txn.get(), dbi, stat);
686+
cardinality += stat.ms_entries();
687+
}
688+
return cardinality;
689+
}
690+
691+
final RecordIterator explicitIter = getTriples(txn, subj, pred, obj, context, true);
692+
final RecordIterator implicitIter = getTriples(txn, subj, pred, obj, context, false);
693+
long size = 0;
694+
try {
695+
for (long[] quad = explicitIter.next(); quad != null; quad = explicitIter.next()) {
696+
size++;
697+
}
698+
for (long[] quad = implicitIter.next(); quad != null; quad = implicitIter.next()) {
699+
size++;
700+
}
701+
} finally {
702+
explicitIter.close();
703+
implicitIter.close();
704+
}
705+
return size;
706+
}
707+
659708
protected double cardinality(long subj, long pred, long obj, long context) throws IOException {
660709
TripleIndex index = getBestIndex(subj, pred, obj, context);
661710

@@ -673,7 +722,6 @@ protected double cardinality(long subj, long pred, long obj, long context) throw
673722
return cardinality;
674723
});
675724
}
676-
677725
return txnManager.doWith((stack, txn) -> {
678726
final Statistics s = pool.getStatistics();
679727
try {
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2025 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
package org.eclipse.rdf4j.sail.lmdb;
12+
13+
import static org.junit.jupiter.api.Assertions.assertEquals;
14+
15+
import java.io.File;
16+
import java.util.Random;
17+
18+
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
19+
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
20+
import org.junit.jupiter.api.AfterEach;
21+
import org.junit.jupiter.api.BeforeEach;
22+
import org.junit.jupiter.api.Test;
23+
import org.junit.jupiter.api.io.TempDir;
24+
25+
public class CardinalityExactTest {
26+
private static final int NUM_RESOURCES = 1000;
27+
private static final int MIN_TRIPLES_PER_RESOURCE = 20;
28+
private static final int MAX_TRIPLES_PER_RESOURCE = 100;
29+
private final int[] contextIds = new int[] { 1, 2, 3 };
30+
private final int[] objectIds = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
31+
@TempDir
32+
File tempFolder;
33+
34+
protected TripleStore tripleStore;
35+
36+
@BeforeEach
37+
public void before() throws Exception {
38+
File dataDir = new File(tempFolder, "triplestore");
39+
dataDir.mkdir();
40+
tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"));
41+
}
42+
43+
private long countTriples(RecordIterator iterator) {
44+
long count = 0;
45+
while (iterator.next() != null) {
46+
count++;
47+
}
48+
return count;
49+
}
50+
51+
private long randomObjectId(Random random) {
52+
return objectIds[random.nextInt(objectIds.length)];
53+
}
54+
55+
private long randomContextId(Random random) {
56+
return contextIds[random.nextInt(contextIds.length)];
57+
}
58+
59+
@Test
60+
public void testCardinalityExact() throws Exception {
61+
Random random = new Random();
62+
63+
tripleStore.startTransaction();
64+
65+
for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) {
66+
int tripleCount = MIN_TRIPLES_PER_RESOURCE + random.nextInt(MAX_TRIPLES_PER_RESOURCE);
67+
for (int i = 0; i < tripleCount; i++) {
68+
long objectId = randomObjectId(random);
69+
long randomContextId = randomContextId(random);
70+
tripleStore.storeTriple(resourceId, 2, objectId, randomContextId, true);
71+
72+
int predicateId = 2 + random.nextInt(1000) + 1;
73+
tripleStore.storeTriple(resourceId, predicateId, randomObjectId(random), randomContextId, true);
74+
}
75+
}
76+
77+
tripleStore.commit();
78+
79+
try (TxnManager.Txn txn = tripleStore.getTxnManager().createReadTxn()) {
80+
for (final long contextId : contextIds) {
81+
long actual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID,
82+
contextId);
83+
long expected = countTriples(
84+
tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, contextId, false))
85+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID,
86+
contextId, true));
87+
assertEquals(expected, actual, "Exact size does not match counted triples.");
88+
89+
}
90+
91+
for (final long objectId : objectIds) {
92+
long actual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
93+
LmdbValue.UNKNOWN_ID);
94+
long expected = countTriples(
95+
tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
96+
LmdbValue.UNKNOWN_ID, false))
97+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId,
98+
LmdbValue.UNKNOWN_ID, true));
99+
assertEquals(expected, actual, "Exact size does not match counted triples.");
100+
}
101+
102+
for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) {
103+
long exactSize = tripleStore.cardinalityExact(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1);
104+
long count = countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, false))
105+
+ countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true));
106+
assertEquals(count, exactSize, "Exact size does not match counted triples.");
107+
}
108+
109+
for (int resourceId = 1; resourceId <= 50; resourceId++) {
110+
long targetObjectId = randomObjectId(random);
111+
long targetContextId = randomContextId(random);
112+
long generalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
113+
targetObjectId,
114+
targetContextId);
115+
long generalExplicitCount = countTriples(
116+
tripleStore.getTriples(
117+
txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId,
118+
true));
119+
long generalImplicitCount = countTriples(
120+
tripleStore.getTriples(
121+
txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId,
122+
false));
123+
assertEquals(
124+
generalImplicitCount + generalExplicitCount, generalSize,
125+
"Exact size does not match counted triples."
126+
);
127+
}
128+
129+
long totalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
130+
LmdbValue.UNKNOWN_ID,
131+
LmdbValue.UNKNOWN_ID);
132+
long totalCount = countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
133+
LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, false))
134+
+ countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID,
135+
LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, true));
136+
assertEquals(totalCount, totalSize, "Total size does not match counted triples.");
137+
}
138+
}
139+
140+
@AfterEach
141+
public void after() throws Exception {
142+
tripleStore.close();
143+
}
144+
}

0 commit comments

Comments
 (0)