Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,9 @@ public Set<StatementOrder> getSupportedOrders(Resource subj, IRI pred, Value obj
public Comparator<Value> getComparator() {
return delegate.getComparator();
}

@Override
public long size(Resource subj, IRI pred, Value obj, Resource... contexts) {
return delegate.size(subj, pred, obj, contexts);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,9 @@ default Comparator<Value> getComparator() {
return null;
}

@Experimental
default long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) {
return getStatements(subj, pred, obj, contexts).stream()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that this also needs to be closed.

.count();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public CloseableIteration<? extends Namespace> getNamespaces() throws SailExcept
if (added == null && removed == null) {
return namespaces;
}
final Iterator<Map.Entry<String, String>> addedIter = added;
final Iterator<Entry<String, String>> addedIter = added;
final Set<String> removedSet = removed;
return new AbstractCloseableIteration<>() {

Expand Down Expand Up @@ -383,4 +383,15 @@ private boolean isDeprecated(Triple triple, List<Statement> deprecatedStatements
}
return true;
}

@Override
public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) {
// Fast path: no approved or deprecated
if (!changes.hasApproved() && !changes.hasDeprecated()) {
return derivedFrom.size(subj, pred, obj, contexts);
}

// Fallback path: iterate over all matching statements
return getStatements(subj, pred, obj, contexts).stream().count();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@odysa Does this call materialize/resolve all values in LmdbStore?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. It calls getStatements here

public CloseableIteration<? extends Statement> getStatements(Resource subj, IRI pred, Value obj,
Resource... contexts) throws SailException {
try {
return createStatementIterator(txn, subj, pred, obj, explicit, contexts);
} catch (IOException e) {
try {
logger.warn("Failed to get statements, retrying", e);
// try once more before giving up
Thread.yield();
return createStatementIterator(txn, subj, pred, obj, explicit, contexts);
} catch (IOException e2) {
throw new SailException("Unable to get statements", e);
}
}
}

And eventually calls getNextElement
public Statement getNextElement() throws SailException {
try {
long[] quad = recordIt.next();
if (quad == null) {
return null;
}
long subjID = quad[TripleStore.SUBJ_IDX];
Resource subj = (Resource) valueStore.getLazyValue(subjID);
long predID = quad[TripleStore.PRED_IDX];
IRI pred = (IRI) valueStore.getLazyValue(predID);
long objID = quad[TripleStore.OBJ_IDX];
Value obj = valueStore.getLazyValue(objID);
Resource context = null;
long contextID = quad[TripleStore.CONTEXT_IDX];
if (contextID != 0) {
context = (Resource) valueStore.getLazyValue(contextID);
}
return valueStore.createStatement(subj, pred, obj, context);
} catch (IOException e) {
throw causeIOException(e);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return getStatements(subj, pred, obj, contexts).stream().count(); should probably we wrapped in a try-with-resource.

Copy link
Contributor

@hmottestad hmottestad Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in:

try(var stream = getStatements(subj, pred, obj, contexts).stream()){
  return stream.size();
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Stream;

import org.eclipse.rdf4j.common.annotation.Experimental;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.common.order.StatementOrder;
import org.eclipse.rdf4j.common.transaction.IsolationLevel;
Expand Down Expand Up @@ -1033,4 +1034,21 @@ private boolean hasStatement(SailDataset dataset, Resource subj, IRI pred, Value
}
}

/**
* Returns the number of statements in the snapshot, optionally including inferred statements, for the given
* contexts. This method reads the size directly from the dataset within the current isolation level.
*
* @param includeInferred whether to include inferred statements in the count
* @param contexts the RDF contexts (named graphs) to restrict the count to; if none are provided, counts all
* contexts
* @return the number of statements in the dataset
* @throws SailException if an error occurs while accessing the Sail store
*/
@Experimental
protected long getSizeFromSnapshot(final boolean includeInferred, final Resource... contexts) throws SailException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need includeInferred? I can't see that the other size related methods support differentiating between inferred or explicit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kinda wish we could just use the existing sizeInternal(...) method, but I get that it's better to be safe and introduce a separate method for now and then later on we can consider changing up sizeInternal(...).

try (SailSource branch = branch(IncludeInferred.fromBoolean(includeInferred))) {
return branch.dataset(getIsolationLevel()).size(null, null, null, contexts);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,68 @@ CloseableIteration<? extends Statement> createStatementIterator(
}
}

/**
* Returns the number of statements that match the specified pattern.
*
* @param subj The subject of the pattern, or <tt>null</tt> to indicate a wildcard.
* @param pred The predicate of the pattern, or <tt>null</tt> to indicate a wildcard.
* @param obj The object of the pattern, or <tt>null</tt> to indicate a wildcard.
* @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is optional. If
* no contexts are supplied the method operates on the entire repository.
* @return The number of statements that match the specified pattern.
* @throws SailException If an error occurred while determining the size.
*/
private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, final Value obj,
final Resource... contexts)
throws SailException {
try {
long totalSize = 0;

long subjID = LmdbValue.UNKNOWN_ID;
if (subj != null) {
subjID = valueStore.getId(subj);
if (subjID == LmdbValue.UNKNOWN_ID) {
return 0;
}
}

long predID = LmdbValue.UNKNOWN_ID;
if (pred != null) {
predID = valueStore.getId(pred);
if (predID == LmdbValue.UNKNOWN_ID) {
return 0;
}
}

long objID = LmdbValue.UNKNOWN_ID;
if (obj != null) {
objID = valueStore.getId(obj);
if (objID == LmdbValue.UNKNOWN_ID) {
return 0;
}
}

// Handle the case where no contexts are specified (query all contexts)
if (contexts.length == 0) {
totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID, false);
} else {
for (Resource context : contexts) {
long contextID = LmdbValue.UNKNOWN_ID;
if (context != null) {
contextID = valueStore.getId(context);
if (contextID == LmdbValue.UNKNOWN_ID) {
return 0;
}
}
totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextID, false);
}
}
return totalSize;
} catch (final IOException e) {
throw new SailException(e);
}
}

private final class LmdbSailSource extends BackingSailSource {

private final boolean explicit;
Expand Down Expand Up @@ -955,5 +1017,15 @@ public Set<StatementOrder> getSupportedOrders(Resource subj, IRI pred, Value obj
public Comparator<Value> getComparator() {
return null;
}

@Override
public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts)
throws SailException {
try {
return LmdbSailStore.this.size(txn, subj, pred, obj, contexts);
} catch (final Exception e) {
throw new SailException(e);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.SailReadOnlyException;
import org.eclipse.rdf4j.sail.base.SailSourceConnection;
import org.eclipse.rdf4j.sail.base.*;
import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent;
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;

Expand Down Expand Up @@ -200,4 +200,8 @@ public void clearInferred(Resource... contexts) throws SailException {
sailChangedEvent.setStatementsRemoved(true);
}

@Override
protected long sizeInternal(Resource... contexts) throws SailException {
return super.getSizeFromSnapshot(false, contexts);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.transaction;
import static org.eclipse.rdf4j.sail.lmdb.Varint.readListUnsigned;
import static org.eclipse.rdf4j.sail.lmdb.Varint.writeUnsigned;
import static org.lwjgl.system.MemoryStack.stackGet;
import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.system.MemoryUtil.NULL;
import static org.lwjgl.util.lmdb.LMDB.MDB_CREATE;
Expand Down Expand Up @@ -85,6 +86,7 @@
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
import org.eclipse.rdf4j.sail.lmdb.Varint.GroupMatcher;
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
import org.lwjgl.PointerBuffer;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.util.lmdb.MDBEnvInfo;
Expand Down Expand Up @@ -656,6 +658,66 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
});
}

/**
* Returns the exact total size of the triple pattern with the given subject, predicate, object and context. If the
* subject, predicate, object or context is not specified (i.e., set to {@link LmdbValue#UNKNOWN_ID}), it will
* return the size of the entire database from the mdb_stat. Otherwise, it will iterate over all matching triples
* and count them.
*
* @param subj Subject ID or {@link LmdbValue#UNKNOWN_ID} if not specified
* @param pred Predicate ID or {@link LmdbValue#UNKNOWN_ID} if not specified
* @param obj Object ID or {@link LmdbValue#UNKNOWN_ID} if not specified
* @param context Context ID or {@link LmdbValue#UNKNOWN_ID} if not specified
* @param includeImplicit Whether to include implicit triples in the count
* @return The exact size of the triple pattern
*/
protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final long pred, final long obj,
final long context, final boolean includeImplicit)
throws IOException {

if (subj == LmdbValue.UNKNOWN_ID && pred == LmdbValue.UNKNOWN_ID && obj == LmdbValue.UNKNOWN_ID) {
try (final MemoryStack stack = MemoryStack.stackPush()) {
// Fast path: if all values are unknown, return the total size of the database
if (context == LmdbValue.UNKNOWN_ID) {
long cardinality = 0;
final TripleIndex index = getBestIndex(subj, pred, obj, context);

int dbi = index.getDB(true);
MDBStat stat = MDBStat.mallocStack(stack);
mdb_stat(txn.get(), dbi, stat);
cardinality += stat.ms_entries();

if (includeImplicit) {
dbi = index.getDB(false);
mdb_stat(txn.get(), dbi, stat);
cardinality += stat.ms_entries();
}
return cardinality;
} else {
// Fast path: if only context is specified, return the size of the given context
return getContextSize(txn, stack, context);
}
}
}

long size = 0;

try (RecordIterator explicitIter = getTriples(txn, subj, pred, obj, context, true);
RecordIterator implicitIter = includeImplicit
? getTriples(txn, subj, pred, obj, context, false)
: null) {
for (long[] quad = explicitIter.next(); quad != null; quad = explicitIter.next()) {
size++;
}
if (includeImplicit && implicitIter != null) {
for (long[] quad = implicitIter.next(); quad != null; quad = implicitIter.next()) {
size++;
}
}
}
return size;
}

protected double cardinality(long subj, long pred, long obj, long context) throws IOException {
TripleIndex index = getBestIndex(subj, pred, obj, context);

Expand All @@ -673,7 +735,6 @@ protected double cardinality(long subj, long pred, long obj, long context) throw
return cardinality;
});
}

return txnManager.doWith((stack, txn) -> {
final Statistics s = pool.getStatistics();
try {
Expand Down Expand Up @@ -910,6 +971,33 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean
return stAdded;
}

private long getContextSize(final Txn txn, final MemoryStack stack, final long context) throws IOException {
try {
stack.push();

// Prepare key
MDBVal idVal = MDBVal.calloc(stack);
ByteBuffer keyBuffer = stack.malloc(1 + Long.BYTES);
Varint.writeUnsigned(keyBuffer, context);
keyBuffer.flip();
idVal.mv_data(keyBuffer);

// Prepare value holder
MDBVal dataVal = MDBVal.calloc(stack);
int rc = mdb_get(txn.get(), contextsDbi, idVal, dataVal);
if (rc == MDB_SUCCESS && dataVal.mv_data() != null) {
return Varint.readUnsigned(dataVal.mv_data());
} else if (rc == MDB_NOTFOUND) {
// Context not present in DB
return 0;
} else {
throw new IOException("Failed to read context size: " + mdb_strerror(rc));
}
} finally {
stack.pop();
}
}

private void incrementContext(MemoryStack stack, long context) throws IOException {
try {
stack.push();
Expand Down
Loading
Loading