diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 02e7d71bf5d..634df239ca2 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -767,7 +767,8 @@ private long removeStatements(long subj, long pred, long obj, boolean explicit, tripleStore.removeTriplesByContext(subj, pred, obj, contextId, explicit, quad -> { removeCount[0]++; for (long id : quad) { - if (id != 0L) { + if (id != 0L && !ValueIds.isInlined(id)) { + // only add references, exclude inlined values unusedIds.add(id); } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index ef3ab8ebc67..df85c3fe490 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -605,9 +605,18 @@ protected void filterUsedIds(Collection ids) throws IOException { it.remove(); continue; } - if (component != 2 && (id & 1) == 1) { - // id is a literal and can only appear in object position - continue; + if (component != 2) { + // optimization: ensure that literals are only tested if they appear in object + // position + switch (ValueIds.getIdType(id)) { + case ValueIds.T_URI: + case ValueIds.T_BNODE: + case ValueIds.T_TRIPLE: + break; // resources may occur in any position and must still be tested + default: + // id is a literal, do not test it + continue; + } } long subj = c == 0 ? id : -1, pred = c == 1 ? 
id : -1, diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java new file mode 100644 index 00000000000..4c01efed633 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java @@ -0,0 +1,93 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +/** + * Constants and functions for working with ids encoded into long values. + */ +public class ValueIds { + /** + * Pointer to an arbitrary value in the value store. This is not used as RDF value. 
+ */ + public static final int T_PTR = 0; + + /** Reference to a URI */ + public static final int T_URI = 1; + /** Reference to a literal */ + public static final int T_LITERAL = 2; + /** Reference to a blank node */ + public static final int T_BNODE = 3; + /** Reference to a triple */ + public static final int T_TRIPLE = 4; + + // inlined values + public static final int T_INTEGER = 16; + public static final int T_DECIMAL = 17; + public static final int T_FLOAT = 18; + public static final int T_DATETIME = 19; + public static final int T_DATETIMESTAMP = 20; + public static final int T_DATE = 21; + public static final int T_BOOLEAN = 22; + public static final int T_SHORTSTRING = 23; + public static final int T_POSITIVE_INTEGER = 24; + public static final int T_NEGATIVE_INTEGER = 25; + public static final int T_NON_NEGATIVE_INTEGER = 26; + public static final int T_NON_POSITIVE_INTEGER = 27; + public static final int T_LONG = 28; + public static final int T_INT = 29; + public static final int T_SHORT = 30; + public static final int T_BYTE = 31; + public static final int T_UNSIGNEDLONG = 32; + public static final int T_UNSIGNEDINT = 33; + public static final int T_UNSIGNEDSHORT = 34; + public static final int T_UNSIGNEDBYTE = 35; + + /** + * Returns the type section of the given id. + * + * @param id The id of which the type should be extracted. + * @return The id's type. + */ + public static int getIdType(long id) { + return (int) ((id >> 1) & 0x3F); + } + + /** + * Returns the value section of the given id. + * + * @param id The id of which the value should be extracted. + * @return The id's value. + */ + public static long getValue(long id) { + return id >> 7; + } + + /** + * Combines an id type and a value into a single long id. + * + * @param idType The id's type. + * @param value The id's value. + * @return A composite id. 
+ */ + public static long createId(int idType, long value) { + return value << 7 | idType << 1; + } + + /** + * Tests if the given id is an inlined value or a reference. + * + * @param id The id to test + * @return true if the value is inlined, else false + */ + public static boolean isInlined(long id) { + return getIdType(id) >= T_INTEGER; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java index 342ab9d97e3..59166d44929 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java @@ -53,7 +53,9 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -74,6 +76,7 @@ import org.eclipse.rdf4j.model.util.Literals; import org.eclipse.rdf4j.sail.lmdb.LmdbUtil.Transaction; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.inlined.Values; import org.eclipse.rdf4j.sail.lmdb.model.LmdbBNode; import org.eclipse.rdf4j.sail.lmdb.model.LmdbIRI; import org.eclipse.rdf4j.sail.lmdb.model.LmdbLiteral; @@ -94,24 +97,30 @@ class ValueStore extends AbstractValueFactory { private final static Logger logger = LoggerFactory.getLogger(ValueStore.class); - private static final byte URI_VALUE = 0x0; // 00 + private static final long VALUE_EVICTION_INTERVAL = 60000; // 60 seconds - private static final byte LITERAL_VALUE = 0x1; // 01 + private static final byte URI_VALUE = 0; - private static final byte BNODE_VALUE = 0x2; // 10 + private static final byte LITERAL_VALUE = 1; - private static final byte NAMESPACE_VALUE = 0x3; // 11 + private static final byte BNODE_VALUE = 2; - private static final byte ID_KEY = 0x4; + private 
static final byte TRIPLE_VALUE = 3; - private static final byte HASH_KEY = 0x5; + private static final byte NAMESPACE_VALUE = 4; - private static final byte HASHID_KEY = 0x6; + private static final byte ID_KEY = 5; + + private static final byte HASH_KEY = 6; + + private static final byte HASHID_KEY = 7; /*** * Maximum size of keys before hashing is used (size of two long values) */ private static final int MAX_KEY_SIZE = 16; + // package-protected for testing + final Set unusedRevisionIds = new HashSet<>(); /** * Used to do the actual storage of values, once they're translated to byte arrays. */ @@ -137,6 +146,16 @@ class ValueStore extends AbstractValueFactory { * namespace. */ private final ConcurrentCache namespaceIDCache; + private final boolean forceSync; + private final boolean autoGrow; + /** + * This lock is required to block transactions while auto-growing the map size. + */ + private final ReadWriteLock txnLock = new ReentrantReadWriteLock(); + private final ConcurrentCleaner cleaner = new ConcurrentCleaner(); + private final Map refCountsTxCache = new HashMap<>(); + private final long valueEvictionInterval; + private final ConcurrentHashMap commonVocabulary = new ConcurrentHashMap<>(); /** * Used to do the actual storage of values, once they're translated to byte arrays. */ @@ -152,14 +171,7 @@ class ValueStore extends AbstractValueFactory { // database with internal reference counts for IRIs and namespaces private int refCountsDbi; private long writeTxn; - private final boolean forceSync; - private final boolean autoGrow; private boolean invalidateRevisionOnCommit = false; - /** - * This lock is required to block transactions while auto-growing the map size. - */ - private final ReadWriteLock txnLock = new ReentrantReadWriteLock(); - /** * An object that indicates the revision of the value store, which is used to check if cached value IDs are still * valid. 
In order to be valid, the ValueStoreRevision object of a LmdbValue needs to be equal to this object. @@ -170,22 +182,13 @@ class ValueStore extends AbstractValueFactory { * object is GCed then it is safe to finally remove the ID-value associations and to reuse IDs. */ private volatile ValueStoreRevision.Lazy lazyRevision; - /** * The next ID that is associated with a stored value */ private long nextId = 1; private boolean freeIdsAvailable; - private volatile long nextValueEvictionTime = 0; - // package-protected for testing - final Set unusedRevisionIds = new HashSet<>(); - - private final ConcurrentCleaner cleaner = new ConcurrentCleaner(); - - private final long valueEvictionInterval; - ValueStore(File dir, LmdbStoreConfig config) throws IOException { this.dir = dir; this.forceSync = config.getForceSync(); @@ -239,6 +242,14 @@ class ValueStore extends AbstractValueFactory { commit(); } + private static boolean isCommonVocabulary(IRI nv) { + String string = nv.toString(); + return string.startsWith("http://www.w3.org/") || + string.startsWith("http://purl.org/") || + string.startsWith("http://publications.europa.eu/resource/authority") || + string.startsWith("http://xmlns.com/"); + } + private void logValues() throws IOException { readTransaction(env, (stack, txn) -> { long cursor = 0; @@ -349,7 +360,25 @@ private void open() throws IOException { }); } - private long nextId(byte type) throws IOException { + private long nextId(byte valueType) throws IOException { + int idType; + switch (valueType) { + case URI_VALUE: + idType = ValueIds.T_URI; + break; + case BNODE_VALUE: + idType = ValueIds.T_BNODE; + break; + case LITERAL_VALUE: + idType = ValueIds.T_LITERAL; + break; + case NAMESPACE_VALUE: + idType = ValueIds.T_PTR; + break; + default: + throw new IllegalArgumentException("Unexpected value type: " + valueType); + + } if (freeIdsAvailable) { // next id from store Long reusedId = writeTransaction((stack, txn) -> { @@ -361,9 +390,10 @@ private long nextId(byte 
type) throws IOException { MDBVal keyData = MDBVal.calloc(stack); MDBVal valueData = MDBVal.calloc(stack); + if (mdb_cursor_get(cursor, keyData, valueData, MDB_FIRST) == MDB_SUCCESS) { - // remove lower 2 type bits - long value = data2id(keyData.mv_data()) >> 2; + // unpack value from compound id + long value = ValueIds.getValue(data2id(keyData.mv_data())); // delete entry E(mdb_cursor_del(cursor, 0)); return value; @@ -377,17 +407,12 @@ private long nextId(byte type) throws IOException { } }); if (reusedId != null) { - long result = reusedId; - // encode type in lower 2 bits of id - result = (result << 2) | type; - return result; + return ValueIds.createId(idType, reusedId); } } long result = nextId; nextId++; - // encode type in lower 2 bits of id - result = (result << 2) | type; - return result; + return ValueIds.createId(idType, result); } protected ByteBuffer idBuffer(MemoryStack stack) { @@ -442,7 +467,7 @@ protected byte[] getData(long id) throws IOException { * @return the value object or null if not found */ LmdbValue cachedValue(long id) { - LmdbValue value = valueCache[(int) (id % valueCache.length)]; + LmdbValue value = valueCache[(int) ((id < 0 ? -id : id) % valueCache.length)]; if (value != null && value.getInternalID() == id) { return value; } @@ -459,7 +484,7 @@ LmdbValue cachedValue(long id) { * @return the value object or null if not found */ void cacheValue(long id, LmdbValue value) { - valueCache[(int) (id % valueCache.length)] = value; + valueCache[(int) ((id < 0 ? 
-id : id) % valueCache.length)] = value; } /** @@ -477,18 +502,23 @@ public LmdbValue getLazyValue(long id) throws IOException { LmdbValue resultValue = cachedValue(cacheID); if (resultValue == null) { - switch ((byte) (id & 0x3)) { - case URI_VALUE: + int idType = ValueIds.getIdType(id); + switch (idType) { + case ValueIds.T_URI: resultValue = new LmdbIRI(lazyRevision, id); break; - case LITERAL_VALUE: + case ValueIds.T_LITERAL: resultValue = new LmdbLiteral(lazyRevision, id); break; - case BNODE_VALUE: + case ValueIds.T_BNODE: resultValue = new LmdbBNode(lazyRevision, id); break; default: - throw new IOException("Unsupported value with type id " + (id & 0x3)); + if (ValueIds.isInlined(id)) { + resultValue = new LmdbLiteral(lazyRevision, id); + break; + } + throw new IOException("Unsupported value with id type: " + idType); } // Store value in cache cacheValue(cacheID, resultValue); @@ -515,6 +545,12 @@ public LmdbValue getValue(long id) throws IOException { LmdbValue resultValue = cachedValue(cacheID); if (resultValue == null) { + // unpack inlined values if possible + if (ValueIds.isInlined(id)) { + Literal unpacked = Values.unpackLiteral(id, this); + return new LmdbLiteral(revision, unpacked.getLabel(), unpacked.getDatatype(), id); + } + // Value not in cache, fetch it from file byte[] data = getData(id); @@ -539,6 +575,13 @@ public LmdbValue getValue(long id) throws IOException { * @return true if value could be successfully resolved, else false */ public boolean resolveValue(long id, LmdbValue value) { + // unpack inlined values if possible + if (ValueIds.isInlined(id)) { + Literal unpacked = Values.unpackLiteral(id, this); + ((LmdbLiteral) value).setLabel(unpacked.getLabel()); + ((LmdbLiteral) value).setDatatype(unpacked.getDatatype()); + return true; + } try { byte[] data = getData(id); if (data != null) { @@ -574,7 +617,7 @@ private void resizeMap(long txn, long requiredSize) throws IOException { mdb_txn_reset(txn); } if (activeWriteTxn) { - 
endTransaction(true); + endTransaction(true, true); } long oldMapSize = mapSize; @@ -599,55 +642,83 @@ private void resizeMap(long txn, long requiredSize) throws IOException { } } - private void incrementRefCount(MemoryStack stack, long writeTxn, byte[] data) throws IOException { + private void incrementRefCount(MemoryStack stack, long writeTxn, byte[] data) { // literals have a datatype id and URIs have a namespace id if (data[0] == LITERAL_VALUE || data[0] == URI_VALUE) { - try { - stack.push(); - ByteBuffer bb = ByteBuffer.wrap(data); - // skip type marker - int idLength = Varint.firstToLength(bb.get(1)); - MDBVal idVal = MDBVal.calloc(stack); - MDBVal dataVal = MDBVal.calloc(stack); - idVal.mv_data(idBuffer(stack).put(ID_KEY).put(data, 1, idLength).flip()); - long newCount = 1; - if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { - // update count - newCount = Varint.readUnsigned(dataVal.mv_data()) + 1; + // skip type marker + long id = Varint.readUnsigned(ByteBuffer.wrap(data, 1, data.length - 1)); + refCountsTxCache.compute(id, (k, v) -> { + if (v == null) { + try { + stack.push(); + MDBVal idVal = MDBVal.calloc(stack); + MDBVal dataVal = MDBVal.calloc(stack); + idVal.mv_data(idBuffer(stack).put(ID_KEY).put(data, 1, Varint.calcLengthUnsigned(id)).flip()); + long newCount = 1; + if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { + // update count + newCount = Varint.readUnsigned(dataVal.mv_data()) + 1; + } + return newCount; + } finally { + stack.pop(); + } + } else { + return v + 1; } - // write count - ByteBuffer countBb = stack.malloc(Varint.calcLengthUnsigned(newCount)); - Varint.writeUnsigned(countBb, newCount); - dataVal.mv_data(countBb.flip()); - E(mdb_put(writeTxn, refCountsDbi, idVal, dataVal, 0)); - } finally { - stack.pop(); - } + }); } } - private boolean decrementRefCount(MemoryStack stack, long writeTxn, ByteBuffer idBb) throws IOException { 
+ return refCountsTxCache.compute(id, (k, v) -> { + if (v == null) { + try { + stack.push(); + MDBVal idVal = MDBVal.calloc(stack); + MDBVal dataVal = MDBVal.calloc(stack); + ByteBuffer idBb = idBuffer(stack).put(ID_KEY); + Varint.writeUnsigned(idBb, id); + idVal.mv_data(idBb.flip()); + long newCount = 0; + if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { + // update count + newCount = Varint.readUnsigned(dataVal.mv_data()) - 1; + } + return newCount; + } finally { + stack.pop(); + } + } else { + return v - 1; + } + }) == 0; + } + + private void updateRefCounts(MemoryStack stack, long writeTxn) throws IOException { try { stack.push(); - MDBVal idVal = MDBVal.calloc(stack); - idVal.mv_data(idBb); + ByteBuffer idBb = idBuffer(stack); + ByteBuffer countBb = stack.malloc(Long.BYTES + 1); MDBVal dataVal = MDBVal.calloc(stack); - if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { - // update count - long newCount = Varint.readUnsigned(dataVal.mv_data()) - 1; - if (newCount <= 0) { + for (Map.Entry entry : refCountsTxCache.entrySet()) { + long count = entry.getValue(); + idBb.clear(); + idBb.put(ID_KEY); + Varint.writeUnsigned(idBb, entry.getKey()); + idVal.mv_data(idBb.flip()); + if (count <= 0) { + // delete count entry E(mdb_del(writeTxn, refCountsDbi, idVal, null)); - return true; } else { - // write count - ByteBuffer countBb = stack.malloc(Varint.calcLengthUnsigned(newCount)); - Varint.writeUnsigned(countBb, newCount); + // update count + countBb.clear(); + Varint.writeUnsigned(countBb, count); dataVal.mv_data(countBb.flip()); E(mdb_put(writeTxn, refCountsDbi, idVal, dataVal, 0)); } } - return false; } finally { stack.pop(); } @@ -840,8 +911,6 @@ public long getId(Value value) throws IOException { return getId(value, false); } - private final ConcurrentHashMap commonVocabulary = new ConcurrentHashMap<>(); - /** * Gets the ID for the specified value. 
* @@ -852,13 +921,10 @@ public long getId(Value value) throws IOException { public long getId(Value value, boolean create) throws IOException { // Try to get the internal ID from the value itself boolean isOwnValue = isOwnValue(value); - if (isOwnValue) { LmdbValue lmdbValue = (LmdbValue) value; - if (revisionIsCurrent(lmdbValue)) { long id = lmdbValue.getInternalID(); - if (id != LmdbValue.UNKNOWN_ID) { return id; } @@ -875,43 +941,57 @@ public long getId(Value value, boolean create) throws IOException { if (cachedID != null) { long id = cachedID; - if (isOwnValue) { // Store id in value for fast access in any consecutive calls ((LmdbValue) value).setInternalID(id, revision); } - return id; } - // ID not cached, search in file - byte[] data = value2data(value, create); - if (data == null && value instanceof Literal) { - data = literal2legacy((Literal) value); + long id = LmdbValue.UNKNOWN_ID; + if (value instanceof Literal) { + // inline value into id if possible + try { + long packedId = Values.packLiteral((Literal) value); + if (packedId != 0L) { + Literal unpacked = Values.unpackLiteral(packedId, this); + if (unpacked.equals(value)) { + id = packedId; + } + } + } catch (IllegalArgumentException e) { + // ignore, invalid literal + } } - if (data != null) { - long id = findId(data, create); + if (id == LmdbValue.UNKNOWN_ID) { + // not inlined or ID not cached, search in index + byte[] data = value2data(value, create); + if (data == null && value instanceof Literal) { + data = literal2legacy((Literal) value); + } - if (id != LmdbValue.UNKNOWN_ID) { - if (isOwnValue) { - // Store id in value for fast access in any consecutive calls - ((LmdbValue) value).setInternalID(id, revision); - // Store id in cache - valueIDCache.put((LmdbValue) value, id); - } else { - // Store id in cache - LmdbValue nv = getLmdbValue(value); - nv.setInternalID(id, revision); + if (data != null) { + id = findId(data, create); + } + } - if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) { - 
commonVocabulary.put(value, id); - } + if (id != LmdbValue.UNKNOWN_ID) { + if (isOwnValue) { + // Store id in value for fast access in any consecutive calls + ((LmdbValue) value).setInternalID(id, revision); + // Store id in cache + valueIDCache.put((LmdbValue) value, id); + } else { + // Store id in cache + LmdbValue nv = getLmdbValue(value); + nv.setInternalID(id, revision); - valueIDCache.put(nv, id); + if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) { + commonVocabulary.put(value, id); } + valueIDCache.put(nv, id); } - return id; } } finally { @@ -921,14 +1001,6 @@ public long getId(Value value, boolean create) throws IOException { return LmdbValue.UNKNOWN_ID; } - private static boolean isCommonVocabulary(IRI nv) { - String string = nv.toString(); - return string.startsWith("http://www.w3.org/") || - string.startsWith("http://purl.org/") || - string.startsWith("http://publications.europa.eu/resource/authority") || - string.startsWith("http://xmlns.com/"); - } - public void gcIds(Collection ids, Collection nextIds) throws IOException { if (!enableGC()) { return; @@ -937,6 +1009,8 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep if (!ids.isEmpty()) { // wrap into read txn as resizeMap expects an active surrounding read txn readTransaction(env, (stack1, txn1) -> { + // contains IDs for data types and namespaces which are freed by garbage collecting literals and URIs + resizeMap(writeTxn, 2 * ids.size() * (1 + Long.BYTES + 2 + Long.BYTES)); final Collection finalIds = ids; final Collection finalNextIds = nextIds; @@ -949,16 +1023,19 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep Varint.writeUnsigned(revIdBb, revision.getRevisionId()); int revLength = revIdBb.position(); for (Long id : finalIds) { - // contains IDs for data types and namespaces which are freed by garbage collecting literals and - // URIs - resizeMap(writeTxn, 10L * ids.size() * (1L + Long.BYTES + 2L + Long.BYTES)); - 
revIdBb.position(revLength).limit(revIdBb.capacity()); revIdVal.mv_data(id2data(revIdBb, id).flip()); // check if id has internal references and therefore cannot be deleted idVal.mv_data(revIdBb.slice().position(revLength)); - if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { - continue; + Long refCount = refCountsTxCache.get(id); + if (refCount == null) { + if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { + continue; + } + } else { + if (refCount > 0) { + continue; + } } // mark id as unused E(mdb_put(writeTxn, unusedDbi, revIdVal, dataVal, 0)); @@ -966,7 +1043,7 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep deleteValueToIdMappings(stack, writeTxn, finalIds, finalNextIds); - invalidateRevisionOnCommit = enableGC(); + invalidateRevisionOnCommit = true; if (nextValueEvictionTime < 0) { nextValueEvictionTime = System.currentTimeMillis() + this.valueEvictionInterval; } @@ -987,30 +1064,24 @@ protected void deleteValueToIdMappings(MemoryStack stack, long txn, Collection= 0 && System.currentTimeMillis() >= nextValueEvictionTime) { synchronized (unusedRevisionIds) { - MDBStat stat = MDBStat.malloc(stack); - mdb_stat(writeTxn, unusedDbi, stat); + if (!unusedRevisionIds.isEmpty()) { + MDBStat stat = MDBStat.malloc(stack); + mdb_stat(writeTxn, unusedDbi, stat); - if (resize) { - resizeMap(writeTxn, stat.ms_entries() * (2L + Long.BYTES)); - } + if (resize) { + resizeMap(writeTxn, stat.ms_entries() * (2L + Long.BYTES)); + } - freeUnusedIdsAndValues(stack, writeTxn, unusedRevisionIds); - unusedRevisionIds.clear(); + freeUnusedIdsAndValues(stack, writeTxn, unusedRevisionIds); + unusedRevisionIds.clear(); + clearCaches(); + } } nextValueEvictionTime = -1; } @@ -1166,9 +1240,15 @@ public void startTransaction(boolean resize) throws IOException { /** * Closes the snapshot and the DB iterator if any was opened in the current transaction */ - void endTransaction(boolean commit) throws IOException { + void 
endTransaction(boolean commit, boolean autoGrow) throws IOException { if (writeTxn != 0) { if (commit) { + if (!autoGrow) { + try (MemoryStack stack = stackPush()) { + updateRefCounts(stack, writeTxn); + } + refCountsTxCache.clear(); + } if (invalidateRevisionOnCommit) { long stamp = revisionLock.writeLock(); try { @@ -1191,6 +1271,7 @@ void endTransaction(boolean commit) throws IOException { E(mdb_txn_commit(writeTxn)); } } else { + refCountsTxCache.clear(); mdb_txn_abort(writeTxn); } writeTxn = 0; @@ -1199,11 +1280,11 @@ void endTransaction(boolean commit) throws IOException { } public void commit() throws IOException { - endTransaction(true); + endTransaction(true, false); } public void rollback() throws IOException { - endTransaction(false); + endTransaction(false, false); } /** @@ -1261,7 +1342,7 @@ protected void clearCaches() { */ public void close() throws IOException { if (env != 0) { - endTransaction(false); + endTransaction(false, false); mdb_env_close(env); env = 0; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java index 283186c0246..0a6a387ea3e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java @@ -89,7 +89,11 @@ private Varint() { * @param value value to encode */ public static void writeUnsigned(final ByteBuffer bb, final long value) { - if (value <= 240) { + if (value < 0) { + int bytes = descriptor(value) + 1; + bb.put((byte) (250 + (bytes - 3))); + writeSignificantBits(bb, value, bytes); + } else if (value <= 240) { bb.put((byte) value); } else if (value <= 2287) { bb.put((byte) ((value - 240) / 256 + 241)); @@ -112,7 +116,10 @@ public static void writeUnsigned(final ByteBuffer bb, final long value) { * @return length in bytes */ public static int calcLengthUnsigned(long value) { - if (value <= 240) { + if (value < 0) { + int bytes = 
descriptor(value) + 1; + return 1 + bytes; + } else if (value <= 240) { return 1; } else if (value <= 2287) { return 2; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java new file mode 100644 index 00000000000..096146618e9 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Booleans { + + static long packBoolean(Literal literal) { + return ValueIds.createId(ValueIds.T_BOOLEAN, literal.booleanValue() ? 1L : 0L); + } + + static Literal unpackBoolean(long value, ValueFactory valueFactory) { + return valueFactory.createLiteral(ValueIds.getValue(value) != 0); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java new file mode 100644 index 00000000000..8b39569c7aa --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +public class Bytes { + + /** + * Packs a byte array into a long value. + * + * @param bytes the byte array to be packed into a long. + * @return the long value representing the packed bytes. + * + * Note: Assumes the length of the byte array is within a reasonable range for packing into a long + * (typically 8 bytes or less, depending on use case). + */ + static long packBytes(byte[] bytes) { + long value = 0; + for (int i = 0; i < bytes.length; i++) { + value = value << 8; + value = value | (bytes[i] & 0xFF); + } + return value; + } + + /** + * Unpacks a long value into a byte array. + * + * @param value the long value to be unpacked. + * @param length the number of bytes to unpack from the long value. + * @return the byte array representing the unpacked bytes. + */ + static byte[] unpackBytes(long value, int length) { + byte[] bytes = new byte[length]; + for (int i = bytes.length - 1; i >= 0; i--) { + bytes[i] = (byte) (value & 0xFF); + value = value >>> 8; + } + return bytes; + } +} \ No newline at end of file diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java new file mode 100644 index 00000000000..84837e82cad --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java @@ -0,0 +1,169 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import javax.xml.datatype.DatatypeConfigurationException; +import javax.xml.datatype.DatatypeFactory; +import javax.xml.datatype.XMLGregorianCalendar; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.model.impl.CalendarLiteral; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Dates { + private static final ThreadLocal DATATYPE_FACTORY = ThreadLocal.withInitial(() -> { + try { + return DatatypeFactory.newInstance(); // not guaranteed to be thread-safe + } catch (DatatypeConfigurationException e) { + throw new RuntimeException("unable to create datatype factory", e); + } + }); + + static long packDateTime(Literal literal) { + try { + XMLGregorianCalendar calendar = literal.calendarValue(); + return ValueIds.createId(ValueIds.T_DATETIME, encodeToLong(calendar, XSD.DATETIME)); + } catch (IllegalArgumentException iae) { + // packing is not possible + } + return 0L; + } + + static long packDateTimeStamp(Literal literal) { + try { + XMLGregorianCalendar calendar = literal.calendarValue(); + return ValueIds.createId(ValueIds.T_DATETIMESTAMP, encodeToLong(calendar, XSD.DATETIMESTAMP)); + } catch (IllegalArgumentException iae) { + // packing is not possible + } + return 0L; + } + + static long packDate(Literal literal) { + try { + XMLGregorianCalendar calendar = literal.calendarValue(); + return ValueIds.createId(ValueIds.T_DATE, encodeToLong(calendar, XSD.DATE)); + } catch 
(IllegalArgumentException iae) { + // packing is not possible + } + return 0L; + } + + static Literal unpackDateTime(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATETIME); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATETIME); + } + + static Literal unpackDateTimeStamp(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATETIMESTAMP); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATETIMESTAMP); + } + + static Literal unpackDate(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATE); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATE); + } + + /** + * Encodes an XSD dateTime/date/time string (with optional millis, timezone) into 7 bytes of a long. Supports: + *
    + *
  • dateTime: "YYYY-MM-DDThh:mm:ss(.SSS)(Z|±hh:mm)"
  • + *
  • date: "YYYY-MM-DD(Z|±hh:mm)"
  • + *
  • time: "hh:mm:ss(.SSS)(Z|±hh:mm)"
  • + *
+ */ + static long encodeToLong(XMLGregorianCalendar calendar, CoreDatatype type) { + int year = calendar.getYear(); + int month = calendar.getMonth(); + int day = calendar.getDay(); + int hour = calendar.getHour(); + int minute = calendar.getMinute(); + int second = calendar.getSecond(); + int milli = calendar.getMinute(); + // in 15-min steps + int tzOffsetStep = calendar.getTimezone() / 15; + + // Range checks + if (type != XSD.TIME) { + if (year < 0 || year > 8191) { + throw new IllegalArgumentException("Year out of range for encoding: " + year); + } + if (month < 1 || month > 12) { + throw new IllegalArgumentException("Month out of range: " + month); + } + if (day < 1 || day > 31) { + throw new IllegalArgumentException("Day out of range: " + day); + } + } + if (type != XSD.DATE) { + if (hour < 0 || hour > 23) { + throw new IllegalArgumentException("Hour out of range: " + hour); + } + if (minute < 0 || minute > 59) { + throw new IllegalArgumentException("Minute out of range: " + minute); + } + if (second < 0 || second > 59) { + throw new IllegalArgumentException("Second out of range: " + second); + } + if (milli < 0 || milli > 999) { + throw new IllegalArgumentException("Millis out of range: " + milli); + } + } + if (tzOffsetStep < -64 || tzOffsetStep > 63) { + throw new IllegalArgumentException("Timezone offset out of encodable range ±15h 45min"); + } + + int tzBits = tzOffsetStep + 64; + + long bits = 0; + bits |= ((long) tzBits & 0x7F) << 49; // 7 bits (most significant) + bits |= ((long) milli & 0x3FF) << 39; // 10 bits + bits |= ((long) second & 0x3F) << 33; // 6 bits + bits |= ((long) minute & 0x3F) << 27; // 6 bits + bits |= ((long) hour & 0x1F) << 22; // 5 bits + bits |= ((long) day & 0x1F) << 17; // 5 bits + bits |= ((long) month & 0x0F) << 13; // 4 bits + bits |= ((long) year & 0x1FFF); // 13 bits (least significant) + + return bits; + } + + /** + * Decodes a 7-byte long back to an XSD dateTime/date/time string (uses 3-digit millis if present). 
+ */ + static XMLGregorianCalendar decodeFromLong(long bits, CoreDatatype type) { + int year = (int) (bits & 0x1FFF); // 13 bits + int month = (int) ((bits >>> 13) & 0x0F); // 4 bits + int day = (int) ((bits >>> 17) & 0x1F); // 5 bits + int hour = (int) ((bits >>> 22) & 0x1F); // 5 bits + int minute = (int) ((bits >>> 27) & 0x3F); // 6 bits + int second = (int) ((bits >>> 33) & 0x3F); // 6 bits + int milli = (int) ((bits >>> 39) & 0x3FF); // 10 bits + int tzBits = (int) ((bits >>> 49) & 0x7F); // 7 bits (most significant) + int tzOffsetStep = tzBits - 64; + int tzOffsetMin = tzOffsetStep * 15; + + XMLGregorianCalendar calendar = DATATYPE_FACTORY.get().newXMLGregorianCalendar(); + calendar.setYear(year); + calendar.setMonth(month); + calendar.setDay(day); + calendar.setHour(hour); + calendar.setMinute(minute); + calendar.setSecond(second); + calendar.setMillisecond(milli); + calendar.setTimezone(tzOffsetMin); + return calendar; + } + +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java new file mode 100644 index 00000000000..1f5c651e248 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java @@ -0,0 +1,153 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Decimals { + + static final int DECIMAL_VALUE_BITS = 48; + static final int DECIMAL_SCALE_BITS = 8; + static final BigInteger MAX_DECIMAL_VALUE = BigInteger.valueOf((1L << (DECIMAL_VALUE_BITS - 1)) - 1); + static final BigInteger MIN_DECIMAL_VALUE = BigInteger.valueOf(-(1L << (DECIMAL_VALUE_BITS - 1))); + static final int MAX_DECIMAL_SCALE = 2 ^ (DECIMAL_SCALE_BITS - 1) - 1; + static final int MIN_DECIMAL_SCALE = -2 ^ (DECIMAL_SCALE_BITS - 1); + + /** + * Encodes a {@link BigDecimal} in 56 bits [48 bits value, 8 bits scale]. + * + * @param value The decimal value + * @return Encoded value with type marker + */ + static long packDecimal(BigDecimal value) { + BigInteger unscaled = value.unscaledValue(); + if (unscaled.compareTo(MAX_DECIMAL_VALUE) > 0 || unscaled.compareTo(MIN_DECIMAL_VALUE) < 0) { + return 0L; + } + int scale = value.scale(); + if (scale > MAX_DECIMAL_SCALE || scale < MIN_DECIMAL_SCALE) { + return 0L; + } + long encoded = Integers.encodeZigZag(unscaled.longValue()) << DECIMAL_SCALE_BITS | scale; + return ValueIds.createId(ValueIds.T_DECIMAL, encoded); + } + + /** + * Extracts the exponent of a double, unbiased, and encodes it into 10 bits if possible. Handles special cases: NaN + * and Infinity. + * + * @param exponent11 The original 11-bit exponent. + * @return Encoded 10-bit exponent as int (0-1023), or -1 if not encodable. 
+ */ + public static int encodeExponent10Bits(int exponent11) { + boolean isNaN = exponent11 == 0x7FF && (exponent11 & 0xFFFFFFFFFFFFFL) != 0; + boolean isInf = exponent11 == 0x7FF && (exponent11 & 0xFFFFFFFFFFFFFL) == 0; + + if (isNaN || isInf) { + // Reserve special pattern, e.g., 0x3FF (all 10 bits set) for NaN/Inf + return 0x3FF; + } + + if (exponent11 == 0) { + // Subnormal number or zero, exponent = -1022 + return 0; // Use 0 for subnormal/zero + } + + // Normal number, unbiased exponent in [-1022, 1023] + int unbiasedExp = exponent11 - 1023; + int encoded = unbiasedExp + 511; // Shift range to [0, 1023] + if (encoded < 1 || encoded > 1022) { + // Out of range for 10 bits (excluding reserved 0 and 0x3FF) + return -1; + } + return encoded; + } + + /** + * Decodes a 10-bit encoded exponent back to unbiased exponent. + * + * @param encoded 10-bit encoded exponent + * @return 11-bit biased exponent or special values for reserved patterns + */ + public static int decodeExponent10Bits(int encoded) { + if (encoded == 0) { + // Subnormal/zero + return 0; // -1022; + } + if (encoded == 0x3FF) { + // Reserved for NaN/Inf + return 0x7FF; + } + // Normal + int unbiased = encoded - 511; + return unbiased + 1023; + } + + static long packDouble(double value) { + long valueBits = Double.doubleToRawLongBits(value); + // 11-bit exponent + int exponent11 = (int) ((valueBits >>> 52) & 0x7FF); + // encode to 10 bits + int exponent10 = encodeExponent10Bits(exponent11); + if (exponent10 >= 0) { + // encoding of exponent was possible + int sign = value < 0 ? 
1 : 0; + long mantissa = valueBits & 0x000fffffffffffffL; + long encoded = ((long) exponent10) << 54 | mantissa << 2 | sign << 1 | 1; + return encoded; + } + return 0L; + } + + static long packFloat(float value) { + return ValueIds.createId(ValueIds.T_FLOAT, Integers.encodeZigZag(Float.floatToRawIntBits(value))); + } + + static Literal unpackDecimal(long value, ValueFactory valueFactory) { + long encoded = ValueIds.getValue(value); + int scale = (byte) (encoded & 0xFF); + long unscaled = Integers.decodeZigZag(encoded >>> DECIMAL_SCALE_BITS); + return valueFactory.createLiteral(new BigDecimal(BigInteger.valueOf(unscaled), scale)); + } + + static boolean isDouble(long value) { + return (value & 1L) != 0; + } + + static Literal unpackDouble(long value, ValueFactory valueFactory) { + if ((value & 1L) == 0) { + throw new IllegalArgumentException("Invalid packed double value: zero bit not set."); + } + int sign = (int) ((value >> 1) & 1); + long mantissa = (value >> 2) & 0x000fffffffffffffL; + int exponent10 = (int) (value >>> 54); + + // Decode back to original exponent + int exponent11 = decodeExponent10Bits(exponent10); + + // Reconstruct raw bits + long valueBits = ((long) sign << 63) | + ((long) (exponent11 & 0x7FF) << 52) | + mantissa; + + return valueFactory.createLiteral(Double.longBitsToDouble(valueBits)); + } + + static Literal unpackFloat(long value, ValueFactory valueFactory) { + float floatValue = Float.intBitsToFloat((int) Integers.decodeZigZag(ValueIds.getValue(value))); + return valueFactory.createLiteral(floatValue); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java new file mode 100644 index 00000000000..614b3f2feef --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java @@ -0,0 +1,183 @@ +/******************************************************************************* + * Copyright 
(c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.math.BigInteger; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +/** + * Functions for inlining of values into long ids. + */ +public class Integers { + + static final int INTEGER_VALUE_BITS = 56; + static final long MAX_INTEGER = (1L << (INTEGER_VALUE_BITS - 1)) - 1; + static final BigInteger MAX_BIG_INTEGER = BigInteger.valueOf(MAX_INTEGER); + static final long MIN_INTEGER = -(1L << (INTEGER_VALUE_BITS - 1)); + static final BigInteger MIN_BIG_INTEGER = BigInteger.valueOf(MIN_INTEGER); + + /** + * Encode a signed long to ZigZag-encoded long. + * + * @param value the long value to be encoded + * @return the encoded long value + */ + static long encodeZigZag(long value) { + return (value << 1) ^ (value >> 63); + } + + /** + * Decode a ZigZag-encoded long back to signed long. 
+ * + * @param encoded the encoded long value + * @return the original long value with proper sign + */ + static long decodeZigZag(long encoded) { + return (encoded >>> 1) ^ -(encoded & 0x1); + } + + private static long packInteger(Literal literal, int idType) { + BigInteger value = literal.integerValue(); + if (value.compareTo(MAX_BIG_INTEGER) > 0 || value.compareTo(MIN_BIG_INTEGER) < 0) { + return 0L; + } + return ValueIds.createId(idType, encodeZigZag(value.longValue())); + } + + static long packInteger(Literal literal) { + return packInteger(literal, ValueIds.T_INTEGER); + } + + static long packLong(Literal literal) { + long value = literal.longValue(); + if (value > MAX_INTEGER || value < MIN_INTEGER) { + return 0L; + } + return ValueIds.createId(ValueIds.T_LONG, encodeZigZag(value)); + } + + static long packInt(Literal literal) { + return ValueIds.createId(ValueIds.T_INT, encodeZigZag(literal.intValue())); + } + + static long packShort(Literal literal) { + return ValueIds.createId(ValueIds.T_SHORT, encodeZigZag(literal.shortValue())); + } + + static long packByte(Literal literal) { + return ValueIds.createId(ValueIds.T_BYTE, literal.byteValue()); + } + + static long packUnsignedLong(Literal literal) { + long value = Long.parseUnsignedLong(literal.getLabel()); + if (value > MAX_INTEGER || value < MIN_INTEGER) { + return 0L; + } + return ValueIds.createId(ValueIds.T_UNSIGNEDLONG, encodeZigZag(value)); + } + + static long packUnsignedInt(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDINT, encodeZigZag(literal.longValue())); + } + + static long packUnsignedShort(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDSHORT, encodeZigZag(literal.intValue())); + } + + static long packUnsignedByte(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDBYTE, literal.intValue()); + } + + static long packPositiveInteger(Literal literal) { + return packInteger(literal, ValueIds.T_POSITIVE_INTEGER); + } + + static long 
packNegativeInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NEGATIVE_INTEGER); + } + + static long packNonNegativeInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NON_NEGATIVE_INTEGER); + } + + static long packNonPositiveInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NON_POSITIVE_INTEGER); + } + + static Literal unpackInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(BigInteger.valueOf(decoded)); + } + + static Literal unpackLong(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(decoded); + } + + static Literal unpackInt(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral((int) decoded); + } + + static Literal unpackShort(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral((short) decoded); + } + + static Literal unpackByte(long value, ValueFactory valueFactory) { + return valueFactory.createLiteral((byte) ValueIds.getValue(value)); + } + + static Literal unpackUnsignedLong(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toUnsignedString(decoded), XSD.UNSIGNED_LONG); + } + + static Literal unpackUnsignedInt(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_INT); + } + + static Literal unpackUnsignedShort(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_SHORT); + } + + static Literal unpackUnsignedByte(long 
value, ValueFactory valueFactory) { + long decoded = ValueIds.getValue(value); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_BYTE); + } + + static Literal unpackPositiveInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.POSITIVE_INTEGER); + } + + static Literal unpackNegativeInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NEGATIVE_INTEGER); + } + + static Literal unpackNonNegativeInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NON_NEGATIVE_INTEGER); + } + + static Literal unpackNonPositiveInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NON_POSITIVE_INTEGER); + } + +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java new file mode 100644 index 00000000000..9ec4457ccde --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.nio.charset.StandardCharsets; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Strings { + + static long packString(Literal literal) { + String label = literal.getLabel(); + if (label.length() > Values.MAX_LENGTH) { + // in any case string is longer than maximum encodable length + return 0L; + } + byte[] bytes = label.getBytes(StandardCharsets.UTF_8); + int maxLength = Values.MAX_LENGTH - 1; + if (bytes.length > maxLength) { + // multi-byte string is longer than maximum encodable length + return 0L; + } + + return ValueIds.createId(ValueIds.T_SHORTSTRING, Bytes.packBytes(bytes) << 8 | bytes.length); + } + + static Literal unpackString(long value, ValueFactory valueFactory) { + value = ValueIds.getValue(value); + int length = (int) (value & 0xFF); + String strValue = new String(Bytes.unpackBytes(value >>> 8, length), StandardCharsets.UTF_8); + return valueFactory.createLiteral(strValue, XSD.STRING); + } +} \ No newline at end of file diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java new file mode 100644 index 00000000000..6bab555a579 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java @@ -0,0 +1,140 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.eclipse.rdf4j.sail.lmdb.inlined.Booleans.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Dates.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Decimals.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Integers.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Strings.*; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +/** + * Functions for inlining of values into long ids. + */ +public class Values { + /** + * Maximum length of inlined values in bytes. 
+ */ + static int MAX_LENGTH = 7; + + public static long packLiteral(Literal literal) { + XSD xsdDataType = literal.getCoreDatatype().asXSDDatatypeOrNull(); + if (xsdDataType == null) { + return 0L; + } + switch (xsdDataType) { + case DECIMAL: + return packDecimal(literal.decimalValue()); + case DOUBLE: + return packDouble(literal.doubleValue()); + case FLOAT: + return packFloat(literal.floatValue()); + case INTEGER: + return packInteger(literal); + case LONG: + return packLong(literal); + case INT: + return packInt(literal); + case SHORT: + return packShort(literal); + case BYTE: + return packByte(literal); + case UNSIGNED_LONG: + return packUnsignedLong(literal); + case UNSIGNED_INT: + return packUnsignedInt(literal); + case UNSIGNED_SHORT: + return packUnsignedShort(literal); + case UNSIGNED_BYTE: + return packUnsignedByte(literal); + case POSITIVE_INTEGER: + return packPositiveInteger(literal); + case NEGATIVE_INTEGER: + return packNegativeInteger(literal); + case NON_NEGATIVE_INTEGER: + return packNonNegativeInteger(literal); + case NON_POSITIVE_INTEGER: + return packNonPositiveInteger(literal); + case STRING: + return packString(literal); + case DATETIME: + return packDateTime(literal); + case DATETIMESTAMP: + return packDateTimeStamp(literal); + case DATE: + return packDate(literal); + case BOOLEAN: + return packBoolean(literal); + default: + // unsupported type + return 0L; + } + } + + public static Literal unpackLiteral(long value, ValueFactory valueFactory) { + // special handling for double values + if (isDouble(value)) { + return unpackDouble(value, valueFactory); + } + + int idType = ValueIds.getIdType(value); + + switch (idType) { + case ValueIds.T_DECIMAL: + return unpackDecimal(value, valueFactory); + case ValueIds.T_FLOAT: + return unpackFloat(value, valueFactory); + case ValueIds.T_INTEGER: + return unpackInteger(value, valueFactory); + case ValueIds.T_LONG: + return unpackLong(value, valueFactory); + case ValueIds.T_INT: + return unpackInt(value, 
valueFactory); + case ValueIds.T_SHORT: + return unpackShort(value, valueFactory); + case ValueIds.T_BYTE: + return unpackByte(value, valueFactory); + case ValueIds.T_UNSIGNEDLONG: + return unpackUnsignedLong(value, valueFactory); + case ValueIds.T_UNSIGNEDINT: + return unpackUnsignedInt(value, valueFactory); + case ValueIds.T_UNSIGNEDSHORT: + return unpackUnsignedShort(value, valueFactory); + case ValueIds.T_UNSIGNEDBYTE: + return unpackUnsignedByte(value, valueFactory); + case ValueIds.T_POSITIVE_INTEGER: + return unpackPositiveInteger(value, valueFactory); + case ValueIds.T_NEGATIVE_INTEGER: + return unpackNegativeInteger(value, valueFactory); + case ValueIds.T_NON_NEGATIVE_INTEGER: + return unpackNonNegativeInteger(value, valueFactory); + case ValueIds.T_NON_POSITIVE_INTEGER: + return unpackNonPositiveInteger(value, valueFactory); + case ValueIds.T_SHORTSTRING: + return unpackString(value, valueFactory); + case ValueIds.T_DATETIME: + return unpackDateTime(value, valueFactory); + case ValueIds.T_DATETIMESTAMP: + return unpackDateTimeStamp(value, valueFactory); + case ValueIds.T_DATE: + return unpackDate(value, valueFactory); + case ValueIds.T_BOOLEAN: + return unpackBoolean(value, valueFactory); + default: + throw new IllegalArgumentException("Invalid packed value with id type: " + idType); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java index cdfef66f530..39e205eedbf 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java @@ -87,6 +87,7 @@ public void testGcValues() throws Exception { for (int i = 0; i < values.length; i++) { Assert.assertEquals(LmdbValue.UNKNOWN_ID, valueStore.getId(values[i])); + // access to value must be ensured as long as revision is not invalidated Assert.assertTrue(valueStore.getValue(ids[i]) 
!= null); } @@ -99,7 +100,8 @@ public void testGcValues() throws Exception { for (int i = 0; i < values.length; i++) { Assert.assertEquals(LmdbValue.UNKNOWN_ID, valueStore.getId(values[i])); - Assert.assertTrue(valueStore.getValue(ids[i]) != null); + // value should be removed after invalidating the revision + Assert.assertTrue(valueStore.getValue(ids[i]) == null); } valueStore.startTransaction(true); @@ -150,33 +152,35 @@ public void testGcValuesAfterRestart() throws Exception { @Test public void testGcDatatypes() throws Exception { - IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.DOUBLE, XSD.DECIMAL, XSD.FLOAT }; + IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.LONG, XSD.DECIMAL }; LmdbValue values[] = new LmdbValue[types.length]; valueStore.startTransaction(true); for (int i = 0; i < values.length; i++) { - values[i] = valueStore.createLiteral("123", types[i]); + // use a value that is large enough to not being inlined + values[i] = valueStore.createLiteral(Long.toString(Long.MAX_VALUE - 1), types[i]); valueStore.storeValue(values[i]); } valueStore.commit(); valueStore.startTransaction(true); List datatypeIds = new LinkedList<>(); - for (int i = 1; i < types.length; i++) { + for (int i = 0; i < types.length; i++) { datatypeIds.add(valueStore.storeValue(types[i])); } valueStore.commit(); valueStore.startTransaction(true); valueStore.gcIds(Collections.singleton(values[0].getInternalID()), new HashSet<>()); - valueStore.gcIds(datatypeIds, new HashSet<>()); + valueStore.gcIds(datatypeIds.subList(1, datatypeIds.size() - 1), new HashSet<>()); valueStore.commit(); // close and recreate store valueStore.close(); valueStore = createValueStore(); + // the first value is directly GCed assertNull(valueStore.getValue(values[0].getInternalID())); - // the first datatype is not directly garbage collected and must not be + // the first datatype is not directly GCed and must not be // removed from the store if the related literal is removed 
assertNotNull(valueStore.getValue(datatypeIds.remove(0))); @@ -195,7 +199,8 @@ public void testGcDatatypes() throws Exception { public void testGcURIs() throws Exception { for (boolean storeAndGcUri : List.of(false, true)) { valueStore.startTransaction(true); - LmdbLiteral literal = valueStore.createLiteral("123", XSD.STRING); + // use a value that is large enough to not being inlined + LmdbLiteral literal = valueStore.createLiteral("123".repeat(5), XSD.STRING); valueStore.storeValue(literal); if (storeAndGcUri) { valueStore.storeValue(XSD.STRING); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java new file mode 100644 index 00000000000..7a785e86752 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java @@ -0,0 +1,60 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; +import org.junit.jupiter.api.Test; + +class BooleansTest { + + private final ValueFactory valueFactory = SimpleValueFactory.getInstance(); + + @Test + void testPackBooleanTrue() { + Literal trueLiteral = valueFactory.createLiteral(true); + long expectedId = ValueIds.createId(ValueIds.T_BOOLEAN, 1L); + long actualId = Booleans.packBoolean(trueLiteral); + + assertEquals(expectedId, actualId, "Packing true literal should return the correct ID"); + } + + @Test + void testPackBooleanFalse() { + Literal falseLiteral = valueFactory.createLiteral(false); + long expectedId = ValueIds.createId(ValueIds.T_BOOLEAN, 0L); + long actualId = Booleans.packBoolean(falseLiteral); + + assertEquals(expectedId, actualId, "Packing false literal should return the correct ID"); + } + + @Test + void testUnpackBooleanTrue() { + long trueId = ValueIds.createId(ValueIds.T_BOOLEAN, 1L); + Literal expectedLiteral = valueFactory.createLiteral(true); + Literal actualLiteral = Booleans.unpackBoolean(trueId, valueFactory); + + assertEquals(expectedLiteral, actualLiteral, "Unpacking ID for true should return true literal"); + } + + @Test + void testUnpackBooleanFalse() { + long falseId = ValueIds.createId(ValueIds.T_BOOLEAN, 0L); + Literal expectedLiteral = valueFactory.createLiteral(false); + Literal actualLiteral = Booleans.unpackBoolean(falseId, valueFactory); + + assertEquals(expectedLiteral, actualLiteral, "Unpacking ID for false should return false literal"); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java new file mode 100644 index 00000000000..147d69c3b10 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java @@ -0,0 +1,67 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.Test; + +class BytesTest { + + @Test + void testPackBytesWithValidInput() { + byte[] bytes = { 0x01, 0x02, 0x03, 0x04 }; + long expected = 0x01020304L; + assertEquals(expected, Bytes.packBytes(bytes), "Packing bytes should result in the correct long value."); + assertArrayEquals(bytes, Bytes.unpackBytes(expected, 4), + "Unpacking long should result in the correct byte array."); + } + + @Test + void testPackBytesWithEmptyArray() { + byte[] bytes = {}; + long expected = 0L; + assertEquals(expected, Bytes.packBytes(bytes), "Packing an empty array should result in 0L."); + } + + @Test + void testPackBytesWithSingleByte() { + byte[] bytes = { 0x7F }; + long expected = 0x7FL; + assertEquals(expected, Bytes.packBytes(bytes), + "Packing a single byte should result in its long representation."); + } + + @Test + void testUnpackBytesWithZeroLength() { + long value = 0x01020304L; + byte[] expected = {}; + assertArrayEquals(expected, Bytes.unpackBytes(value, 0), + "Unpacking with zero length should result in an empty array."); + } + + @Test + void testUnpackBytesWithSingleByte() { + 
long value = 0x7FL; + byte[] expected = { 0x7F }; + assertArrayEquals(expected, Bytes.unpackBytes(value, 1), + "Unpacking a single byte should return the correct byte array."); + } + + @Test + void testPackAndUnpackConsistency() { + byte[] originalBytes = { 0x01, 0x02, 0x03, 0x04 }; + long packedValue = Bytes.packBytes(originalBytes); + byte[] unpackedBytes = Bytes.unpackBytes(packedValue, originalBytes.length); + assertArrayEquals(originalBytes, unpackedBytes, + "Packing then unpacking should result in the original byte array."); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java new file mode 100644 index 00000000000..e5e57407e0c --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java @@ -0,0 +1,110 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.lmdb.inlined;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.junit.jupiter.api.Test;
+
+public class DecimalsTest {
+
+	@Test
+	void testPackDecimalValid() {
+		BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), 2);
+		long packedValue = Decimals.packDecimal(value);
+		assertNotEquals(0L, packedValue, "Packing failed for value: " + value);
+	}
+
+	@Test
+	void testPackDecimalInvalidScale() {
+		BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), Decimals.MAX_DECIMAL_SCALE + 1);
+		long packedValue = Decimals.packDecimal(value);
+		assertEquals(0L, packedValue, "Expected 0 for a scale above MAX_DECIMAL_SCALE: " + value);
+	}
+
+	@Test
+	void testPackDecimalInvalidValue() {
+		BigDecimal value = new BigDecimal(BigInteger.valueOf(Decimals.MAX_DECIMAL_VALUE.longValue() + 1), 2);
+		long packedValue = Decimals.packDecimal(value);
+		assertEquals(0L, packedValue, "Expected 0 for an unscaled value above MAX_DECIMAL_VALUE: " + value);
+	}
+
+	@Test
+	void testUnpackDecimal() {
+		BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), 2);
+		long packedValue = Decimals.packDecimal(value);
+		Literal literal = Decimals.unpackDecimal(packedValue, SimpleValueFactory.getInstance());
+		assertEquals(value, literal.decimalValue(), "Mismatch for value: " + value);
+	}
+
+	@Test
+	void testPackDouble() {
+		double[] values = {
+				123.456, // typical positive
+				3.14, // small positive
+				-123.456, // typical negative
+				-3.14, // small negative
+				0, // positive zero
+				-0.0, // negative zero
+				1, // simple positive
+				-1, // simple negative
+				Double.NaN, // not-a-number
+				Double.POSITIVE_INFINITY, // positive infinity
+				Double.NEGATIVE_INFINITY // negative infinity
+		};
+
+		for (double value : values) {
+			long packedValue = Decimals.packDouble(value);
+			assertNotEquals(0L, packedValue, "Packing failed for value: " + value);
+			Literal literal = Decimals.unpackDouble(packedValue, SimpleValueFactory.getInstance());
+			if (Double.isNaN(value)) {
+				assertTrue(Double.isNaN(literal.doubleValue()), "Expected NaN but got: " + literal.doubleValue());
+			} else {
+				assertEquals(value, literal.doubleValue(), 0.0, "Mismatch for value: " + value);
+			}
+		}
+	}
+
+	@Test
+	void testPackFloat() {
+		float[] values = {
+				123.456f, // typical positive
+				3.14f, // small positive
+				-123.456f, // typical negative
+				-3.14f, // small negative
+				0f, // positive zero
+				-0.0f, // negative zero
+				1f, // simple positive
+				-1f, // simple negative
+				Float.NaN, // not-a-number
+				Float.POSITIVE_INFINITY, // positive infinity
+				Float.NEGATIVE_INFINITY // negative infinity
+		};
+
+		for (float value : values) {
+			long packedValue = Decimals.packFloat(value);
+			assertNotEquals(0L, packedValue, "Packing failed for value: " + value);
+			Literal literal = Decimals.unpackFloat(packedValue, SimpleValueFactory.getInstance());
+			if (Float.isNaN(value)) {
+				assertTrue(Float.isNaN(literal.floatValue()), "Expected NaN but got: " + literal.floatValue());
+			} else {
+				assertEquals(value, literal.floatValue(), 0.0f, "Mismatch for value: " + value);
+			}
+		}
+	}
+}
\ No newline at end of file
diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java
new file mode 100644
index 00000000000..c78e7563bf9
--- /dev/null
+++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.lmdb.inlined;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.base.CoreDatatype.XSD;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.junit.jupiter.api.Test;
+
+class StringsTest {
+
+	@Test
+	void testPackStringWithinMaxLength() {
+		ValueFactory valueFactory = SimpleValueFactory.getInstance();
+		Literal literal = valueFactory.createLiteral("test", XSD.STRING);
+		long packed = Strings.packString(literal);
+
+		// A short string must be packed into a non-zero value
+		assertNotEquals(0L, packed, "Packed value should not be 0 for valid input.");
+	}
+
+	@Test
+	void testPackStringExceedsMaxLength() {
+		ValueFactory valueFactory = SimpleValueFactory.getInstance();
+		// MAX_LENGTH characters is one too many: one byte is used to encode the string length
+		String longString = "a".repeat(Values.MAX_LENGTH);
+		Literal literal = valueFactory.createLiteral(longString, XSD.STRING);
+		long packed = Strings.packString(literal);
+
+		// A string that does not fit must be reported as 0
+		assertEquals(0L, packed, "Packed value should be 0 for input exceeding max length.");
+	}
+
+	@Test
+	void testUnpackString() {
+		ValueFactory valueFactory = SimpleValueFactory.getInstance();
+		Literal literal = valueFactory.createLiteral("test", XSD.STRING);
+		long packed = Strings.packString(literal);
+
+		Literal unpackedLiteral = Strings.unpackString(packed, valueFactory);
+
+		// Only the label is compared here, not the datatype
+		assertEquals(literal.getLabel(), unpackedLiteral.getLabel(), "Unpacked label should match original.");
+	}
+}
\ No newline at end of file
diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java
new file mode 100644
index 00000000000..11c55dbe509
--- /dev/null
+++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java
@@ -0,0 +1,180 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.lmdb.inlined;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.util.Arrays;
+import java.util.List;
+
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.XSD;
+import org.eclipse.rdf4j.sail.lmdb.Varint;
+import org.junit.jupiter.api.Test;
+
+class ValuesTest {
+
+	private final ValueFactory vf = SimpleValueFactory.getInstance();
+
+	/** Edge and standard cases per literal datatype; shared by both tests below. */
+	private final List<Literal> literals = Arrays.asList(
+			// DECIMAL
+			vf.createLiteral(BigDecimal.ZERO),
+			vf.createLiteral(BigDecimal.ONE.negate()),
+			vf.createLiteral(new BigDecimal("123456789.987654321")),
+			vf.createLiteral(new BigDecimal("0.00000000000000000001")),
+			vf.createLiteral(BigDecimal.valueOf(42.42)),
+			vf.createLiteral(BigDecimal.TEN),
+			// DOUBLE
+			vf.createLiteral(Double.NaN),
+			vf.createLiteral(Double.POSITIVE_INFINITY),
+			vf.createLiteral(Double.NEGATIVE_INFINITY),
+			vf.createLiteral(Double.MIN_VALUE),
+			vf.createLiteral(Double.MAX_VALUE),
+			// vf.createLiteral(-0.0d),
+			vf.createLiteral(3.14159d),
+			vf.createLiteral(2.0d),
+			vf.createLiteral(7.11d),
+			// FLOAT
+			vf.createLiteral(Float.NaN),
+			vf.createLiteral(Float.POSITIVE_INFINITY),
+			vf.createLiteral(Float.NEGATIVE_INFINITY),
+			vf.createLiteral(Float.MIN_VALUE),
+			vf.createLiteral(Float.MAX_VALUE),
+			vf.createLiteral(-0.0f),
+			vf.createLiteral(1.5f),
+			vf.createLiteral(0.25f),
+			// INTEGER
+			vf.createLiteral(BigInteger.ZERO),
+			vf.createLiteral(BigInteger.ONE.negate()),
+			vf.createLiteral(BigInteger.valueOf(Long.MAX_VALUE)),
+			vf.createLiteral(BigInteger.valueOf(Long.MIN_VALUE)),
+			vf.createLiteral(BigInteger.valueOf(100)),
+			vf.createLiteral(BigInteger.valueOf(-12345)),
+			// LONG
+			vf.createLiteral(Long.MAX_VALUE),
+			vf.createLiteral(Long.MIN_VALUE),
+			vf.createLiteral(0L),
+			vf.createLiteral(123456789L),
+			// INT
+			vf.createLiteral(Integer.MAX_VALUE),
+			vf.createLiteral(Integer.MIN_VALUE),
+			vf.createLiteral(0),
+			vf.createLiteral(42),
+			// SHORT
+			vf.createLiteral(Short.MAX_VALUE),
+			vf.createLiteral(Short.MIN_VALUE),
+			vf.createLiteral((short) 0),
+			vf.createLiteral((short) 999),
+			// BYTE
+			vf.createLiteral(Byte.MAX_VALUE),
+			vf.createLiteral(Byte.MIN_VALUE),
+			vf.createLiteral((byte) 0),
+			vf.createLiteral((byte) 42),
+			// UNSIGNED_LONG
+			vf.createLiteral("0", XSD.UNSIGNED_LONG),
+			vf.createLiteral("18446744073709551615", XSD.UNSIGNED_LONG), // 2^64-1
+			vf.createLiteral("123456789", XSD.UNSIGNED_LONG),
+			// UNSIGNED_INT
+			vf.createLiteral("0", XSD.UNSIGNED_INT),
+			vf.createLiteral("4294967295", XSD.UNSIGNED_INT), // 2^32-1
+			vf.createLiteral("123456", XSD.UNSIGNED_INT),
+			// UNSIGNED_SHORT
+			vf.createLiteral("0", XSD.UNSIGNED_SHORT),
+			vf.createLiteral("65535", XSD.UNSIGNED_SHORT), // 2^16-1
+			vf.createLiteral("12345", XSD.UNSIGNED_SHORT),
+			// UNSIGNED_BYTE
+			vf.createLiteral("0", XSD.UNSIGNED_BYTE),
+			vf.createLiteral("255", XSD.UNSIGNED_BYTE), // 2^8-1
+			vf.createLiteral("42", XSD.UNSIGNED_BYTE),
+			// POSITIVE_INTEGER
+			vf.createLiteral("1", XSD.POSITIVE_INTEGER),
+			vf.createLiteral("999999999999999999999999", XSD.POSITIVE_INTEGER),
+			vf.createLiteral("42", XSD.POSITIVE_INTEGER),
+			// NEGATIVE_INTEGER
+			vf.createLiteral("-1", XSD.NEGATIVE_INTEGER),
+			vf.createLiteral("-999999999999999999999999", XSD.NEGATIVE_INTEGER),
+			vf.createLiteral("-42", XSD.NEGATIVE_INTEGER),
+			// NON_NEGATIVE_INTEGER
+			vf.createLiteral("0", XSD.NON_NEGATIVE_INTEGER),
+			vf.createLiteral("123456789012345678", XSD.NON_NEGATIVE_INTEGER),
+			vf.createLiteral("123", XSD.NON_NEGATIVE_INTEGER),
+			// NON_POSITIVE_INTEGER
+			vf.createLiteral("0", XSD.NON_POSITIVE_INTEGER),
+			vf.createLiteral("-123456789012345678", XSD.NON_POSITIVE_INTEGER),
+			vf.createLiteral("-99", XSD.NON_POSITIVE_INTEGER),
+			// STRING (short string; edge + standard)
+			vf.createLiteral("", XSD.STRING),
+			vf.createLiteral("a", XSD.STRING),
+			vf.createLiteral("abcdefg", XSD.STRING), // max inlined length
+			vf.createLiteral("RDF4J", XSD.STRING),
+			vf.createLiteral("test", XSD.STRING),
+			// DATETIME
+			vf.createLiteral(LocalDateTime.of(1970, 1, 1, 0, 0, 0)),
+			vf.createLiteral(LocalDateTime.of(9999, 12, 31, 23, 59, 59)),
+			vf.createLiteral(LocalDateTime.of(2020, 2, 29, 12, 0, 0)),
+			vf.createLiteral(LocalDateTime.of(1999, 12, 31, 23, 59, 59)),
+			// DATETIMESTAMP
+			// vf.createLiteral(OffsetDateTime.of(1970, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC)),
+			// vf.createLiteral(OffsetDateTime.of(9999, 12, 31, 23, 59, 59, 0, ZoneOffset.ofHours(14))),
+			// vf.createLiteral(OffsetDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.ofHours(-5))),
+			// DATE
+			vf.createLiteral(LocalDate.of(1970, 1, 1)),
+			vf.createLiteral(LocalDate.of(9999, 12, 31)),
+			vf.createLiteral(LocalDate.of(2024, 6, 13)),
+			// BOOLEAN
+			vf.createLiteral(true),
+			vf.createLiteral(false)
+	);
+
+	/** Every literal that {@code Values.packLiteral} inlines must be restored unchanged by {@code unpackLiteral}. */
+	@Test
+	void testPackAndUnpack_AllLiteralTypesWithEdgeAndStandardCases() {
+		for (Literal literal : literals) {
+			long packed = Values.packLiteral(literal);
+			// A packed value of 0 means the literal is not inlined, so there is nothing to round-trip.
+			if (packed == 0L) {
+				continue;
+			}
+			Literal unpacked = Values.unpackLiteral(packed, vf);
+			assertEqualLiterals(unpacked, literal);
+		}
+	}
+
+	/** Every inlined value must survive being written and read back as an unsigned varint. */
+	@Test
+	void testPackAndUnpack_AllLiteralTypesWithVarintConversion() {
+		ByteBuffer bb = ByteBuffer.allocate(Long.BYTES + 1);
+		for (Literal literal : literals) {
+			long packed = Values.packLiteral(literal);
+			// A packed value of 0 means the literal is not inlined, so there is nothing to round-trip.
+			if (packed == 0L) {
+				continue;
+			}
+			bb.clear();
+			Varint.writeUnsigned(bb, packed);
+			bb.flip();
+			assertThat(Varint.readUnsigned(bb)).isEqualTo(packed);
+		}
+	}
+
+	/** Asserts that the unpacked literal equals the literal that was originally packed. */
+	private void assertEqualLiterals(Literal actual, Literal expected) {
+		assertThat(actual).isEqualTo(expected);
+	}
+}