From c04a0ae333f63dab156287621371374df8805ee5 Mon Sep 17 00:00:00 2001 From: Ken Wenzel Date: Fri, 19 Sep 2025 13:14:31 +0200 Subject: [PATCH 1/2] GH-5442 Further optimize multi-threading performance and reduce copy operations. --- .../rdf4j/sail/lmdb/LmdbSailStore.java | 25 +- .../eclipse/rdf4j/sail/lmdb/ValueStore.java | 238 ++++++++++-------- 2 files changed, 151 insertions(+), 112 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 02e7d71bf5d..f5c2f76c9e7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -216,7 +216,7 @@ void rollback() throws SailException { if (tripleStoreException != null) { throw wrapTripleStoreException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } } else { @@ -478,7 +478,7 @@ public void flush() throws SailException { if (tripleStoreException != null) { throw wrapTripleStoreException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } } @@ -491,7 +491,7 @@ public void flush() throws SailException { if (tripleStoreException != null) { throw wrapTripleStoreException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } } @@ -676,22 +676,21 @@ private void startTransaction(boolean preferThreading) throws SailException { } else if (Thread.interrupted()) { throw new InterruptedException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } } - // keep thread running for at least 2ms to lock-free wait for the next + // keep thread running for a short while to lock-free wait for the next // transaction long start = 0; while (running.get() && !nextTransactionAsync) { if (start == 0) { - // System.currentTimeMillis() is expensive, so only call it when we - // are sure we need to wait - start = System.currentTimeMillis(); + // only call System.nanoTime() if we need to wait + start = System.nanoTime(); } - if (System.currentTimeMillis() - start > 2) { + if (System.nanoTime() - start > 100000) { synchronized (storeTxnStarted) { if (!nextTransactionAsync) { running.set(false); @@ -699,7 +698,7 @@ private void startTransaction(boolean preferThreading) throws SailException { } } } else { - Thread.yield(); + Thread.onSpinWait(); } } } @@ -846,7 +845,7 @@ public void execute() throws Exception { if (tripleStoreException != null) { throw wrapTripleStoreException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } @@ -854,7 +853,7 @@ public void execute() throws Exception { if (tripleStoreException != null) { throw wrapTripleStoreException(); } else { - Thread.yield(); + Thread.onSpinWait(); } } return removeCount[0]; @@ -932,7 +931,7 @@ public CloseableIteration getStatements(Resource subj, IRI try { logger.warn("Failed to get statements, retrying", e); // try once more before giving up - Thread.yield(); + Thread.onSpinWait(); return createStatementIterator(txn, subj, pred, obj, explicit, contexts); } catch (IOException e2) { throw new SailException("Unable to get statements", e); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java index 4cfc59a5be0..c9bda448202 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java @@ -82,6 +82,7 @@ import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack; +import org.lwjgl.system.MemoryUtil; import org.lwjgl.util.lmdb.MDBEnvInfo; import org.lwjgl.util.lmdb.MDBStat; import org.lwjgl.util.lmdb.MDBVal; @@ -127,6 +128,8 @@ class ValueStore extends AbstractValueFactory { * A simple cache containing the [VALUE_CACHE_SIZE] most-recently used values stored by their ID. */ private final AtomicReferenceArray valueCache; + private final int VALUE_CACHE_SIZE; + /** * A simple cache containing the [ID_CACHE_SIZE] most-recently used value-IDs stored by their value. */ @@ -194,7 +197,8 @@ class ValueStore extends AbstractValueFactory { this.mapSize = config.getValueDBSize(); open(); - valueCache = new AtomicReferenceArray<>(config.getValueCacheSize()); + VALUE_CACHE_SIZE = config.getValueCacheSize(); + valueCache = new AtomicReferenceArray<>(VALUE_CACHE_SIZE); valueIDCache = new ConcurrentCache<>(config.getValueIDCacheSize()); namespaceCache = new ConcurrentCache<>(config.getNamespaceCacheSize()); namespaceIDCache = new ConcurrentCache<>(config.getNamespaceIDCacheSize()); @@ -420,18 +424,14 @@ ValueStoreRevision getRevision() { return revision; } - protected byte[] getData(long id) throws IOException { - return readTransaction(env, (stack, txn) -> { - MDBVal keyData = MDBVal.calloc(stack); - keyData.mv_data(id2data(idBuffer(stack), id).flip()); - MDBVal valueData = MDBVal.calloc(stack); - if (mdb_get(txn, dbi, keyData, valueData) == MDB_SUCCESS) { - byte[] valueBytes = new byte[valueData.mv_data().remaining()]; - valueData.mv_data().get(valueBytes); - return valueBytes; - } - return null; - }); + protected ByteBuffer getData(MemoryStack stack, long txn, long id) throws IOException { + MDBVal keyData = MDBVal.calloc(stack); + keyData.mv_data(id2data(idBuffer(stack), id).flip()); + MDBVal valueData = MDBVal.calloc(stack); + if (mdb_get(txn, dbi, keyData, valueData) == MDB_SUCCESS) { + return valueData.mv_data(); + } + return null; } /** @@ -442,8 +442,8 @@ protected byte[] getData(long id) throws IOException { * @param id ID of a value object * @return the value object or null if not found */ - LmdbValue cachedValue(long id) { - LmdbValue value = valueCache.get((int) (id % valueCache.length())); + LmdbValue cachedValue(final long id) { + LmdbValue value = valueCache.get((int) (id % VALUE_CACHE_SIZE)); if (value != null && value.getInternalID() == id) { return value; } @@ -457,10 +457,9 @@ LmdbValue cachedValue(long id) { * * @param id ID of a value object * @param value ID of a value object - * @return the value object or null if not found */ - void cacheValue(long id, LmdbValue value) { - valueCache.lazySet((int) (id % valueCache.length()), value); + void cacheValue(final long id, final LmdbValue value) { + valueCache.lazySet((int) (id % VALUE_CACHE_SIZE), value); } /** @@ -502,21 +501,24 @@ public LmdbValue getLazyValue(long id) throws IOException { * @return The value for the ID, or null no such value could be found. * @throws IOException If an I/O error occurred. */ - public LmdbValue getValue(long id) throws IOException { + public LmdbValue getValue(final long id) throws IOException { long stamp = revisionLock.readLock(); try { // Check value cache LmdbValue resultValue = cachedValue(id); if (resultValue == null) { - // Value not in cache, fetch it from file - byte[] data = getData(id); - - if (data != null) { - resultValue = data2value(id, data, null); - // Store value in cache - cacheValue(id, resultValue); - } + // Value not in cache, fetch it from database + resultValue = readTransaction(env, (stack, txn) -> { + ByteBuffer data = getData(stack, txn, id); + if (data != null) { + LmdbValue value = data2value(id, data, null); + // Store value in cache + cacheValue(id, value); + return value; + } + return null; + }); } return resultValue; @@ -534,11 +536,14 @@ public LmdbValue getValue(long id) throws IOException { */ public boolean resolveValue(long id, LmdbValue value) { try { - byte[] data = getData(id); - if (data != null) { - data2value(id, data, value); - return true; - } + return readTransaction(env, (stack, txn) -> { + ByteBuffer data = getData(stack, txn, id); + if (data != null) { + data2value(id, data, value); + return true; + } + return false; + }); } catch (IOException e) { // should not happen } @@ -593,17 +598,16 @@ private void resizeMap(long txn, long requiredSize) throws IOException { } } - private void incrementRefCount(MemoryStack stack, long writeTxn, byte[] data) throws IOException { + private void incrementRefCount(MemoryStack stack, long writeTxn, ByteBuffer data) throws IOException { // literals have a datatype id and URIs have a namespace id - if (data[0] == LITERAL_VALUE || data[0] == URI_VALUE) { + if (data.get(0) == LITERAL_VALUE || data.get(0) == URI_VALUE) { try { stack.push(); - ByteBuffer bb = ByteBuffer.wrap(data); // skip type marker - int idLength = Varint.firstToLength(bb.get(1)); + int idLength = Varint.firstToLength(data.get(1)); MDBVal idVal = MDBVal.calloc(stack); MDBVal dataVal = MDBVal.calloc(stack); - idVal.mv_data(idBuffer(stack).put(ID_KEY).put(data, 1, idLength).flip()); + idVal.mv_data(idBuffer(stack).put(ID_KEY).put(data.duplicate().position(1).limit(1 + idLength)).flip()); long newCount = 1; if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { // update count @@ -647,11 +651,12 @@ private boolean decrementRefCount(MemoryStack stack, long writeTxn, ByteBuffer i } } - private long findId(byte[] data, boolean create) throws IOException { + private long findId(ByteBuffer data, boolean create) throws IOException { Long id = readTransaction(env, (stack, txn) -> { - if (data.length <= MAX_KEY_SIZE) { + int dataLength = data.remaining(); + if (dataLength <= MAX_KEY_SIZE) { MDBVal dataVal = MDBVal.calloc(stack); - dataVal.mv_data(stack.bytes(data)); + dataVal.mv_data(data); MDBVal idVal = MDBVal.calloc(stack); if (mdb_get(txn, dbi, dataVal, idVal) == MDB_SUCCESS) { return data2id(idVal.mv_data()); @@ -660,15 +665,13 @@ private long findId(byte[] data, boolean create) throws IOException { return null; } // id was not found, create a new one - resizeMap(txn, 2L * data.length + 2L * (2L + Long.BYTES)); + resizeMap(txn, 2L * dataLength + 2L * (2L + Long.BYTES)); - long newId = nextId(data[0]); + long newId = nextId(data.get(0)); writeTransaction((stack2, writeTxn) -> { idVal.mv_data(id2data(idBuffer(stack), newId).flip()); - E(mdb_put(writeTxn, dbi, dataVal, idVal, 0)); E(mdb_put(writeTxn, dbi, idVal, dataVal, 0)); - // update ref count if necessary incrementRefCount(stack2, writeTxn, data); return null; @@ -677,7 +680,6 @@ private long findId(byte[] data, boolean create) throws IOException { } else { MDBVal idVal = MDBVal.calloc(stack); - ByteBuffer dataBb = ByteBuffer.wrap(data); long dataHash = hash(data); int maxHashKeyLength = 2 + 2 * Long.BYTES + 2; ByteBuffer hashBb = stack.malloc(maxHashKeyLength); @@ -693,7 +695,8 @@ private long findId(byte[] data, boolean create) throws IOException { // ID of first value is directly stored with hash as key if (mdb_get(txn, dbi, hashVal, dataVal) == MDB_SUCCESS) { idVal.mv_data(dataVal.mv_data()); - if (mdb_get(txn, dbi, idVal, dataVal) == MDB_SUCCESS && dataVal.mv_data().compareTo(dataBb) == 0) { + if (mdb_get(txn, dbi, idVal, dataVal) == MDB_SUCCESS + && dataVal.mv_data().compareTo(data.duplicate()) == 0) { return data2id(idVal.mv_data()); } } else { @@ -702,18 +705,17 @@ private long findId(byte[] data, boolean create) throws IOException { return null; } - resizeMap(txn, 2L * data.length + 2L * (2L + Long.BYTES)); + resizeMap(txn, 2L * dataLength + 2L * (2L + Long.BYTES)); - long newId = nextId(data[0]); + long newId = nextId(data.get(0)); writeTransaction((stack2, writeTxn) -> { - dataVal.mv_size(data.length); idVal.mv_data(id2data(idBuffer(stack), newId).flip()); // store mapping of hash -> ID E(mdb_put(txn, dbi, hashVal, idVal, 0)); // store mapping of ID -> data - E(mdb_put(writeTxn, dbi, idVal, dataVal, MDB_RESERVE)); - dataVal.mv_data().put(data); + dataVal.mv_data(data); + E(mdb_put(writeTxn, dbi, idVal, dataVal, 0)); // update ref count if necessary incrementRefCount(stack2, writeTxn, data); @@ -744,7 +746,7 @@ private long findId(byte[] data, boolean create) throws IOException { hashIdBb.position(hashLength); idVal.mv_data(hashIdBb); if (mdb_get(txn, dbi, idVal, dataVal) == MDB_SUCCESS - && dataVal.mv_data().compareTo(dataBb) == 0) { + && dataVal.mv_data().compareTo(data.duplicate()) == 0) { // id was found if stored value is equal to requested value return data2id(hashIdBb); } @@ -761,9 +763,9 @@ private long findId(byte[] data, boolean create) throws IOException { } // id was not found, create a new one - resizeMap(txn, 1 + Long.BYTES + maxHashKeyLength + 2L * data.length); + resizeMap(txn, 1 + Long.BYTES + maxHashKeyLength + 2L * dataLength); - long newId = nextId(data[0]); + long newId = nextId(data.get(0)); writeTransaction((stack2, writeTxn) -> { // encode ID ByteBuffer idBb = id2data(idBuffer(stack), newId).flip(); @@ -781,10 +783,9 @@ private long findId(byte[] data, boolean create) throws IOException { dataVal.mv_data(stack.bytes()); E(mdb_put(txn, dbi, hashVal, dataVal, 0)); - dataVal.mv_size(data.length); + dataVal.mv_data(data); // store mapping of ID -> data - E(mdb_put(txn, dbi, idVal, dataVal, MDB_RESERVE)); - dataVal.mv_data().put(data); + E(mdb_put(txn, dbi, idVal, dataVal, 0)); // update ref count if necessary incrementRefCount(stack2, writeTxn, data); @@ -885,7 +886,24 @@ public long getId(Value value, boolean create) throws IOException { } if (data != null) { - long id = findId(data, create); + long id; + try (MemoryStack stack = MemoryStack.stackPush()) { + int stackSize = stack.getSize(); + boolean allocateOnStack = data.length < stackSize - 8; + ByteBuffer bb; + if (allocateOnStack) { + bb = stack.bytes(data); + } else { + bb = MemoryUtil.memAlloc(data.length).put(data).flip(); + } + try { + id = findId(bb, create); + } finally { + if (!allocateOnStack) { + MemoryUtil.memFree(bb); + } + } + } if (id != LmdbValue.UNKNOWN_ID) { if (isOwnValue) { @@ -1006,9 +1024,7 @@ protected void deleteValueToIdMappings(MemoryStack stack, long txn, Collection MAX_KEY_SIZE) { - byte[] data = new byte[dataLength]; - dataBuffer.get(data); - long dataHash = hash(data); + long dataHash = hash(dataBuffer); hashBb.clear(); hashBb.put(HASH_KEY); @@ -1214,10 +1230,15 @@ public long storeValue(Value value) throws IOException { * @param data The data to calculate the hash code for. * @return A hash code for the supplied data. */ - private long hash(byte[] data) { - CRC32 crc32 = new CRC32(); - crc32.update(data); - return crc32.getValue(); + private long hash(ByteBuffer data) { + try { + data.mark(); + CRC32 crc32 = new CRC32(); + crc32.update(data); + return crc32.getValue(); + } finally { + data.reset(); + } } /** @@ -1373,8 +1394,9 @@ private boolean isNamespaceData(byte[] data) { return data[0] == NAMESPACE_VALUE; } - private LmdbValue data2value(long id, byte[] data, LmdbValue value) throws IOException { - switch (data[0]) { + private LmdbValue data2value(long id, ByteBuffer data, LmdbValue value) throws IOException { + byte type = data.get(0); + switch (type) { case URI_VALUE: return data2uri(id, data, (LmdbIRI) value); case BNODE_VALUE: @@ -1382,17 +1404,15 @@ private LmdbValue data2value(long id, byte[] data, LmdbValue value) throws IOExc case LITERAL_VALUE: return data2literal(id, data, (LmdbLiteral) value); default: - throw new IllegalArgumentException("Invalid type " + data[0] + " for value with id " + id); + throw new IllegalArgumentException("Invalid type " + type + " for value with id " + id); } } - private LmdbIRI data2uri(long id, byte[] data, LmdbIRI value) throws IOException { - ByteBuffer bb = ByteBuffer.wrap(data); - // skip type marker - bb.get(); - long nsID = Varint.readUnsigned(bb); + private LmdbIRI data2uri(long id, ByteBuffer data, LmdbIRI value) throws IOException { + data.position(1); // skip type marker + long nsID = Varint.readUnsigned(data); String namespace = getNamespace(nsID); - String localName = new String(data, bb.position(), bb.remaining(), StandardCharsets.UTF_8); + String localName = StandardCharsets.UTF_8.decode(data).toString(); if (value == null) { return new LmdbIRI(revision, namespace, localName, id); @@ -1402,8 +1422,9 @@ private LmdbIRI data2uri(long id, byte[] data, LmdbIRI value) throws IOException } } - private LmdbBNode data2bnode(long id, byte[] data, LmdbBNode value) { - String nodeID = new String(data, 1, data.length - 1, StandardCharsets.UTF_8); + private LmdbBNode data2bnode(long id, ByteBuffer data, LmdbBNode value) { + data.position(1); // skip type marker + String nodeID = StandardCharsets.UTF_8.decode(data).toString(); if (value == null) { return new LmdbBNode(revision, nodeID, id); } else { @@ -1412,27 +1433,22 @@ private LmdbBNode data2bnode(long id, byte[] data, LmdbBNode value) { } } - private LmdbLiteral data2literal(long id, byte[] data, LmdbLiteral value) throws IOException { - ByteBuffer bb = ByteBuffer.wrap(data); - // skip type marker - bb.get(); - // Get datatype - long datatypeID = Varint.readUnsigned(bb); + private LmdbLiteral data2literal(long id, ByteBuffer data, LmdbLiteral value) throws IOException { + data.position(1); // skip type marker + long datatypeID = Varint.readUnsigned(data); IRI datatype = null; if (datatypeID != LmdbValue.UNKNOWN_ID) { datatype = (IRI) getValue(datatypeID); } - // Get language tag + int langLength = data.get() & 0xFF; String lang = null; - int langLength = bb.get() & 0xFF; if (langLength > 0) { - lang = new String(data, bb.position(), langLength, StandardCharsets.UTF_8); + lang = StandardCharsets.UTF_8.decode(data.slice().limit(langLength)).toString(); + data.position(data.position() + langLength); } - // Get label - String label = new String(data, bb.position() + langLength, data.length - bb.position() - langLength, - StandardCharsets.UTF_8); + String label = StandardCharsets.UTF_8.decode(data).toString(); if (value == null) { if (lang != null) { @@ -1456,8 +1472,11 @@ private LmdbLiteral data2literal(long id, byte[] data, LmdbLiteral value) throws } } - private String data2namespace(byte[] data) { - return new String(data, 1, data.length - 1, StandardCharsets.UTF_8); + private String data2namespace(ByteBuffer data) { + // skip type marker + data.get(); + // convert rest to UTF-8 string + return StandardCharsets.UTF_8.decode(data).toString(); } private long getNamespaceID(String namespace, boolean create) throws IOException { @@ -1467,11 +1486,28 @@ private long getNamespaceID(String namespace, boolean create) throws IOException } byte[] namespaceBytes = namespace.getBytes(StandardCharsets.UTF_8); - byte[] namespaceData = new byte[namespaceBytes.length + 1]; - namespaceData[0] = NAMESPACE_VALUE; - System.arraycopy(namespaceBytes, 0, namespaceData, 1, namespaceBytes.length); - long id = findId(namespaceData, create); + long id; + try (MemoryStack stack = MemoryStack.stackPush()) { + int dataLength = 1 + namespaceBytes.length; + int stackSize = stack.getSize(); + boolean allocateOnStack = dataLength < stackSize - 8; + ByteBuffer bb; + if (allocateOnStack) { + bb = stack.malloc(dataLength); + } else { + bb = MemoryUtil.memAlloc(dataLength); + } + try { + bb.put(NAMESPACE_VALUE).put(namespaceBytes).flip(); + id = findId(bb, create); + } finally { + if (!allocateOnStack) { + MemoryUtil.memFree(bb); + } + } + } + if (id != LmdbValue.UNKNOWN_ID) { namespaceIDCache.put(namespace, id); } @@ -1488,11 +1524,15 @@ private String getNamespace(long id) throws IOException { String namespace = namespaceCache.get(cacheID); if (namespace == null) { - byte[] namespaceData = getData(id); - if (namespaceData != null) { - namespace = data2namespace(namespaceData); - namespaceCache.put(cacheID, namespace); - } + namespace = readTransaction(env, (stack, txn) -> { + ByteBuffer namespaceData = getData(stack, txn, id); + if (namespaceData != null) { + String ns = data2namespace(namespaceData); + namespaceCache.put(cacheID, ns); + return ns; + } + return null; + }); } return namespace; From 757cc5a2d2c75671814eb975ece9e1e00059f06f Mon Sep 17 00:00:00 2001 From: Ken Wenzel Date: Tue, 23 Sep 2025 10:59:35 +0200 Subject: [PATCH 2/2] GH-5442 Optimize Varint by replacing multiple put calls with putShort/putInt --- .../org/eclipse/rdf4j/sail/lmdb/Varint.java | 93 +++++++++++++------ .../eclipse/rdf4j/sail/lmdb/VarintTest.java | 46 +++++---- 2 files changed, 93 insertions(+), 46 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java index 283186c0246..5636ac2ad25 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java @@ -11,6 +11,7 @@ package org.eclipse.rdf4j.sail.lmdb; import java.nio.ByteBuffer; +import java.nio.ByteOrder; /** * Encodes and decodes unsigned values using variable-length encoding. @@ -89,26 +90,54 @@ private Varint() { * @param value value to encode */ public static void writeUnsigned(final ByteBuffer bb, final long value) { + if (bb.order() == ByteOrder.BIG_ENDIAN) { + writeUnsignedBigEndian(bb, value); + } else { + writeUnsignedLittleEndian(bb, value); + } + } + + private static void writeUnsignedBigEndian(final ByteBuffer bb, final long value) { + if (value <= 240) { + bb.put((byte) value); + } else if (value <= 2287) { + int v = (int) (value - 240); + bb.putShort((short) (((v >>> 8) + 241) << 8 | (v & 0xFF))); + } else if (value <= 67823) { + int v = (int) (value - 2288); + bb.put((byte) 249); + bb.putShort((short) ((v >>> 8) << 8 | (v & 0xFF))); + } else { + int bytes = descriptor(value) + 1; + writeSignificantBitsBigEndian(bb, value, bytes); + } + } + + private static void writeUnsignedLittleEndian(final ByteBuffer bb, final long value) { if (value <= 240) { bb.put((byte) value); } else if (value <= 2287) { - bb.put((byte) ((value - 240) / 256 + 241)); - bb.put((byte) ((value - 240) % 256)); + int v = (int) (value - 240); + bb.putShort((short) ((v & 0xFF) << 8 | ((v >>> 8) + 241))); } else if (value <= 67823) { + int v = (int) (value - 2288); bb.put((byte) 249); - bb.put((byte) ((value - 2288) / 256)); - bb.put((byte) ((value - 2288) % 256)); + bb.putShort((short) ((v & 0xFF) << 8 | (v >>> 8))); } else { int bytes = descriptor(value) + 1; - bb.put((byte) (250 + (bytes - 3))); - writeSignificantBits(bb, value, bytes); + bb.order(ByteOrder.BIG_ENDIAN); + try { + writeSignificantBitsBigEndian(bb, value, bytes); + } finally { + bb.order(ByteOrder.LITTLE_ENDIAN); + } } } /** * Calculates required length in bytes to encode the given long value using variable-length encoding. * - * @param value the value value + * @param value the value * @return length in bytes */ public static int calcLengthUnsigned(long value) { @@ -245,22 +274,8 @@ public static long readListElementUnsigned(ByteBuffer bb, int index) { * @param values array with values to write */ public static void writeListUnsigned(final ByteBuffer bb, final long[] values) { - for (int i = 0; i < values.length; i++) { - final long value = values[i]; - if (value <= 240) { - bb.put((byte) value); - } else if (value <= 2287) { - bb.put((byte) ((value - 240) / 256 + 241)); - bb.put((byte) ((value - 240) % 256)); - } else if (value <= 67823) { - bb.put((byte) 249); - bb.put((byte) ((value - 2288) / 256)); - bb.put((byte) ((value - 2288) % 256)); - } else { - int bytes = descriptor(value) + 1; - bb.put((byte) (250 + (bytes - 3))); - writeSignificantBits(bb, value, bytes); - } + for (final long value : values) { + writeUnsigned(bb, value); } } @@ -296,9 +311,35 @@ public static void readListUnsigned(ByteBuffer bb, int[] indexMap, long[] values * @param value value to encode * @param bytes number of significant bytes */ - private static void writeSignificantBits(ByteBuffer bb, long value, int bytes) { - while (bytes-- > 0) { - bb.put((byte) (0xFF & (value >>> (bytes * 8)))); + private static void writeSignificantBitsBigEndian(ByteBuffer bb, long value, int bytes) { + switch (bytes) { + case 3: + bb.putInt((int) ((((250 << 8) | ((value >>> 16) & 0xFF)) << 16) | (value & 0xFFFF))); + break; + case 4: + bb.put((byte) 251); // 250 + (4 - 3) + bb.putInt((int) (value & 0xFFFFFFFFL)); + break; + case 5: + bb.putShort((short) ((252 << 8) | ((value >>> 32) & 0xFF))); + bb.putInt((int) (value & 0xFFFFFFFFL)); + break; + case 6: + bb.put((byte) 253); + bb.putShort((short) ((value >>> 32) & 0xFFFF)); + bb.putInt((int) (value & 0xFFFFFFFFL)); + break; + case 7: + bb.putLong(((254 << 8 | ((value >>> 48) & 0xFF)) << 48) | (((value >>> 32) & 0xFFFF) << 32) + | (value & 0xFFFFFFFFL)); + break; + case 8: + bb.put((byte) 255); + bb.putLong(value); + break; + default: + throw new IllegalArgumentException( + "Invalid number of bytes " + bytes + " for value " + value + " (must be 3..8)"); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java index 79cdf1b293d..e277cc23a7c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java @@ -10,15 +10,16 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.lmdb; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class VarintTest { + private final ByteOrder[] byteOrders = new ByteOrder[] { ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN }; + long[] values = new long[] { 240, 2287, 67823, 16777215, 4294967295L, 1099511627775L, 281474976710655L, 72057594037927935L, 72057594037927935L + 1 @@ -26,28 +27,33 @@ public class VarintTest { @Test public void testVarint() { - ByteBuffer bb = ByteBuffer.allocate(9); - for (int i = 0; i < values.length; i++) { - bb.clear(); - Varint.writeUnsigned(bb, values[i]); - bb.flip(); - assertEquals("Encoding should use " + (i + 1) + " bytes", i + 1, bb.remaining()); - assertEquals("Encoded and decoded value should be equal", values[i], Varint.readUnsigned(bb)); + for (ByteOrder order : byteOrders) { + ByteBuffer bb = ByteBuffer.allocate(9).order(order); + for (int i = 0; i < values.length; i++) { + bb.clear(); + Varint.writeUnsigned(bb, values[i]); + bb.flip(); + Assertions.assertEquals(i + 1, bb.remaining(), "Encoding should use " + (i + 1) + " bytes"); + Assertions.assertEquals(values[i], Varint.readUnsigned(bb), + "Encoded and decoded value should be equal"); + } } } @Test public void testVarintList() { - ByteBuffer bb = ByteBuffer.allocate(2 + 4 * Long.BYTES); - for (int i = 0; i < values.length - 4; i++) { - long[] expected = new long[4]; - System.arraycopy(values, 0, expected, 0, 4); - bb.clear(); - Varint.writeListUnsigned(bb, expected); - bb.flip(); - long[] actual = new long[4]; - Varint.readListUnsigned(bb, actual); - assertArrayEquals("Encoded and decoded value should be equal", expected, actual); + for (ByteOrder order : byteOrders) { + ByteBuffer bb = ByteBuffer.allocate(2 + 4 * Long.BYTES).order(order); + for (int i = 0; i < values.length - 4; i++) { + long[] expected = new long[4]; + System.arraycopy(values, 0, expected, 0, 4); + bb.clear(); + Varint.writeListUnsigned(bb, expected); + bb.flip(); + long[] actual = new long[4]; + Varint.readListUnsigned(bb, actual); + Assertions.assertArrayEquals(expected, actual, "Encoded and decoded value should be equal"); + } } } }