diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java index f12dc200f63a..4da1e9c38dea 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java @@ -121,6 +121,7 @@ static ColumnStorage makeLocal(ColumnStorage storage) { DoubleBuilder.fromAddress(size, data, validity, type).seal(storage, type); case TextType type -> StringBuilder.fromAddress(size, data, validity, type).seal(storage, type); + case DateType type -> DateBuilder.fromAddress(size, data, validity).seal(storage, type); default -> storage; }; assert assertSameStorages(storage, localStorage); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/DateBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/DateBuilder.java index 74e5f31d4a35..aa377897ecfa 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/DateBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/DateBuilder.java @@ -1,8 +1,12 @@ package org.enso.table.data.column.builder; +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; import java.time.LocalDate; +import java.util.BitSet; import java.util.Objects; import org.enso.table.data.column.storage.ColumnStorage; +import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.TypedStorage; import org.enso.table.data.column.storage.type.DateTimeType; import org.enso.table.data.column.storage.type.DateType; @@ -18,6 +22,28 @@ final class DateBuilder extends TypedBuilder { this.allowDateToDateTimeConversion = allowDateToDateTimeConversion; } + /** Builds a DateBuilder from native memory: reads {@code size} little-endian 32-bit epoch-day values starting at address {@code data}; a row whose bit in the bitmap at address {@code validity} is clear is appended as null. */ static DateBuilder fromAddress(int size, long data, long validity) { + var validityBuffer = + MemorySegment.ofAddress(validity).reinterpret((size + 7) / 8).asByteBuffer(); + var bits = 
BitSet.valueOf(validityBuffer); + var buf = + MemorySegment.ofAddress(data) + .reinterpret((long) Integer.BYTES * size) /* long math: the int product would wrap for size >= 2^29 */ + .asByteBuffer() + .order(ByteOrder.LITTLE_ENDIAN); + + var b = new DateBuilder(size, false); + for (var i = 0; i < size; i++) { + var day = buf.getInt(); + if (bits.get(i)) { + b.append(LocalDate.ofEpochDay(day)); + } else { + b.appendNulls(1); + } + } + return b; + } + @Override public DateBuilder append(Object o) { ensureSpaceToAppend(); @@ -40,7 +66,11 @@ public boolean accepts(Object o) { @Override protected ColumnStorage doSeal() { - return new TypedStorage<>(DateType.INSTANCE, data); + return seal(null, DateType.INSTANCE); + } + + /** Wraps this builder's data in a {@link TypedStorage} of the given type, handing {@code otherStorage} (null when called from doSeal) to the TypedStorage constructor. */ final Storage seal(ColumnStorage otherStorage, DateType type) { + return new TypedStorage<>(type, data, otherStorage); } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TypedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TypedStorage.java index 6ef3098c0963..b4fcb67388a1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TypedStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TypedStorage.java @@ -4,9 +4,11 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; +import java.time.LocalDate; import java.util.Arrays; import java.util.BitSet; import java.util.Iterator; +import org.enso.table.data.column.storage.type.DateType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.TextType; import org.enso.table.util.ImmutableBitSet; @@ -70,6 +72,28 @@ public long addressOfData() { offheapBuffer = buf; validitySet = new ImmutableBitSet(validity, data.length); } + /* Date columns with no off-heap buffer yet: fill a direct little-endian buffer with 32-bit epoch-day values and build the validity bitmap; non-LocalDate entries are written as 0 with their bit left clear. */ if (offheapBuffer == null && getType() instanceof DateType) { + var fullSize = Math.multiplyExact(data.length, Integer.BYTES); /* fail fast instead of wrapping silently */ + var buf = ByteBuffer.allocateDirect(fullSize).order(ByteOrder.LITTLE_ENDIAN); + var validity = new BitSet(); + var at = 0; + for (var 
value : data) { + if (value instanceof LocalDate s) { + buf.putInt(Math.toIntExact(s.toEpochDay())); + validity.set(at, true); + } else { + buf.putInt(0); + validity.set(at, false); + } + at++; + } + assert buf.limit() == buf.position(); + buf.flip(); + assert buf.position() == 0; + assert buf.limit() == fullSize; + offheapBuffer = buf; + validitySet = new ImmutableBitSet(validity, data.length); + } if (offheapBuffer != null) { return MemorySegment.ofBuffer(offheapBuffer).address(); } diff --git a/std-bits/tests/src/test/java/org/enso/base/polyglot/tests/DateStorageTest.java b/std-bits/tests/src/test/java/org/enso/base/polyglot/tests/DateStorageTest.java new file mode 100644 index 000000000000..f595bf037533 --- /dev/null +++ b/std-bits/tests/src/test/java/org/enso/base/polyglot/tests/DateStorageTest.java @@ -0,0 +1,92 @@ +package org.enso.base.polyglot.tests; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotSame; + +import java.time.LocalDate; +import java.util.Objects; +import java.util.Random; +import java.util.stream.IntStream; +import org.enso.table.data.column.builder.Builder; +import org.enso.table.problems.BlackholeProblemAggregator; +import org.enso.table.problems.ProblemAggregator; +import org.enso.test.utils.ContextUtils; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +/** Checks that a date storage can be copied with {@code Builder.makeLocal} and that its raw data and validity addresses can be read back through the Arrow {@code cast[Date32]} function. */ public class DateStorageTest { + @ClassRule + public static final ContextUtils ctx = + ContextUtils.newBuilder("enso", "arrow").assertGC(false).build(); + + @BeforeClass + public static void importAll() { + ctx.eval("enso", "from Standard.Base import all"); + } + + @Test + public void makeLocalFromDateStorage() { + var b = Builder.getForDate(3); + var one = LocalDate.of(1973, 12, 10); + var two = LocalDate.of(1975, 5, 3); + b.append(one).appendNulls(1).append(two); + var storage = b.seal(); + var localStorage = Builder.makeLocal(storage); + assertNotSame("local 
storage is a copy of storage", storage, localStorage); + assertEquals("They have the same size", storage.getSize(), localStorage.getSize()); + assertEquals("They have the same type", storage.getType(), localStorage.getType()); + for (var i = 0L; i < storage.getSize(); i++) { + var elem = storage.getItemBoxed(i); + var localElem = localStorage.getItemBoxed(i); + assertEquals("At " + i, elem, localElem); + } + } + + @Test + public void testCreateViaBuilderAndReadViaArrowSimple16() { + generateAndCompare("Simple 16 values", 16, IntStream.range(0, 16)); + } + + @Test + public void testCreateViaBuilderAndReadViaArrowRandom() { + generateAndCompareWithSeed(System.currentTimeMillis()); + } + + private void generateAndCompareWithSeed(long seed) { + var r = new Random(seed); + var size = r.nextInt(256, 4096); + var stream = r.ints(size, 0, 20000); + var msg = "with seed " + seed + " size " + size; + System.err.println(msg); + generateAndCompare(msg, size, stream); + } + + private void generateAndCompare(String info, int size, IntStream r) { + var sb = new StringBuilder(); + var b = Builder.getForDate(size); + r.mapToObj(LocalDate::ofEpochDay).forEach(b::append); + var storage = b.seal(); + assertEquals("Storage has the right size: " + storage, size, storage.getSize()); + assertNotEquals("Storage provides access to raw data", 0L, storage.addressOfData()); + assertNotEquals("Storage provides access to validity bitmap", 0L, storage.addressOfValidity()); + + var arr = + ctx.eval("arrow", "cast[Date32]") + .execute(storage.addressOfData(), storage.getSize(), storage.addressOfValidity()); + for (var i = 0L; i < size; i++) { + var elem0 = storage.getItemBoxed(i); + var value1 = arr.getArrayElement(i); + var elem1 = value1.isNull() ? 
null : value1.asDate(); + if (!Objects.equals(elem0, elem1)) { + sb.append("\n at ").append(i).append(" ").append(elem0).append(" != ").append(elem1); + } + } + assertEquals(info + "\n" + sb.toString(), 0, sb.length()); + } + + private static ProblemAggregator problemAggregator() { + return BlackholeProblemAggregator.INSTANCE; + } +}