From a9ed16df1e308c1d1cc07b9c3bdcd573b455d904 Mon Sep 17 00:00:00 2001 From: Toby Zhang Date: Mon, 5 Feb 2024 12:25:04 -0800 Subject: [PATCH] SNOW-1032874: Fix an Overflow Issue for Integer-store Timestamp (#673) According to https://docs.snowflake.com/en/sql-reference/functions/to_timestamp#usage-notes: - If the integer is less than 31536000000 (the number of milliseconds in a year), then the value is treated as a number of seconds. - If the value is greater than or equal to 31536000000 and less than 31536000000000, then the value is treated as milliseconds. - If the value is greater than or equal to 31536000000000 and less than 31536000000000000, then the value is treated as microseconds. - If the value is greater than or equal to 31536000000000000, then the value is treated as nanoseconds. Due to the multiplication logic in the code, we run into an overflow issue when the input is bigger than a certain value (for example: 31535999999999) and it causes silent corrupted values in the table, this change updates the code to use BigInteger instead of long to resolve the overflow issue. --- .../internal/DataValidationUtil.java | 31 +++++++++------- .../streaming/internal/TimestampWrapper.java | 2 +- .../internal/DataValidationUtilTest.java | 35 +++++++++++++++++-- .../datatypes/AbstractDataTypeTest.java | 4 ++- .../internal/datatypes/DateTimeIT.java | 34 +++++++++++++++++- 5 files changed, 88 insertions(+), 18 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index a1831f829..814423c28 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -728,20 +728,27 @@ static BigInteger validateAndParseTime( * @throws NumberFormatException If the input in not a valid long */ private static Instant parseInstantGuessScale(String input) { - long epochNanos; - long val = Long.parseLong(input); - - if (val > -SECONDS_LIMIT_FOR_EPOCH && val < SECONDS_LIMIT_FOR_EPOCH) { - epochNanos = val * Power10.intTable[9]; - } else if (val > -MILLISECONDS_LIMIT_FOR_EPOCH && val < MILLISECONDS_LIMIT_FOR_EPOCH) { - epochNanos = val * Power10.intTable[6]; - } else if (val > -MICROSECONDS_LIMIT_FOR_EPOCH && val < MICROSECONDS_LIMIT_FOR_EPOCH) { - epochNanos = val * Power10.intTable[3]; - } else { - epochNanos = val; + BigInteger epochNanos; + try { + long val = Long.parseLong(input); + + if (val > -SECONDS_LIMIT_FOR_EPOCH && val < SECONDS_LIMIT_FOR_EPOCH) { + epochNanos = BigInteger.valueOf(val).multiply(Power10.sb16Table[9]); + } else if (val > -MILLISECONDS_LIMIT_FOR_EPOCH && val < MILLISECONDS_LIMIT_FOR_EPOCH) { + epochNanos = BigInteger.valueOf(val).multiply(Power10.sb16Table[6]); + } else if (val > -MICROSECONDS_LIMIT_FOR_EPOCH && val < MICROSECONDS_LIMIT_FOR_EPOCH) { + epochNanos = BigInteger.valueOf(val).multiply(Power10.sb16Table[3]); + } else { + epochNanos = BigInteger.valueOf(val); + } + } catch (NumberFormatException e) { + // The input is bigger than max long value, treat it as nano-seconds directly + epochNanos = new BigInteger(input); } + return Instant.ofEpochSecond( - epochNanos / Power10.intTable[9], epochNanos % Power10.intTable[9]); + epochNanos.divide(Power10.sb16Table[9]).longValue(), + epochNanos.remainder(Power10.sb16Table[9]).longValue()); } /** diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java b/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java index 871cf8c14..a2dc55570 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java @@ -74,7 +74,7 @@ public BigInteger toBinary(boolean includeTimezone) { } /** Get epoch in seconds */ - public long getEpoch() { + public long getEpochSecond() { return epoch; } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 86706fcf2..9467899e2 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -26,6 +26,8 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.charset.StandardCharsets; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -251,20 +253,47 @@ public void testValidateAndParseTime() { } @Test - public void testValidateAndParseTimestamp() { + public void testValidateAndParseTimestamp() throws ParseException { TimestampWrapper wrapper = DataValidationUtil.validateAndParseTimestamp( "COL", "2021-01-01T01:00:00.123+01:00", 4, UTC, false, 0); - assertEquals(1609459200, wrapper.getEpoch()); + assertEquals(1609459200, wrapper.getEpochSecond()); assertEquals(123000000, wrapper.getFraction()); assertEquals(3600, wrapper.getTimezoneOffsetSeconds()); assertEquals(1500, wrapper.getTimeZoneIndex()); wrapper = validateAndParseTimestamp("COL", " 2021-01-01T01:00:00.123 \t\n", 9, UTC, true, 0); - Assert.assertEquals(1609462800, wrapper.getEpoch()); + Assert.assertEquals(1609462800, wrapper.getEpochSecond()); Assert.assertEquals(123000000, wrapper.getFraction()); Assert.assertEquals(new BigInteger("1609462800123000000"), wrapper.toBinary(false)); + // Test integer-stored time and scale guessing + SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + assertEquals( + BigInteger.valueOf(df.parse("1971-01-01 00:00:00.001").getTime()) + .multiply(BigInteger.valueOf(1000000)), + validateAndParseTimestamp("COL", "31536000001", 9, UTC, true, 0).toBinary(false)); + + assertEquals( + BigInteger.valueOf(df.parse("2969-05-02 23:59:59.999").getTime()) + .multiply(BigInteger.valueOf(1000000)), + validateAndParseTimestamp("COL", "31535999999999", 9, UTC, true, 0).toBinary(false)); + + assertEquals( + BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime()) + .multiply(BigInteger.valueOf(1000000)), + validateAndParseTimestamp("COL", "31536000000000", 9, UTC, true, 0).toBinary(false)); + + assertEquals( + BigInteger.valueOf(df.parse("2969-05-02 23:59:59.999").getTime()) + .multiply(BigInteger.valueOf(1000000)), + validateAndParseTimestamp("COL", "31535999999999", 9, UTC, true, 0).toBinary(false)); + + assertEquals( + BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime()) + .multiply(BigInteger.valueOf(1000000)), + validateAndParseTimestamp("COL", "31536000000000000", 9, UTC, true, 0).toBinary(false)); + // Time input is not supported expectError( ErrorCode.INVALID_VALUE_ROW, diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/AbstractDataTypeTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/AbstractDataTypeTest.java index b78473a11..6718a0aeb 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/AbstractDataTypeTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/AbstractDataTypeTest.java @@ -91,7 +91,9 @@ public void before() throws Exception { @After public void after() throws Exception { this.defaultTimezone = Optional.empty(); - conn.createStatement().executeQuery(String.format("drop database %s", databaseName)); + if (conn != null) { + conn.createStatement().executeQuery(String.format("drop database %s", databaseName)); + } if (client != null) { client.close(); } diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/DateTimeIT.java b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/DateTimeIT.java index 9bb8f349a..62829656c 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/DateTimeIT.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/datatypes/DateTimeIT.java @@ -27,7 +27,9 @@ public void setup() throws Exception { @After public void tearDown() throws Exception { - conn.createStatement().execute("alter session unset timezone;"); + if (conn != null) { + conn.createStatement().execute("alter session unset timezone;"); + } } @Test @@ -639,6 +641,36 @@ public void testTimestampWithoutTimeZone() throws Exception { "1916-12-09 10:57:53.876543211 Z", new StringProvider(), new StringProvider()); + testJdbcTypeCompatibility( + "TIMESTAMP_NTZ", + "31536000001", + "1971-01-01 00:00:00.001000000 Z", + new StringProvider(), + new StringProvider()); + testJdbcTypeCompatibility( + "TIMESTAMP_NTZ", + "31535999999999", + "2969-05-02 23:59:59.999000000 Z", + new StringProvider(), + new StringProvider()); + testJdbcTypeCompatibility( + "TIMESTAMP_NTZ", + "31536000000000", + "1971-01-01 00:00:00.000000000 Z", + new StringProvider(), + new StringProvider()); + testJdbcTypeCompatibility( + "TIMESTAMP_NTZ", + "31535999999999", + "2969-05-02 23:59:59.999000000 Z", + new StringProvider(), + new StringProvider()); + testJdbcTypeCompatibility( + "TIMESTAMP_NTZ", + "31536000000000000", + "1971-01-01 00:00:00.000000000 Z", + new StringProvider(), + new StringProvider()); // java.time.LocalDate testIngestion(