Skip to content

Commit 16bc664

Browse files
committed
[CALCITE-6062] Parse timestamps more permissively
1 parent 9bf5b33 commit 16bc664

File tree

1 file changed

+51
-34
lines changed

1 file changed

+51
-34
lines changed

core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import java.text.Normalizer;
8686
import java.text.ParsePosition;
8787
import java.text.SimpleDateFormat;
88+
import java.time.DateTimeException;
8889
import java.time.Instant;
8990
import java.time.LocalDate;
9091
import java.time.LocalDateTime;
@@ -149,6 +150,8 @@ public class SqlFunctions {
149150
private static final DecimalFormat DOUBLE_FORMAT =
150151
NumberUtil.decimalFormat("0.0E0");
151152

153+
private static final ZoneId UTC_ZONE_ID = ZoneId.of("UTC");
154+
152155
private static final TimeZone LOCAL_TZ = TimeZone.getDefault();
153156

154157
private static final DateTimeFormatter ROOT_DAY_FORMAT =
@@ -208,34 +211,37 @@ public class SqlFunctions {
208211
private static final ByteString SINGLE_SPACE_BYTE_STRING =
209212
ByteString.of("20", 16);
210213

211-
// Date formatter for BigQuery's timestamp literals:
212-
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#timestamp_literals
213-
private static final DateTimeFormatter BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER =
214+
/** Date-time formatter used to <em>parse</em> timestamp literals. */
215+
private static final DateTimeFormatter TIMESTAMP_LITERAL_FORMATTER =
214216
new DateTimeFormatterBuilder()
215-
// Unlike ISO 8601, BQ only supports years between 1 - 9999,
216-
// but can support single-digit month and day parts.
217+
// Support 4-digit years between 0001 - 9999.
218+
// Month and day parts may be single-digit.
217219
.appendValue(ChronoField.YEAR, 4)
218220
.appendLiteral('-')
219221
.appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NOT_NEGATIVE)
220222
.appendLiteral('-')
221223
.appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NOT_NEGATIVE)
222224
// Everything after the date is optional. Optional sections can be nested.
223225
.optionalStart()
224-
// BQ accepts either a literal 'T' or a space to separate the date from the time,
226+
// Accept either a literal 'T' or a space to separate the date from the time,
225227
// so make the 'T' optional but pad with 1 space if it's omitted.
226228
.padNext(1, ' ')
227229
.optionalStart()
230+
.parseCaseInsensitive()
228231
.appendLiteral('T')
229232
.optionalEnd()
230-
// Unlike ISO 8601, BQ can support single-digit hour, minute, and second parts.
233+
// Support single-digit hour, minute, and second parts.
231234
.appendValue(ChronoField.HOUR_OF_DAY, 1, 2, SignStyle.NOT_NEGATIVE)
232235
.appendLiteral(':')
233236
.appendValue(ChronoField.MINUTE_OF_HOUR, 1, 2, SignStyle.NOT_NEGATIVE)
234237
.appendLiteral(':')
235238
.appendValue(ChronoField.SECOND_OF_MINUTE, 1, 2, SignStyle.NOT_NEGATIVE)
236-
// ISO 8601 supports up to nanosecond precision, but BQ only up to microsecond.
239+
// Calcite's internal representation for timestamps (integer milliseconds since epoch)
240+
// does not support nanosecond precision, but we accept nanosecond digits for the purpose
241+
// of parsing timestamp literals. Sub-millisecond precision is truncated.
242+
// See [CALCITE-5308].
237243
.optionalStart()
238-
.appendFraction(ChronoField.MICRO_OF_SECOND, 0, 6, true)
244+
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
239245
.optionalEnd()
240246
.optionalStart()
241247
.parseLenient()
@@ -4152,25 +4158,40 @@ public static long datetime(long millisSinceEpoch, String timeZone) {
41524158
public static long timestamp(String expression) {
41534159
// Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers
41544160
// (milliseconds since epoch).
4155-
return parseBigQueryTimestampLiteral(expression).toInstant().toEpochMilli();
4161+
return parseTimestampLiteralPermissively(expression, UTC_ZONE_ID).toInstant().toEpochMilli();
41564162
}
41574163

41584164
/** SQL {@code TIMESTAMP(<string>, <timeZone>)} function. */
41594165
public static long timestamp(String expression, String timeZone) {
41604166
// Calcite represents TIMESTAMP WITH LOCAL TIME ZONE as Unix integers
41614167
// (milliseconds since epoch).
4162-
return parseBigQueryTimestampLiteral(expression)
4163-
.atZoneSimilarLocal(ZoneId.of(timeZone))
4168+
return parseTimestampLiteralPermissively(expression, ZoneId.of(timeZone))
41644169
.toInstant()
41654170
.toEpochMilli();
41664171
}
41674172

4168-
private static OffsetDateTime parseBigQueryTimestampLiteral(String expression) {
4169-
// First try to parse with an offset, otherwise parse as a local and assume
4170-
// UTC ("no offset").
4173+
private static OffsetDateTime parseTimestampLiteralPermissively(
4174+
String expression, ZoneId defaultZoneId) {
4175+
// First, look for a zone ID, e.g. "America/Los_Angeles", at the end of the expression.
4176+
// This is different from a zone offset, e.g. "-07:00".
4177+
final int lastSpaceIndex = expression.lastIndexOf(' ');
4178+
if (lastSpaceIndex > 0 && lastSpaceIndex < expression.length() - 1) {
4179+
final String maybeZoneId = expression.substring(lastSpaceIndex + 1);
4180+
try {
4181+
// Look up the zone ID, supplanting defaultZoneId if it's valid.
4182+
defaultZoneId = ZoneId.of(maybeZoneId);
4183+
// If the zone ID lookup succeeded, parse the rest of the expression without it.
4184+
// We'll apply the right offset before returning.
4185+
expression = expression.substring(0, lastSpaceIndex);
4186+
} catch (DateTimeException e) {
4187+
// maybeZoneId lookup failed. Neither the expression nor defaultZoneId has been modified.
4188+
}
4189+
}
4190+
4191+
// Try to parse with an offset,
4192+
// otherwise parse as a local datetime and apply the default zone ID.
41714193
try {
4172-
return OffsetDateTime.parse(expression,
4173-
BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER);
4194+
return OffsetDateTime.parse(expression, TIMESTAMP_LITERAL_FORMATTER);
41744195
} catch (DateTimeParseException e) {
41754196
// Not parseable as a datetime with an offset; fall through to the next strategy.
41764197
}
@@ -4180,20 +4201,19 @@ private static OffsetDateTime parseBigQueryTimestampLiteral(String expression) {
41804201
// match "+00:00".
41814202
try {
41824203
expression += ":00";
4183-
return OffsetDateTime.parse(expression,
4184-
BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER);
4204+
return OffsetDateTime.parse(expression, TIMESTAMP_LITERAL_FORMATTER);
41854205
} catch (DateTimeParseException e) {
41864206
// Still not parseable after appending ":00"; fall through to local datetime parsing.
41874207
}
41884208
}
41894209
try {
4190-
return LocalDateTime
4191-
.parse(expression, BIG_QUERY_TIMESTAMP_LITERAL_FORMATTER)
4192-
.atOffset(ZoneOffset.UTC);
4210+
LocalDateTime localDateTime =
4211+
LocalDateTime.parse(expression, TIMESTAMP_LITERAL_FORMATTER);
4212+
return localDateTime.atOffset(
4213+
defaultZoneId.getRules().getOffset(localDateTime));
41934214
} catch (DateTimeParseException e2) {
41944215
throw new IllegalArgumentException(
4195-
String.format(Locale.ROOT,
4196-
"Could not parse BigQuery timestamp literal: %s", expression),
4216+
String.format(Locale.ROOT, "Could not parse timestamp literal: %s", expression),
41974217
e2);
41984218
}
41994219
}
@@ -4235,8 +4255,7 @@ public static long timestamp(long millisSinceEpoch, String timeZone) {
42354255
// TIME ZONE and TIMESTAMP, respectively) are represented internally as
42364256
// milliseconds since epoch UTC and epoch.
42374257
final Instant instant = Instant.ofEpochMilli(millisSinceEpoch);
4238-
final ZoneId utcZone = ZoneId.of("UTC");
4239-
return OffsetDateTime.ofInstant(instant, utcZone)
4258+
return OffsetDateTime.ofInstant(instant, UTC_ZONE_ID)
42404259
.atZoneSimilarLocal(ZoneId.of(timeZone))
42414260
.toInstant()
42424261
.toEpochMilli();
@@ -4279,21 +4298,19 @@ public static int time(long timestampMillis, String timeZone) {
42794298
if (v == null) {
42804299
return castNonNull(null);
42814300
}
4282-
return new TimestampWithTimeZoneString(v)
4283-
.withTimeZone(DateTimeUtils.UTC_ZONE)
4284-
.getLocalTimestampString()
4285-
.getMillisSinceEpoch();
4301+
return parseTimestampLiteralPermissively(v, UTC_ZONE_ID)
4302+
.toInstant()
4303+
.toEpochMilli();
42864304
}
42874305

42884306
public static @PolyNull Long toTimestampWithLocalTimeZone(@PolyNull String v,
42894307
TimeZone timeZone) {
42904308
if (v == null) {
42914309
return castNonNull(null);
42924310
}
4293-
return new TimestampWithTimeZoneString(v + " " + timeZone.getID())
4294-
.withTimeZone(DateTimeUtils.UTC_ZONE)
4295-
.getLocalTimestampString()
4296-
.getMillisSinceEpoch();
4311+
return parseTimestampLiteralPermissively(v, timeZone.toZoneId())
4312+
.toInstant()
4313+
.toEpochMilli();
42974314
}
42984315

42994316
// Don't need shortValueOf etc. - Short.valueOf is sufficient.

0 commit comments

Comments
 (0)