Skip to content

Commit a2e5330

Browse files
mhilton and alamb authored
fix: date_bin() on timestamps before 1970 (#13204)
* fix: date_bin() on timestamps before 1970 The date_bin() function was not working correctly for timestamps before 1970. Specifically, if the input timestamp was the exact time of the start of a bin, then it would be placed in the previous bin. The % operator has a negative result when the dividend is negative. This causes the date_bin calculation to round up to the next bin. To compensate, the size of 1 interval is subtracted from the result if the input is negative. This subtraction is no longer performed if the input is already the exact time of the start of a bin. * fix clippy --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 592b924 commit a2e5330

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

datafusion/functions/src/datetime/date_bin.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 {
240240
fn compute_distance(time_diff: i64, stride: i64) -> i64 {
241241
let time_delta = time_diff - (time_diff % stride);
242242

243-
if time_diff < 0 && stride > 1 {
243+
if time_diff < 0 && stride > 1 && time_delta != time_diff {
244244
// The origin is later than the source timestamp, round down to the previous bin
245245
time_delta - stride
246246
} else {
@@ -864,4 +864,32 @@ mod tests {
864864
assert_eq!(result, expected1, "{source} = {expected}");
865865
})
866866
}
867+
868+
#[test]
869+
fn test_date_bin_before_epoch() {
870+
let cases = [
871+
(
872+
(TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"),
873+
"1969-12-31T23:30:00",
874+
),
875+
(
876+
(TimeDelta::try_minutes(15), "1969-12-31T23:45:00"),
877+
"1969-12-31T23:45:00",
878+
),
879+
(
880+
(TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"),
881+
"1969-12-31T23:45:00",
882+
),
883+
];
884+
885+
cases.iter().for_each(|((stride, source), expected)| {
886+
let stride = stride.unwrap();
887+
let stride1 = stride.num_nanoseconds().unwrap();
888+
let source1 = string_to_timestamp_nanos(source).unwrap();
889+
890+
let expected1 = string_to_timestamp_nanos(expected).unwrap();
891+
let result = date_bin_nanos_interval(stride1, source1, 0);
892+
assert_eq!(result, expected1, "{source} = {expected}");
893+
})
894+
}
867895
}

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z');
980980
----
981981
2022-06-01T00:00:00
982982

983+
# Times before the unix epoch
984+
query P
985+
select date_bin('1 hour', column1)
986+
from (values
987+
(timestamp '1969-01-01 00:00:00'),
988+
(timestamp '1969-01-01 00:15:00'),
989+
(timestamp '1969-01-01 00:30:00'),
990+
(timestamp '1969-01-01 00:45:00'),
991+
(timestamp '1969-01-01 01:00:00')
992+
) as sq
993+
----
994+
1969-01-01T00:00:00
995+
1969-01-01T00:00:00
996+
1969-01-01T00:00:00
997+
1969-01-01T00:00:00
998+
1969-01-01T01:00:00
999+
9831000
###
9841001
## test date_trunc function
9851002
###

0 commit comments

Comments
 (0)