diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index a36d072b38a80..c1beb2d3a1350 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -391,6 +391,7 @@ impl<'a> Evaluator<'a> { | DataType::Nullable(box DataType::Binary) | DataType::Nullable(box DataType::Date) | DataType::Nullable(box DataType::Timestamp) + | DataType::Nullable(box DataType::TimestampTz) | DataType::Nullable(box DataType::Interval), ) => { // allow cast variant to nullable types. @@ -427,6 +428,7 @@ impl<'a> Evaluator<'a> { | DataType::Binary | DataType::Date | DataType::Timestamp + | DataType::TimestampTz | DataType::Interval, ) => { // allow cast variant to not null types. diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index a0f12f91fa952..cee156b29d065 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -835,7 +835,7 @@ pub fn get_simple_cast_function( { // parse JSON string to variant instead of cast "parse_json".to_owned() - } else if dest_type.is_timestamp_tz() { + } else if dest_type.remove_nullable().is_timestamp_tz() { "to_timestamp_tz".to_owned() } else { format!("to_{}", dest_type.to_string().to_lowercase()) diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 189aba6f52915..9e38a562a21e3 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use bstr::ByteSlice; use databend_common_column::types::months_days_micros; +use databend_common_column::types::timestamp_tz; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Domain; @@ -61,8 +62,11 @@ use databend_common_expression::types::nullable::NullableColumnBuilder; use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::number::*; use databend_common_expression::types::string::StringColumnBuilder; +use databend_common_expression::types::timestamp::MICROS_PER_SEC; use databend_common_expression::types::timestamp::clamp_timestamp; use databend_common_expression::types::timestamp::string_to_timestamp; +use databend_common_expression::types::timestamp_tz::TimestampTzType; +use databend_common_expression::types::timestamp_tz::string_to_timestamp_tz; use databend_common_expression::types::variant::cast_scalar_to_variant; use databend_common_expression::types::variant::cast_scalars_to_variants; use databend_common_expression::vectorize_1_arg; @@ -71,6 +75,7 @@ use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::vectorize_with_builder_3_arg; use databend_common_expression::with_number_mapped_type; use databend_common_io::Interval; +use jiff::Timestamp; use jiff::Unit; use jiff::civil::date; use jiff::tz::TimeZone; @@ -849,6 +854,26 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); + registry.register_passthrough_nullable_1_arg::( + "is_timestamp_tz", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_1_arg::(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match RawJsonb::new(v).is_timestamp_tz() { + Ok(res) => output.push(res), + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push(false); + } + } + }), + ); + registry.register_combine_nullable_1_arg::( "as_timestamp", |_, _| FunctionDomain::MayThrow, @@ -872,6 +897,32 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); + registry.register_combine_nullable_1_arg::( + "as_timestamp_tz", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_1_arg::>( + |v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match RawJsonb::new(v).as_timestamp_tz() { + Ok(Some(res)) => { + let offset_seconds = i32::from(res.offset) * 3_600; + output.push(timestamp_tz::new(res.value, offset_seconds)); + } + Ok(None) => output.push_null(), + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push_null(); + } + } + }, + ), + ); + registry.register_passthrough_nullable_1_arg::( "is_interval", |_, _| FunctionDomain::MayThrow, @@ -1391,6 +1442,48 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); + registry.register_combine_nullable_1_arg::( + "to_timestamp_tz", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_1_arg::>( + |val, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match cast_to_timestamp_tz(val, &ctx.func_ctx.tz) { + Ok(Some(ts)) => output.push(ts), + Ok(None) => output.push_null(), + Err(err) => { + ctx.set_error(output.len(), format!("{}", err)); + output.push_null(); + } + } + }, + ), + ); + + registry.register_combine_nullable_1_arg::( + "try_to_timestamp_tz", + |_, _| FunctionDomain::Full, + vectorize_with_builder_1_arg::>( + |val, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match cast_to_timestamp_tz(val, &ctx.func_ctx.tz) { + Ok(Some(ts)) => output.push(ts), + _ => output.push_null(), + } + }, + ), + ); + for dest_type in ALL_NUMERICS_TYPES { with_number_mapped_type!(|NUM_TYPE| match dest_type { NumberDataType::NUM_TYPE => { @@ -3377,6 +3470,38 @@ fn cast_to_timestamp(val: &[u8], tz: &TimeZone) -> Result, jsonb::Er } } +fn cast_to_timestamp_tz(val: &[u8], tz: &TimeZone) -> Result, jsonb::Error> { + let value = jsonb::from_slice(val)?; + match value { + JsonbValue::Null => Ok(None), + JsonbValue::TimestampTz(ts) => { + let offset_seconds = i32::from(ts.offset) * 3_600; + Ok(Some(timestamp_tz::new(ts.value, offset_seconds))) + } + JsonbValue::Timestamp(ts) => { + let mut value = ts.value; + clamp_timestamp(&mut value); + let timestamp = Timestamp::from_microsecond(value).map_err(|err| { + jsonb::Error::Message(format!("unable to cast to type `TIMESTAMP_TZ` {}.", err)) + })?; + let offset = tz.to_offset(timestamp); + Ok(Some(timestamp_tz::new( + value - (offset.seconds() as i64 * MICROS_PER_SEC), + offset.seconds(), + ))) + } + JsonbValue::String(s) => string_to_timestamp_tz(s.as_bytes(), || tz) + .map_err(|e| { + jsonb::Error::Message(format!( + "unable to cast to type `TIMESTAMP_TZ` {}.", + e.message() + )) + }) + .map(Some), + _ => Err(jsonb::Error::InvalidJsonType), + } +} + fn cast_to_interval(val: &[u8]) -> Result, jsonb::Error> { let value = jsonb::from_slice(val)?; match value { diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index 727f1ae895789..85a99bd8559dd 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -1226,6 +1226,15 @@ output domain : {1746093600000000..=1746093600000000} output : '2025-05-01 10:00:00.000000' +ast : as_timestamp_tz(to_timestamp_tz('2025-05-01 10:00:00 +0800')::variant) +raw expr : as_timestamp_tz(CAST(to_timestamp_tz('2025-05-01 10:00:00 +0800') AS Variant)) +checked expr : as_timestamp_tz(CAST(CAST("2025-05-01 10:00:00 +0800" AS TimestampTz) AS Variant)) +optimized expr : timestamp_tz(531266231068899886540800) +output type : TimestampTz NULL +output domain : {2025-05-01 10:00:00.000000 +0800..=2025-05-01 10:00:00.000000 +0800} +output : '2025-05-01 10:00:00.000000 +0800' + + ast : as_interval(to_interval('1 year 2 month')::variant) raw expr : as_interval(CAST(to_interval('1 year 2 month') AS Variant)) checked expr : as_interval(CAST(CAST("1 year 2 month" AS Interval) AS Variant)) @@ -1562,6 +1571,15 @@ output domain : {TRUE} output : true +ast : is_timestamp_tz(to_timestamp_tz('2025-05-01 10:00:00 +0800')::variant) +raw expr : is_timestamp_tz(CAST(to_timestamp_tz('2025-05-01 10:00:00 +0800') AS Variant)) +checked expr : is_timestamp_tz(CAST(CAST("2025-05-01 10:00:00 +0800" AS TimestampTz) AS Variant)) +optimized expr : true +output type : Boolean +output domain : {TRUE} +output : true + + ast : is_interval(to_interval('1 year 2 month')::variant) raw expr : is_interval(CAST(to_interval('1 year 2 month') AS Variant)) checked expr : is_interval(CAST(CAST("1 year 2 month" AS Interval) AS Variant)) @@ -1916,6 +1934,41 @@ error: +ast : to_timestamp_tz(parse_json('null')) +raw expr : to_timestamp_tz(parse_json('null')) +checked expr : CAST(CAST("null" AS Variant) AS TimestampTz NULL) +optimized expr : NULL +output type : TimestampTz NULL +output domain : {NULL} +output : NULL + + +ast : to_timestamp_tz(parse_json('"2023-01-01 00:00:00 +0000"')) +raw expr : to_timestamp_tz(parse_json('"2023-01-01 00:00:00 +0000"')) +checked expr : CAST(CAST("\"2023-01-01 00:00:00 +0000\"" AS Variant) AS TimestampTz NULL) +optimized expr : timestamp_tz(1672531200000000) +output type : TimestampTz NULL +output domain : {2023-01-01 00:00:00.000000 +0000..=2023-01-01 00:00:00.000000 +0000} +output : '2023-01-01 00:00:00.000000 +0000' + + +ast : to_timestamp_tz(parse_json('"2023-01-01 08:00:00 +0800"')) +raw expr : to_timestamp_tz(parse_json('"2023-01-01 08:00:00 +0800"')) +checked expr : CAST(CAST("\"2023-01-01 08:00:00 +0800\"" AS Variant) AS TimestampTz NULL) +optimized expr : timestamp_tz(531266230995366286540800) +output type : TimestampTz NULL +output domain : {2023-01-01 08:00:00.000000 +0800..=2023-01-01 08:00:00.000000 +0800} +output : '2023-01-01 08:00:00.000000 +0800' + + +error: + --> SQL:1:1 + | +1 | to_timestamp_tz(parse_json('"abc"')) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ unable to cast to type `TIMESTAMP_TZ` strptime parsing failed: %Y failed: failed to parse year: invalid number, no digits found. while evaluating function `to_timestamp_tz('"abc"')` in expr `CAST(CAST('"abc"' AS Variant) AS TimestampTz NULL)` + + + ast : to_string(parse_json('null')) raw expr : to_string(parse_json('null')) checked expr : CAST(CAST("null" AS Variant) AS String NULL) @@ -2206,6 +2259,28 @@ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------+ +ast : to_timestamp_tz(parse_json(s)) +raw expr : to_timestamp_tz(parse_json(s::String NULL)) +checked expr : CAST(CAST(s AS Variant NULL) AS TimestampTz NULL) +evaluation: ++--------+------------------------------------------------------------------------------+------------------------------------+ +| | s | Output | ++--------+------------------------------------------------------------------------------+------------------------------------+ +| Type | String NULL | TimestampTz NULL | +| Domain | {"\"2020-01-01 00:00:00 +0000\""..="\"2023-10-01 10:11:12 +0800\""} ∪ {NULL} | Unknown | +| Row 0 | '"2020-01-01 00:00:00 +0000"' | '2020-01-01 00:00:00.000000 +0000' | +| Row 1 | NULL | NULL | +| Row 2 | '"2023-10-01 10:11:12 +0800"' | '2023-10-01 10:11:12.000000 +0800' | ++--------+------------------------------------------------------------------------------+------------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | Column(NullableColumn { column: StringColumn["2020-01-01 00:00:00 +0000", , "2023-10-01 10:11:12 +0800"], validity: [0b_____101] }) | +| Output | NullableColumn { column: TimestampTz([timestamp_tz(1577836800000000), timestamp_tz(0), timestamp_tz(531266231018961358540800)]), validity: [0b_____101] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ + + ast : to_string(parse_json(s)) raw expr : to_string(parse_json(s::String NULL)) checked expr : CAST(CAST(s AS Variant NULL) AS String NULL) @@ -2399,6 +2474,42 @@ output domain : {NULL} output : NULL +ast : try_to_timestamp_tz(parse_json('null')) +raw expr : try_to_timestamp_tz(parse_json('null')) +checked expr : try_to_timestamp_tz(CAST("null" AS Variant)) +optimized expr : NULL +output type : TimestampTz NULL +output domain : {NULL} +output : NULL + + +ast : try_to_timestamp_tz(parse_json('"2023-01-01 00:00:00 +0000"')) +raw expr : try_to_timestamp_tz(parse_json('"2023-01-01 00:00:00 +0000"')) +checked expr : try_to_timestamp_tz(CAST("\"2023-01-01 00:00:00 +0000\"" AS Variant)) +optimized expr : timestamp_tz(1672531200000000) +output type : TimestampTz NULL +output domain : {2023-01-01 00:00:00.000000 +0000..=2023-01-01 00:00:00.000000 +0000} +output : '2023-01-01 00:00:00.000000 +0000' + + +ast : try_to_timestamp_tz(parse_json('"2023-01-01 08:00:00 +0800"')) +raw expr : try_to_timestamp_tz(parse_json('"2023-01-01 08:00:00 +0800"')) +checked expr : try_to_timestamp_tz(CAST("\"2023-01-01 08:00:00 +0800\"" AS Variant)) +optimized expr : timestamp_tz(531266230995366286540800) +output type : TimestampTz NULL +output domain : {2023-01-01 08:00:00.000000 +0800..=2023-01-01 08:00:00.000000 +0800} +output : '2023-01-01 08:00:00.000000 +0800' + + +ast : try_to_timestamp_tz(parse_json('"abc"')) +raw expr : try_to_timestamp_tz(parse_json('"abc"')) +checked expr : try_to_timestamp_tz(CAST("\"abc\"" AS Variant)) +optimized expr : NULL +output type : TimestampTz NULL +output domain : {NULL} +output : NULL + + ast : try_to_string(parse_json('null')) raw expr : try_to_string(parse_json('null')) checked expr : TRY_CAST(CAST("null" AS Variant) AS String NULL) @@ -2588,6 +2699,33 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +ast : try_to_timestamp_tz(parse_json(s)) +raw expr : try_to_timestamp_tz(parse_json(s::String NULL)) +checked expr : try_to_timestamp_tz(CAST(s AS Variant NULL)) +evaluation: ++--------+--------------------------------------+------------------------------------+ +| | s | Output | ++--------+--------------------------------------+------------------------------------+ +| Type | String NULL | TimestampTz NULL | +| Domain | {"\"2020-01-01\""..="true"} ∪ {NULL} | Unknown | +| Row 0 | 'true' | NULL | +| Row 1 | '123' | NULL | +| Row 2 | '-100' | NULL | +| Row 3 | '12.34' | NULL | +| Row 4 | NULL | NULL | +| Row 5 | '"2020-01-01"' | '2020-01-01 00:00:00.000000 +0000' | +| Row 6 | '"2021-01-01 20:00:00"' | '2021-01-01 20:00:00.000000 +0000' | +| Row 7 | '"abc"' | NULL | ++--------+--------------------------------------+------------------------------------+ +evaluation (internal): ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | Column(NullableColumn { column: StringColumn[true, 123, -100, 12.34, , "2020-01-01", "2021-01-01 20:00:00", "abc"], validity: [0b11101111] }) | +| Output | NullableColumn { column: TimestampTz([timestamp_tz(0), timestamp_tz(0), timestamp_tz(0), timestamp_tz(0), timestamp_tz(0), timestamp_tz(1577836800000000), timestamp_tz(1609531200000000), timestamp_tz(0)]), validity: [0b01100000] } | ++--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + ast : try_to_string(parse_json(s)) raw expr : try_to_string(parse_json(s::String NULL)) checked expr : TRY_CAST(CAST(s AS Variant NULL) AS String NULL) diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index d56658fac5180..67dbe2cc5a549 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -554,6 +554,11 @@ fn test_as_type(file: &mut impl Write) { "as_timestamp(to_timestamp('2025-05-01 10:00:00')::variant)", &[], ); + run_ast( + file, + "as_timestamp_tz(to_timestamp_tz('2025-05-01 10:00:00 +0800')::variant)", + &[], + ); run_ast( file, "as_interval(to_interval('1 year 2 month')::variant)", @@ -612,6 +617,11 @@ fn test_is_type(file: &mut impl Write) { "is_timestamp(to_timestamp('2025-05-01 10:00:00')::variant)", &[], ); + run_ast( + file, + "is_timestamp_tz(to_timestamp_tz('2025-05-01 10:00:00 +0800')::variant)", + &[], + ); run_ast( file, "is_interval(to_interval('1 year 2 month')::variant)", @@ -663,6 +673,18 @@ fn test_to_type(file: &mut impl Write) { &[], ); run_ast(file, "to_timestamp(parse_json('\"abc\"'))", &[]); + run_ast(file, "to_timestamp_tz(parse_json('null'))", &[]); + run_ast( + file, + "to_timestamp_tz(parse_json('\"2023-01-01 00:00:00 +0000\"'))", + &[], + ); + run_ast( + file, + "to_timestamp_tz(parse_json('\"2023-01-01 08:00:00 +0800\"'))", + &[], + ); + run_ast(file, "to_timestamp_tz(parse_json('\"abc\"'))", &[]); run_ast(file, "to_string(parse_json('null'))", &[]); run_ast(file, "to_string(parse_json('12.34'))", &[]); run_ast(file, "to_string(parse_json('\"abc\"'))", &[]); @@ -740,6 +762,17 @@ fn test_to_type(file: &mut impl Write) { vec![true, false, true], ), )]); + run_ast(file, "to_timestamp_tz(parse_json(s))", &[( + "s", + StringType::from_data_with_validity( + vec![ + "\"2020-01-01 00:00:00 +0000\"", + "", + "\"2023-10-01 10:11:12 +0800\"", + ], + vec![true, false, true], + ), + )]); run_ast(file, "to_string(parse_json(s))", &[( "s", StringType::from_data_with_validity(vec!["\"abc\"", "", "123"], vec![true, false, true]), @@ -770,6 +803,18 @@ fn test_try_to_type(file: &mut impl Write) { &[], ); run_ast(file, "try_to_timestamp(parse_json('\"abc\"'))", &[]); + run_ast(file, "try_to_timestamp_tz(parse_json('null'))", &[]); + run_ast( + file, + "try_to_timestamp_tz(parse_json('\"2023-01-01 00:00:00 +0000\"'))", + &[], + ); + run_ast( + file, + "try_to_timestamp_tz(parse_json('\"2023-01-01 08:00:00 +0800\"'))", + &[], + ); + run_ast(file, "try_to_timestamp_tz(parse_json('\"abc\"'))", &[]); run_ast(file, "try_to_string(parse_json('null'))", &[]); run_ast(file, "try_to_string(parse_json('12.34'))", &[]); run_ast(file, "try_to_string(parse_json('\"abc\"'))", &[]); @@ -796,6 +841,7 @@ fn test_try_to_type(file: &mut impl Write) { run_ast(file, "try_to_float64(parse_json(s))", columns); run_ast(file, "try_to_date(parse_json(s))", columns); run_ast(file, "try_to_timestamp(parse_json(s))", columns); + run_ast(file, "try_to_timestamp_tz(parse_json(s))", columns); run_ast(file, "try_to_string(parse_json(s))", columns); } diff --git a/tests/sqllogictests/suites/query/functions/02_0002_function_cast.test b/tests/sqllogictests/suites/query/functions/02_0002_function_cast.test index bb1962ea9cc8c..c0d7bff50379b 100644 --- a/tests/sqllogictests/suites/query/functions/02_0002_function_cast.test +++ b/tests/sqllogictests/suites/query/functions/02_0002_function_cast.test @@ -469,6 +469,11 @@ SELECT parse_json('"2022-01-01 01:01:01"')::datetime ---- 2022-01-01 01:01:01.000000 +query T +SELECT parse_json('"2022-01-01 01:01:01 +0800"')::timestamp_tz +---- +2022-01-01 01:01:01.000000 +0800 + statement error 1006 SELECT parse_json('"test"')::date @@ -477,12 +482,20 @@ SELECT parse_json('"test"')::date statement error 1006 SELECT parse_json('"test"')::datetime +statement error 1006 +SELECT parse_json('"test"')::timestamp_tz + query T SELECT parse_json('null')::datetime ---- NULL +query T +SELECT parse_json('null')::timestamp_tz +---- +NULL + query T SELECT as_array(parse_json('[1,2,3]')) ---- diff --git a/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test b/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test index 231a780aeb127..56fbcace06251 100644 --- a/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test +++ b/tests/sqllogictests/suites/query/functions/02_0056_function_semi_structureds_as.test @@ -73,6 +73,11 @@ select to_timestamp('2025-01-01 10:00:00')::variant, to_timestamp(to_timestamp(' ---- "2025-01-01 10:00:00.000000" 2025-01-01 10:00:00.000000 2025-01-01 10:00:00.000000 2025-01-01 10:00:00.000000 +query TTTT +select to_timestamp_tz('2025-01-01 10:00:00 +0800')::variant, to_timestamp_tz(to_timestamp_tz('2025-01-01 10:00:00 +0800')::variant), to_timestamp_tz(parse_json('"2025-01-01 10:00:00 +0800"')), as_timestamp_tz(to_timestamp_tz('2025-01-01 10:00:00 +0800')::variant) +---- +"2025-01-01 10:00:00.000000" 2025-01-01 10:00:00.000000 +0800 2025-01-01 10:00:00.000000 +0800 2025-01-01 10:00:00.000000 +0800 + query TTTT select to_interval('10 months 2 days')::variant, to_interval(to_interval('10 months 2 days')::variant), to_interval(parse_json('"10 months 2 days"')), as_interval(to_interval('10 months 2 days')::variant) ---- @@ -168,6 +173,11 @@ select is_timestamp(to_timestamp('2025-01-01 10:00:00')::variant) ---- 1 +query B +select is_timestamp_tz(to_timestamp_tz('2025-01-01 10:00:00 +0800')::variant) +---- +1 + query B select is_interval(to_interval('10 months 2 days')::variant) ----