From ea15184005a0c349ba17319cf57ce7cfdda754ed Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Fri, 7 Jul 2023 15:28:04 -0700 Subject: [PATCH] updated get_format, extract, timestampdiff, and timestampadd to be in line with sql, as well as updated tests and documentation Signed-off-by: Matthew Wells --- docs/user/ppl/functions/datetime.rst | 50 +++++++-------- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 62 +++++++++++++++++-- .../sql/ppl/parser/AstExpressionBuilder.java | 51 +++++++++++++++ .../sql/ppl/antlr/PPLSyntaxParserTest.java | 50 +++++++++++++++ 4 files changed, 184 insertions(+), 29 deletions(-) diff --git a/docs/user/ppl/functions/datetime.rst b/docs/user/ppl/functions/datetime.rst index fbf4f1a445..7cdcd19fb0 100644 --- a/docs/user/ppl/functions/datetime.rst +++ b/docs/user/ppl/functions/datetime.rst @@ -875,13 +875,13 @@ Return type: LONG Example:: - os> source=people | eval `extract('YEAR_MONTH', "2023-02-07 10:11:12")` = extract('YEAR_MONTH', "2023-02-07 10:11:12") | fields `extract('YEAR_MONTH', "2023-02-07 10:11:12")` + os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` fetched rows / total rows = 1/1 - +------------------------------------------------+ - | extract('YEAR_MONTH', "2023-02-07 10:11:12") | - |------------------------------------------------| - | 202302 | - +------------------------------------------------+ + +--------------------------------------------------+ + | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | + |--------------------------------------------------| + | 202302 | + +--------------------------------------------------+ FROM_DAYS @@ -951,18 +951,18 @@ Description Usage: Returns a string value containing string format specifiers based on the input arguments. 
-Argument type: TYPE, STRING, where TYPE must be one of the following tokens: ["DATE", "TIME", "DATETIME", TIMESTAMP"], and +Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, DATETIME, TIMESTAMP], and STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). Examples:: - os> source=people | eval `GET_FORMAT('DATE', 'USA')` = GET_FORMAT('DATE', 'USA') | fields `GET_FORMAT('DATE', 'USA')` + os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` fetched rows / total rows = 1/1 - +-----------------------------+ - | GET_FORMAT('DATE', 'USA') | - |-----------------------------| - | %m.%d.%Y | - +-----------------------------+ + +---------------------------+ + | GET_FORMAT(DATE, 'USA') | + |---------------------------| + | %m.%d.%Y | + +---------------------------+ HOUR @@ -1817,13 +1817,13 @@ INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR Examples:: - os> source=people | eval `TIMESTAMPADD('DAY', 17, '2000-01-01 00:00:00')` = TIMESTAMPADD('DAY', 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD('QUARTER', -1, '2000-01-01 00:00:00')` = TIMESTAMPADD('QUARTER', -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD('DAY', 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD('QUARTER', -1, '2000-01-01 00:00:00')` + os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` fetched rows / total rows = 1/1 - +--------------------------------------------------+------------------------------------------------------+ - | TIMESTAMPADD('DAY', 17, '2000-01-01 00:00:00') | TIMESTAMPADD('QUARTER', -1, '2000-01-01 00:00:00') | - 
|--------------------------------------------------+------------------------------------------------------| - | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | - +--------------------------------------------------+------------------------------------------------------+ + +------------------------------------------------+----------------------------------------------------+ + | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | + |------------------------------------------------+----------------------------------------------------| + | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | + +------------------------------------------------+----------------------------------------------------+ TIMESTAMPDIFF @@ -1842,13 +1842,13 @@ INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR Examples:: - os> source=people | eval `TIMESTAMPDIFF('YEAR', '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF('YEAR', '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF('SECOND', time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF('SECOND', time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF('YEAR', '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF('SECOND', time('00:00:23'), time('00:00:00'))` + os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` fetched rows / total rows = 1/1 - +-----------------------------------------------------------------------+---------------------------------------------------------------+ - | TIMESTAMPDIFF('YEAR', '1997-01-01 00:00:00', '2001-03-06 00:00:00') | 
TIMESTAMPDIFF('SECOND', time('00:00:23'), time('00:00:00')) | - |-----------------------------------------------------------------------+---------------------------------------------------------------| - | 4 | -23 | - +-----------------------------------------------------------------------+---------------------------------------------------------------+ + +---------------------------------------------------------------------+-------------------------------------------------------------+ + | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | + |---------------------------------------------------------------------+-------------------------------------------------------------| + | 4 | -23 | + +---------------------------------------------------------------------+-------------------------------------------------------------+ TO_DAYS diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index f6c3ad01af..9cde1bfbb8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -287,6 +287,9 @@ valueExpression right=valueExpression #binaryArithmetic | primaryExpression #valueExpressionDefault | positionFunction #positionFunctionCall + | extractFunction #extractFunctionCall + | getFormatFunction #getFormatFunctionCall + | timestampFunction #timestampFunctionCall | LT_PRTHS valueExpression RT_PRTHS #parentheticValueExpr ; @@ -534,10 +537,8 @@ dateTimeFunctionName | DAY_OF_MONTH | DAY_OF_WEEK | DAY_OF_YEAR - | EXTRACT | FROM_DAYS | FROM_UNIXTIME - | GET_FORMAT | HOUR | HOUR_OF_DAY | LAST_DAY @@ -566,8 +567,6 @@ dateTimeFunctionName | TIME | TIMEDIFF | TIMESTAMP - | TIMESTAMPADD - | TIMESTAMPDIFF | TIME_FORMAT | TIME_TO_SEC | TO_DAYS @@ -583,6 +582,61 @@ dateTimeFunctionName | YEARWEEK ; +getFormatFunction + : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS + ; + +getFormatType + : DATE + | DATETIME 
+    | TIME
+    | TIMESTAMP
+    ;
+
+extractFunction
+    : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS
+    ;
+
+simpleDateTimePart
+    : MICROSECOND
+    | SECOND
+    | MINUTE
+    | HOUR
+    | DAY
+    | WEEK
+    | MONTH
+    | QUARTER
+    | YEAR
+    ;
+
+complexDateTimePart
+    : SECOND_MICROSECOND
+    | MINUTE_MICROSECOND
+    | MINUTE_SECOND
+    | HOUR_MICROSECOND
+    | HOUR_SECOND
+    | HOUR_MINUTE
+    | DAY_MICROSECOND
+    | DAY_SECOND
+    | DAY_MINUTE
+    | DAY_HOUR
+    | YEAR_MONTH
+    ;
+
+datetimePart
+    : simpleDateTimePart
+    | complexDateTimePart
+    ;
+
+timestampFunction
+    : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg=functionArg COMMA secondArg=functionArg RT_PRTHS
+    ;
+
+timestampFunctionName
+    : TIMESTAMPADD
+    | TIMESTAMPDIFF
+    ;
+
 /** condition function return boolean value */
 conditionFunctionBase
     : LIKE
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
index eddee3064e..9c8fd5c7b1 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
@@ -297,6 +297,57 @@ public UnresolvedExpression visitPositionFunction(
                     visitFunctionArg(ctx.functionArg(1))));
   }
 
+  /** Builds {@code EXTRACT(part FROM arg)} as a Function named by the EXTRACT token text. */
+  @Override
+  public UnresolvedExpression visitExtractFunctionCall(
+      OpenSearchPPLParser.ExtractFunctionCallContext ctx) {
+    return new Function(
+        ctx.extractFunction().EXTRACT().getText(), getExtractFunctionArguments(ctx));
+  }
+
+  // The part keyword travels as a STRING literal, followed by the visited source argument.
+  private List<UnresolvedExpression> getExtractFunctionArguments(
+      OpenSearchPPLParser.ExtractFunctionCallContext ctx) {
+    return Arrays.asList(
+        new Literal(ctx.extractFunction().datetimePart().getText(), DataType.STRING),
+        visitFunctionArg(ctx.extractFunction().functionArg()));
+  }
+
+  /** Builds {@code GET_FORMAT(type, arg)} as a Function named by the GET_FORMAT token text. */
+  @Override
+  public UnresolvedExpression visitGetFormatFunctionCall(
+      OpenSearchPPLParser.GetFormatFunctionCallContext ctx) {
+    return new Function(
+        ctx.getFormatFunction().GET_FORMAT().getText(), getFormatFunctionArguments(ctx));
+  }
+
+  // The type keyword travels as a STRING literal, followed by the visited format argument.
+  private List<UnresolvedExpression> getFormatFunctionArguments(
+      OpenSearchPPLParser.GetFormatFunctionCallContext ctx) {
+    return Arrays.asList(
+        new Literal(ctx.getFormatFunction().getFormatType().getText(), DataType.STRING),
+        visitFunctionArg(ctx.getFormatFunction().functionArg()));
+  }
+
+  /** Builds TIMESTAMPADD/TIMESTAMPDIFF as a Function named by the matched function keyword. */
+  @Override
+  public UnresolvedExpression visitTimestampFunctionCall(
+      OpenSearchPPLParser.TimestampFunctionCallContext ctx) {
+    return new Function(
+        ctx.timestampFunction().timestampFunctionName().getText(),
+        timestampFunctionArguments(ctx));
+  }
+
+  // The interval keyword travels as a STRING literal, followed by both visited arguments.
+  private List<UnresolvedExpression> timestampFunctionArguments(
+      OpenSearchPPLParser.TimestampFunctionCallContext ctx) {
+    return Arrays.asList(
+        new Literal(
+            ctx.timestampFunction().simpleDateTimePart().getText(), DataType.STRING),
+        visitFunctionArg(ctx.timestampFunction().firstArg),
+        visitFunctionArg(ctx.timestampFunction().secondArg));
+  }
+
   /**
    * Literal and value.
*/ diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index bbc566e2ba..7b50d39671 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -7,11 +7,17 @@ package org.opensearch.sql.ppl.antlr; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; import org.antlr.v4.runtime.tree.ParseTree; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +import java.util.List; +import java.util.stream.Stream; public class PPLSyntaxParserTest { @@ -268,5 +274,49 @@ public void testDescribeCommandWithSourceShouldFail() { new PPLSyntaxParser().parse("describe source=t"); } + + @Test + public void testCan_parse_extract_function() { + String[] parts = List.of("MICROSECOND", "SECOND", "MINUTE", "HOUR", "DAY", + "WEEK", "MONTH", "QUARTER", "YEAR", "SECOND_MICROSECOND", + "MINUTE_MICROSECOND", "MINUTE_SECOND", "HOUR_MICROSECOND", + "HOUR_SECOND", "HOUR_MINUTE", "DAY_MICROSECOND", + "DAY_SECOND", "DAY_MINUTE", "DAY_HOUR", "YEAR_MONTH").toArray(new String[0]); + + for (String part : parts) { + assertNotNull(new PPLSyntaxParser().parse(String.format("SOURCE=test | eval k = extract(%s FROM \"2023-02-06\")", part))); + } + } + + @Test + public void testCan_parse_get_format_function() { + String[] types = {"DATE", "DATETIME", "TIME", "TIMESTAMP"}; + String[] formats = {"'USA'", "'JIS'", "'ISO'", "'EUR'", "'INTERNAL'"}; + + for (String type : types) { + for (String format : formats) { + assertNotNull(new PPLSyntaxParser().parse(String.format("SOURCE=test | eval k = get_format(%s, %s)", type, format))); + } + } + } + + @Test + public void testCannot_parse_get_format_function_with_bad_arg() { + 
assertThrows( + SyntaxCheckException.class, + () -> new PPLSyntaxParser().parse("SOURCE=test | eval k = GET_FORMAT(NONSENSE_ARG,'INTERNAL')")); + } + + @Test + public void can_parse_timestampadd_function() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | eval k = TIMESTAMPADD(MINUTE, 1, '2003-01-02')")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | eval k = TIMESTAMPADD(WEEK,1,'2003-01-02')")); + } + + @Test + public void can_parse_timestampdiff_function() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | eval k = TIMESTAMPDIFF(MINUTE, '2003-01-02', '2003-01-02')")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | eval k = TIMESTAMPDIFF(WEEK,'2003-01-02','2003-01-02')")); + } }