diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGrokITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGrokITSuite.scala index 977660e44..3e6e9bd29 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGrokITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGrokITSuite.scala @@ -44,7 +44,6 @@ class FlintSparkPPLGrokITSuite // Retrieve the results val results: Array[Row] = frame.collect() // Define the expected results - // Define the expected results val expectedResults: Array[Row] = Array( Row("charlie@domain.net", "domain.net"), Row("david@anotherdomain.com", "anotherdomain.com"), @@ -67,13 +66,11 @@ class FlintSparkPPLGrokITSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal("1")))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal("1")), "host")() val expectedPlan = Project( Seq(emailAttribute, hostAttribute), @@ -105,13 +102,11 @@ class FlintSparkPPLGrokITSuite val emailAttribute = UnresolvedAttribute("email") val ageAttribute = UnresolvedAttribute("age") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() // Define the corrected expected plan @@ -156,13 +151,11 @@ class FlintSparkPPLGrokITSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() // Define the corrected expected plan @@ -201,13 +194,11 @@ class FlintSparkPPLGrokITSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() val sortedPlan = Sort( @@ -270,21 +261,20 @@ class FlintSparkPPLGrokITSuite val timestampAttribute = UnresolvedAttribute("timestamp") val responseAttribute = UnresolvedAttribute("response") val bytesAttribute = UnresolvedAttribute("bytes") + // scalastyle:off val expectedRegExp = "(?(?(?:(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))|(?(?:(?((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?)|(?(?(?[a-zA-Z0-9._-]+)) (?(?[a-zA-Z0-9._-]+)) \\[(?(?(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]))/(?\\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\\b)/(?(?>\\d\\d){1,2}):(?(?!<[0-9])(?(?:2[0123]|[01]?[0-9])):(?(?:[0-5][0-9]))(?::(?(?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)))(?![0-9])) (?(?:[+-]?(?:[0-9]+))))\\] \"(?:(?\\b\\w+\\b) (?\\S+)(?: HTTP/(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))))?|(?.*?))\" (?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))) (?:(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+))))))|-))" + // scalastyle:on val COMMONAPACHELOG = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("1")))), + RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("1")), "COMMONAPACHELOG")() - val timestamp = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("5")))), - "timestamp")() - val response = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("18")))), - "response")() - val bytes = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("19")))), - "bytes")() + val timestamp = + Alias(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("5")), "timestamp")() + val response = + Alias(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("18")), "response")() + val bytes = + Alias(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("19")), "bytes")() val expectedPlan = Project( Seq(logAttribute, timestampAttribute, responseAttribute, bytesAttribute), Project( @@ -322,13 +312,11 @@ class FlintSparkPPLGrokITSuite val street_addressAttribute = UnresolvedAttribute("street_address") val addressAttribute = UnresolvedAttribute("address") val addressExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - street_addressAttribute, - Literal( - "(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))) (?.*)"), - Literal("3")))), + RegExpExtract( + street_addressAttribute, + Literal( + "(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))) (?.*)"), + Literal("3")), "address")() val expectedPlan = Project( Seq(addressAttribute), diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala index 06f097fc7..e69999a8e 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala @@ -70,9 +70,8 @@ class FlintSparkPPLParseITSuite // Define the expected logical plan val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")), "host")() val expectedPlan = Project( Seq(emailAttribute, hostAttribute), Project( @@ -102,9 +101,8 @@ class FlintSparkPPLParseITSuite // Define the expected logical plan val emailAttribute = UnresolvedAttribute("email") val ageAttribute = UnresolvedAttribute("age") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() // Define the corrected expected plan val expectedPlan = Project( @@ -147,9 +145,8 @@ class FlintSparkPPLParseITSuite val logicalPlan: LogicalPlan = frame.queryExecution.logical val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() // Define the corrected expected plan val expectedPlan = Project( @@ -186,9 +183,8 @@ class FlintSparkPPLParseITSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() val sortedPlan = Sort( Seq( @@ -242,21 +238,17 @@ class FlintSparkPPLParseITSuite val streetAttribute = UnresolvedAttribute("street") val streetNumberExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - addressAttribute, - Literal("(?\\d+) (?.+)"), - Literal("1")))), + RegExpExtract( + addressAttribute, + Literal("(?\\d+) (?.+)"), + Literal("1")), "streetNumber")() val streetExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - addressAttribute, - Literal("(?\\d+) (?.+)"), - Literal("2")))), + RegExpExtract( + addressAttribute, + Literal("(?\\d+) (?.+)"), + Literal("2")), "street")() val expectedPlan = Project( diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java index d6600d15e..7d51038cd 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java @@ -19,7 +19,6 @@ import java.time.ZoneId; import java.time.ZoneOffset; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java index cf841efce..d76f8e73b 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java @@ -9,26 +9,21 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$; import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation; import org.apache.spark.sql.catalyst.analysis.UnresolvedStar$; -import org.apache.spark.sql.catalyst.expressions.AttributeReference; import org.apache.spark.sql.catalyst.expressions.Ascending$; -import org.apache.spark.sql.catalyst.expressions.Coalesce; import org.apache.spark.sql.catalyst.expressions.Descending$; import org.apache.spark.sql.catalyst.expressions.Expression; import org.apache.spark.sql.catalyst.expressions.NamedExpression; import org.apache.spark.sql.catalyst.expressions.Predicate; -import org.apache.spark.sql.catalyst.expressions.RegExpExtract; import org.apache.spark.sql.catalyst.expressions.SortDirection; import org.apache.spark.sql.catalyst.expressions.SortOrder; -import org.apache.spark.sql.catalyst.expressions.StringRegexExpression; import org.apache.spark.sql.catalyst.plans.logical.Aggregate; -import org.apache.spark.sql.catalyst.plans.logical.DescribeRelation$; import org.apache.spark.sql.catalyst.plans.logical.Deduplicate; +import org.apache.spark.sql.catalyst.plans.logical.DescribeRelation$; import org.apache.spark.sql.catalyst.plans.logical.Limit; import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; -import org.apache.spark.sql.execution.command.DescribeTableCommand; import org.apache.spark.sql.catalyst.plans.logical.Union; +import org.apache.spark.sql.execution.command.DescribeTableCommand; import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.util.CaseInsensitiveStringMap; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.AggregateFunction; @@ -76,16 +71,13 @@ import org.opensearch.sql.ppl.utils.BuiltinFunctionTranslator; import org.opensearch.sql.ppl.utils.ComparatorTransformer; import org.opensearch.sql.ppl.utils.ParseStrategy; -import org.opensearch.sql.ppl.utils.ParseUtils; import org.opensearch.sql.ppl.utils.SortUtils; import scala.Option; import scala.Option$; import scala.collection.Seq; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.function.BiFunction; @@ -93,8 +85,6 @@ import static java.util.Collections.emptyList; import static java.util.List.of; -import static org.apache.spark.sql.types.DataTypes.IntegerType; -import static org.apache.spark.sql.types.DataTypes.StringType; import static org.opensearch.sql.ppl.CatalystPlanContext.findRelation; import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq; import static org.opensearch.sql.ppl.utils.DataTypeTransformer.translate; diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ParseStrategy.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ParseStrategy.java index 7e1783ee9..45766e588 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ParseStrategy.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ParseStrategy.java @@ -1,7 +1,6 @@ package org.opensearch.sql.ppl.utils; import org.apache.spark.sql.catalyst.analysis.UnresolvedStar$; -import org.apache.spark.sql.catalyst.expressions.Coalesce; import org.apache.spark.sql.catalyst.expressions.Expression; import org.apache.spark.sql.catalyst.expressions.NamedExpression; import org.apache.spark.sql.catalyst.expressions.RegExpExtract; @@ -22,11 +21,10 @@ import static org.apache.spark.sql.types.DataTypes.IntegerType; import static org.apache.spark.sql.types.DataTypes.StringType; import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq; -import static org.opensearch.sql.ppl.utils.ParseUtils.GrokExpression.getNamedGroupIndex; public interface ParseStrategy { /** - * transform the parse/grok/patterns command into a standard catalyst RegExpExtract expression wrapped by a Coalesce to handle potential null values + * transform the parse/grok/patterns command into a standard catalyst RegExpExtract expression * Since spark's RegExpExtract cant accept actual regExp group name we need to translate the group's name into its corresponding index * * @param node @@ -64,11 +62,9 @@ static LogicalPlan visitParseCommand(Parse node, Expression sourceField, ParseMe RegExpExtract regExpExtract = new RegExpExtract(sourceField, org.apache.spark.sql.catalyst.expressions.Literal.create(cleanedPattern, StringType), org.apache.spark.sql.catalyst.expressions.Literal.create(index + 1, IntegerType)); - //next create Coalesce to handle potential null values - Coalesce coalesce = new Coalesce(seq(regExpExtract)); //next Alias the extracted fields context.getNamedParseExpressions().push( - org.apache.spark.sql.catalyst.expressions.Alias$.MODULE$.apply(coalesce, + org.apache.spark.sql.catalyst.expressions.Alias$.MODULE$.apply(regExpExtract, group, NamedExpression.newExprId(), seq(new java.util.ArrayList()), diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGrokTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGrokTranslatorTestSuite.scala index dd0638557..5094b0035 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGrokTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGrokTranslatorTestSuite.scala @@ -53,13 +53,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal("1")))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal("1")), "host")() val expectedPlan = Project( Seq(emailAttribute, hostAttribute), @@ -90,17 +88,15 @@ class PPLLogicalPlanGrokTranslatorTestSuite // scalastyle:on val COMMONAPACHELOG = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("1")))), + RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("1")), "COMMONAPACHELOG")() val timestamp = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("11")))), + RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("11")), "timestamp")() - val response = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("25")))), - "response")() - val bytes = Alias( - Coalesce(Seq(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("27")))), - "bytes")() + val response = + Alias(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("25")), "response")() + val bytes = + Alias(RegExpExtract(messageAttribute, Literal(expectedRegExp), Literal("27")), "bytes")() val expectedPlan = Project( Seq(logAttribute, timestampAttribute, responseAttribute, bytesAttribute), Project( @@ -123,13 +119,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val ageAttribute = UnresolvedAttribute("age") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() // Define the corrected expected plan @@ -161,13 +155,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val evalResultAttribute = UnresolvedAttribute("eval_result") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal("1")))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal("1")), "host")() val evalResultExpression = Alias(Literal(1), "eval_result")() @@ -195,13 +187,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() // Define the corrected expected plan @@ -232,13 +222,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") val hostExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - emailAttribute, - Literal( - ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), - Literal(1)))), + RegExpExtract( + emailAttribute, + Literal( + ".+@(?\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b))"), + Literal(1)), "host")() val sortedPlan = Sort( @@ -282,13 +270,11 @@ class PPLLogicalPlanGrokTranslatorTestSuite val street_addressAttribute = UnresolvedAttribute("street_address") val addressAttribute = UnresolvedAttribute("address") val addressExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - street_addressAttribute, - Literal( - "(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))) (?.*)"), - Literal("3")))), + RegExpExtract( + street_addressAttribute, + Literal( + "(?(?:(?(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))))) (?.*)"), + Literal("3")), "address")() val expectedPlan = Project( Seq(addressAttribute), diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala index e11ee46e3..36fea03f9 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala @@ -38,9 +38,8 @@ class PPLLogicalPlanParseTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")), "host")() val expectedPlan = Project( Seq(emailAttribute, hostAttribute), Project( @@ -57,9 +56,8 @@ class PPLLogicalPlanParseTranslatorTestSuite context) val emailAttribute = UnresolvedAttribute("email") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")))), - "email")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")), "email")() val expectedPlan = Project( Seq(emailAttribute), Project( @@ -81,9 +79,8 @@ class PPLLogicalPlanParseTranslatorTestSuite // Define the expected logical plan val emailAttribute = UnresolvedAttribute("email") val ageAttribute = UnresolvedAttribute("age") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() // Define the corrected expected plan val expectedPlan = Project( @@ -113,9 +110,8 @@ class PPLLogicalPlanParseTranslatorTestSuite val hostAttribute = UnresolvedAttribute("host") val evalResultAttribute = UnresolvedAttribute("eval_result") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal("1")), "host")() val evalResultExpression = Alias(Literal(1), "eval_result")() @@ -144,21 +140,17 @@ class PPLLogicalPlanParseTranslatorTestSuite val streetAttribute = UnresolvedAttribute("street") val streetNumberExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - addressAttribute, - Literal("(?\\d+) (?.+)"), - Literal("1")))), + RegExpExtract( + addressAttribute, + Literal("(?\\d+) (?.+)"), + Literal("1")), "streetNumber")() val streetExpression = Alias( - Coalesce( - Seq( - RegExpExtract( - addressAttribute, - Literal("(?\\d+) (?.+)"), - Literal("2")))), + RegExpExtract( + addressAttribute, + Literal("(?\\d+) (?.+)"), + Literal("2")), "street")() val expectedPlan = Project( @@ -184,9 +176,8 @@ class PPLLogicalPlanParseTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() // Define the corrected expected plan val expectedPlan = Project( @@ -215,9 +206,8 @@ class PPLLogicalPlanParseTranslatorTestSuite val emailAttribute = UnresolvedAttribute("email") val hostAttribute = UnresolvedAttribute("host") - val hostExpression = Alias( - Coalesce(Seq(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)))), - "host")() + val hostExpression = + Alias(RegExpExtract(emailAttribute, Literal(".+@(?.+)"), Literal(1)), "host")() val sortedPlan = Sort( Seq(