From ecf51c1f848c14c6248c0d8397abd4d531a9e7e4 Mon Sep 17 00:00:00 2001 From: forestmvey Date: Fri, 7 Jul 2023 15:47:42 -0700 Subject: [PATCH] Added indexing for each part of array column name. Signed-off-by: forestmvey --- .../sql/common/utils/StringUtils.java | 4 ++ .../sql/analysis/ExpressionAnalyzer.java | 17 +++--- .../sql/analysis/QualifierAnalyzer.java | 20 ++++++- .../ast/expression/ArrayQualifiedName.java | 16 +++-- .../expression/ArrayReferenceExpression.java | 42 ++++++------- .../org/opensearch/sql/expression/DSL.java | 8 --- sql/src/main/antlr/OpenSearchSQLParser.g4 | 22 +++++-- .../sql/sql/parser/AstExpressionBuilder.java | 60 +++++++++++++++---- 8 files changed, 121 insertions(+), 68 deletions(-) diff --git a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java index bd3a5a9779..53e75178a7 100644 --- a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java @@ -105,4 +105,8 @@ public static String format(final String format, Object... args) { private static boolean isQuoted(String text, String mark) { return !Strings.isNullOrEmpty(text) && text.startsWith(mark) && text.endsWith(mark); } + + public static String removeParenthesis(String qualifier) { + return qualifier.replaceAll("\\[.+\\]", ""); + } } diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 0faaac0702..488f7f4f33 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -20,6 +20,7 @@ import java.util.OptionalInt; import java.util.stream.Collectors; import lombok.Getter; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; import org.opensearch.sql.ast.AbstractNodeVisitor; @@ -51,6 +52,7 @@ import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -387,7 +389,7 @@ public Expression visitArrayQualifiedName(ArrayQualifiedName node, AnalysisConte return reserved; } - return visitArrayIdentifier(qualifierAnalyzer.unqualified(node), context, node.getIndex()); + return visitArrayIdentifier(qualifierAnalyzer.unqualified(node), context, node.getPartsAndIndexes()); } private Expression checkForReservedIdentifier(List parts, AnalysisContext context, QualifierAnalyzer qualifierAnalyzer, QualifiedName node) { @@ -454,7 +456,8 @@ private Expression visitIdentifier(String ident, AnalysisContext context) { return ref; } - private Expression visitArrayIdentifier(String ident, AnalysisContext context, OptionalInt index) { + private Expression visitArrayIdentifier(String ident, AnalysisContext context, + List> partsAndIndexes) { // ParseExpression will always override ReferenceExpression when ident conflicts for (NamedExpression expr : context.getNamedParseExpressions()) { if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) { @@ -463,13 +466,7 @@ private Expression visitArrayIdentifier(String ident, AnalysisContext context, O } TypeEnvironment typeEnv = context.peek(); - ArrayReferenceExpression ref = index.isEmpty() ? - DSL.indexedRef(ident, - typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident))) - : - DSL.indexedRef(ident, - typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident)), index.getAsInt()); - - return ref; + return new ArrayReferenceExpression(ident, + typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, StringUtils.removeParenthesis(ident))), partsAndIndexes); } } diff --git a/core/src/main/java/org/opensearch/sql/analysis/QualifierAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/QualifierAnalyzer.java index d1e31d0079..d75603be13 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/QualifierAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/QualifierAnalyzer.java @@ -11,8 +11,10 @@ import lombok.RequiredArgsConstructor; import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; +import org.opensearch.sql.ast.expression.ArrayQualifiedName; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.exception.SemanticCheckException; /** @@ -38,6 +40,10 @@ public String unqualified(QualifiedName fullName) { return isQualifierIndexOrAlias(fullName) ? fullName.rest().toString() : fullName.toString(); } + public String unqualified(ArrayQualifiedName fullName) { + return isQualifierIndexOrAlias(fullName) ? fullName.rest().toString() : fullName.toString(); + } + private boolean isQualifierIndexOrAlias(QualifiedName fullName) { Optional qualifier = fullName.first(); if (qualifier.isPresent()) { @@ -50,10 +56,22 @@ private boolean isQualifierIndexOrAlias(QualifiedName fullName) { return false; } + private boolean isQualifierIndexOrAlias(ArrayQualifiedName fullName) { + Optional qualifier = fullName.first(); + if (qualifier.isPresent()) { + if (isFieldName(qualifier.get())) { + return false; + } + resolveQualifierSymbol(fullName, qualifier.get()); + return true; + } + return false; + } + private boolean isFieldName(String qualifier) { try { // Resolve the qualifier in Namespace.FIELD_NAME - context.peek().resolve(new Symbol(Namespace.FIELD_NAME, qualifier)); + context.peek().resolve(new Symbol(Namespace.FIELD_NAME, StringUtils.removeParenthesis(qualifier))); return true; } catch (SemanticCheckException e2) { return false; diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ArrayQualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/ArrayQualifiedName.java index 8683fccf7f..910cfdc5be 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ArrayQualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ArrayQualifiedName.java @@ -9,24 +9,22 @@ import lombok.EqualsAndHashCode; import lombok.Getter; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.AbstractNodeVisitor; +import java.util.List; import java.util.OptionalInt; +import java.util.stream.Collectors; @Getter @EqualsAndHashCode(callSuper = false) public class ArrayQualifiedName extends QualifiedName { - private final OptionalInt index; + private final List> partsAndIndexes; - public ArrayQualifiedName(String name, int index) { - super(name); - this.index = OptionalInt.of(index); - } - - public ArrayQualifiedName(String name) { - super(name); - this.index = OptionalInt.empty(); + public ArrayQualifiedName(List> parts) { + super(parts.stream().map(p -> p.getLeft()).collect(Collectors.toList())); + this.partsAndIndexes = parts; } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/ArrayReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ArrayReferenceExpression.java index afb7c26c50..2da8e6a5dd 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ArrayReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ArrayReferenceExpression.java @@ -11,9 +11,12 @@ import java.util.Arrays; import java.util.List; import java.util.OptionalInt; +import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -24,30 +27,20 @@ @EqualsAndHashCode public class ArrayReferenceExpression extends ReferenceExpression { @Getter - private final OptionalInt index; // Should be a list of indexes to support multiple nesting levels + private final List> partsAndIndexes; @Getter private final ExprType type; - @Getter - private final List paths; - public ArrayReferenceExpression(String ref, ExprType type, int index) { - super(ref, type); - this.index = OptionalInt.of(index); - this.type = type; - this.paths = Arrays.asList(ref.split("\\.")); - } - - public ArrayReferenceExpression(String ref, ExprType type) { + public ArrayReferenceExpression(String ref, ExprType type, List> partsAndIndexes) { super(ref, type); - this.index = OptionalInt.empty(); + this.partsAndIndexes = partsAndIndexes; this.type = type; - this.paths = Arrays.asList(ref.split("\\.")); } public ArrayReferenceExpression(ReferenceExpression ref) { super(ref.toString(), ref.type()); - this.index = OptionalInt.empty(); + this.partsAndIndexes = Arrays.stream(ref.toString().split("\\.")).map(e -> Pair.of(e, OptionalInt.empty())).collect( + Collectors.toList()); this.type = ref.type(); - this.paths = Arrays.asList(ref.toString().split("\\.")); } @Override @@ -66,13 +59,15 @@ public T accept(ExpressionNodeVisitor visitor, C context) { } public ExprValue resolve(ExprTupleValue value) { - return resolve(value, paths); + return resolve(value, partsAndIndexes); } - private ExprValue resolve(ExprValue value, List paths) { - ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + private ExprValue resolve(ExprValue value, List> paths) { + List pathsWithoutParenthesis = + paths.stream().map(p -> StringUtils.removeParenthesis(p.getLeft())).collect(Collectors.toList()); + ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, pathsWithoutParenthesis)); - if (!index.isEmpty()) { + if (!paths.get(0).getRight().isEmpty()) { wholePathValue = getIndexValueOfCollection(wholePathValue); } else if (paths.size() == 1 && !wholePathValue.type().equals(ExprCoreType.ARRAY)) { wholePathValue = ExprValueUtils.missingValue(); @@ -81,19 +76,20 @@ private ExprValue resolve(ExprValue value, List paths) { if (!wholePathValue.isMissing() || paths.size() == 1) { return wholePathValue; } else { - return resolve(value.keyValue(paths.get(0)), paths.subList(1, paths.size())); + return resolve(value.keyValue(pathsWithoutParenthesis.get(0) + ), paths.subList(1, paths.size())); } } private ExprValue getIndexValueOfCollection(ExprValue value) { ExprValue collectionVal = value; - for (OptionalInt currentIndex : List.of(index)) { +// for (OptionalInt currentIndex : List.of(index)) { if (collectionVal.type().equals(ExprCoreType.ARRAY)) { - collectionVal = collectionVal.collectionValue().get(currentIndex.getAsInt()); +// collectionVal = collectionVal.collectionValue().get(currentIndex.getAsInt()); } else { return ExprValueUtils.missingValue(); } - } +// } return collectionVal; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 5aa786fd76..3f1897e483 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -87,14 +87,6 @@ public static ReferenceExpression ref(String ref, ExprType type) { return new ReferenceExpression(ref, type); } - public static ArrayReferenceExpression indexedRef(String ref, ExprType type, int index) { - return new ArrayReferenceExpression(ref, type, index); - } - - public static ArrayReferenceExpression indexedRef(String ref, ExprType type) { - return new ArrayReferenceExpression(ref, type); - } - /** * Wrap a named expression if not yet. The intent is that different languages may use * Alias or not when building AST. This caused either named or unnamed expression diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 627fbe819e..9fc72b19f5 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -303,7 +303,7 @@ expressions expressionAtom : constant #constantExpressionAtom | columnName #fullColumnNameExpressionAtom - | arrayColumnName #arrayColumnNameExpressionAtom +// | arrayColumnName #arrayColumnNameExpressionAtom | functionCall #functionCallExpressionAtom | LR_BRACKET expression RR_BRACKET #nestedExpressionAtom | left=expressionAtom @@ -815,10 +815,11 @@ columnName : qualifiedName ; -arrayColumnName - : qualifiedName LT_SQR_PRTHS COLON_SYMB RT_SQR_PRTHS - | qualifiedName LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS - ; +//arrayColumnName +// : arrayQualifiedName +// | qualifiedName LT_SQR_PRTHS COLON_SYMB RT_SQR_PRTHS (arrayColumnName | columnName) * +// | qualifiedName LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS (arrayColumnName | columnName) * +// ; allTupleFields : path=qualifiedName DOT STAR @@ -828,12 +829,21 @@ alias : ident ; +//arrayQualifiedName +// : (ident | indexedIdentifier) (DOT (ident | indexedIdentifier))* +//// (( LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS ) | (DOT ident ( LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS )* ))+ +// ; + +//indexedIdentifier +// : ident LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS +// ; + qualifiedName : ident (DOT ident)* ; ident - : DOT? ID + : DOT? ID (LT_SQR_PRTHS decimalLiteral RT_SQR_PRTHS)? | BACKTICK_QUOTE_ID | keywordsCanBeId | scalarFunctionName diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index df6c32f47f..d1f0c9ee1c 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -69,11 +69,14 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; + +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.OptionalInt; import java.util.stream.Collectors; import org.antlr.v4.runtime.RuleContext; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -106,7 +109,6 @@ import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AlternateMultiMatchQueryContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AndExpressionContext; -import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ArrayColumnNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ColumnNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IdentContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IntervalLiteralContext; @@ -130,16 +132,26 @@ public UnresolvedExpression visitColumnName(ColumnNameContext ctx) { return visit(ctx.qualifiedName()); } - @Override - public UnresolvedExpression visitArrayColumnName(ArrayColumnNameContext ctx) { - UnresolvedExpression qualifiedName = visit(ctx.qualifiedName()); - if (ctx.decimalLiteral() == null) { - return new ArrayQualifiedName(qualifiedName.toString()); - } else { - return new ArrayQualifiedName( - qualifiedName.toString(), Integer.parseInt(ctx.decimalLiteral().getText())); - } - } +// @Override +// public UnresolvedExpression visitArrayColumnName(ArrayColumnNameContext ctx) { +//// return new QualifiedName( +//// identifiers.stream() +//// .map(RuleContext::getText) +//// .map(StringUtils::unquoteIdentifier) +//// .collect(Collectors.toList())); +// +// var blah = ctx.arrayQualifiedName().indexedIdentifier(); +// var hmm = ctx.arrayQualifiedName().ident(); +// +// +// UnresolvedExpression qualifiedName = visit(ctx.arrayQualifiedName()); +//// if (ctx.arrayQualifiedName().decimalLiteral() == null) { +// return new ArrayQualifiedName(qualifiedName.toString()); +//// } else { +// return new ArrayQualifiedName( +// qualifiedName.toString(), Integer.parseInt(ctx.arrayQualifiedName().decimalLiteral().toString())); +//// } +// } @Override public UnresolvedExpression visitIdent(IdentContext ctx) { @@ -544,6 +556,32 @@ public UnresolvedExpression visitExtractFunctionCall(ExtractFunctionCallContext private QualifiedName visitIdentifiers(List identifiers) { + + List> parts = new ArrayList<>(); + boolean supportsArrays = false; + for(var blah : identifiers) { + if (blah.decimalLiteral() != null) { + parts.add( + Pair.of( + StringUtils.unquoteIdentifier(blah.getText()), + OptionalInt.of(Integer.parseInt(blah.decimalLiteral().getText())) + ) + ); + supportsArrays = true; + } else { + parts.add( + Pair.of( + StringUtils.unquoteIdentifier(blah.getText()), + OptionalInt.empty() + ) + ); + } + } + + if (supportsArrays) { + return new ArrayQualifiedName(parts); + } + return new QualifiedName( identifiers.stream() .map(RuleContext::getText)