diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RegexFunctionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RegexFunctionVisitor.java index 2fb258e8449..01e66df21bc 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RegexFunctionVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RegexFunctionVisitor.java @@ -7,7 +7,6 @@ import java.util.List; import java.util.Set; -import org.apache.commons.jexl3.parser.ASTAndNode; import org.apache.commons.jexl3.parser.ASTFunctionNode; import org.apache.commons.jexl3.parser.ASTIdentifier; import org.apache.commons.jexl3.parser.ASTOrNode; @@ -21,8 +20,6 @@ import datawave.query.jexl.JexlASTHelper; import datawave.query.jexl.JexlNodeFactory; import datawave.query.jexl.functions.FunctionJexlNodeVisitor; -import datawave.query.language.functions.jexl.Jexl; -import datawave.query.language.tree.FunctionNode; import datawave.query.parser.JavaRegexAnalyzer; import datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException; import datawave.query.util.MetadataHelper; @@ -41,16 +38,16 @@ public class RegexFunctionVisitor extends FunctionIndexQueryExpansionVisitor { private static final Logger log = ThreadConfigurableLogger.getLogger(RegexFunctionVisitor.class); - protected Set nonEventFields; + protected Set indexOnlyFields; - public RegexFunctionVisitor(ShardQueryConfiguration config, MetadataHelper metadataHelper, Set nonEventFields) { + public RegexFunctionVisitor(ShardQueryConfiguration config, MetadataHelper metadataHelper, Set indexOnlyFields) { super(config, metadataHelper, null); - this.nonEventFields = nonEventFields; + this.indexOnlyFields = indexOnlyFields; } @SuppressWarnings("unchecked") - public static T expandRegex(ShardQueryConfiguration config, MetadataHelper metadataHelper, Set nonEventFields, T script) { - RegexFunctionVisitor visitor = new RegexFunctionVisitor(config, metadataHelper, nonEventFields); + public static T expandRegex(ShardQueryConfiguration config, MetadataHelper metadataHelper, Set indexOnlyFields, T script) { + RegexFunctionVisitor visitor = new RegexFunctionVisitor(config, metadataHelper, indexOnlyFields); JexlNode root = (T) script.jjtAccept(visitor, null); root = TreeFlatteningRebuildingVisitor.flatten(root); return (T) root; @@ -67,66 +64,59 @@ public Object visit(ASTFunctionNode node, Object data) { List identifiers = JexlASTHelper.getIdentifiers(arguments.get(0)); List extractChildren = new ArrayList<>(identifiers.size()); - List keepChildren = new ArrayList<>(identifiers.size()); + boolean extractFields = false; for (ASTIdentifier identifier : identifiers) { - JexlNode regexNode = buildRegexNode(identifier, functionMetadata.name(), JexlNodes.getIdentifierOrLiteralAsString(arguments.get(1))); - if (regexNode != null) { - extractChildren.add(regexNode); - } else { - keepChildren.add(JexlNodeFactory.buildIdentifier(identifier.getName())); + String field = JexlASTHelper.deconstructIdentifier(identifier.getName()); + // if the function contains any index-only fields, extract them all from the function + if (indexOnlyFields.contains(field.toUpperCase())) { + extractFields = true; + break; } - } - if (extractChildren.size() == 0) { - // nothing to rewrite + if (!extractFields) { return returnNode; - } else if (keepChildren.size() == 0) { - // rewrite all nodes - if (identifiers.size() == 1) { - // we've already rewritten our one node - returnNode = extractChildren.get(0); - } else { - if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) { - returnNode = JexlNodeFactory.createOrNode(extractChildren); - } else { - // build an AND node because of how DeMorgan's law works with expanding negations - returnNode = JexlNodeFactory.createAndNode(extractChildren); - } - } } else { - // construct each and put it all together - JexlNode extractNode; - List joint = new ArrayList<>(); - List newArgs = new ArrayList<>(); - - if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) { - newArgs.add(JexlNodeFactory.createOrNode(keepChildren)); - extractNode = JexlNodeFactory.createOrNode(extractChildren); - } else { - newArgs.add(JexlNodeFactory.createAndNode(keepChildren)); - extractNode = JexlNodeFactory.createAndNode(extractChildren); - } - newArgs.add(arguments.get(1)); - JexlNode newFunc = FunctionJexlNodeVisitor.makeFunctionFrom(functionMetadata.namespace(), functionMetadata.name(), - newArgs.toArray(new JexlNode[0])); - - joint.add(extractNode); - joint.add(newFunc); + for (ASTIdentifier identifier : identifiers) { + JexlNode regexNode = buildRegexNode(identifier, functionMetadata.name(), JexlNodes.getIdentifierOrLiteralAsString(arguments.get(1))); + if (regexNode != null) { + extractChildren.add(regexNode); + } + } - if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) { - returnNode = JexlNodeFactory.createOrNode(joint); + if (extractChildren.size() == 0) { + // nothing to rewrite + return returnNode; } else { - returnNode = JexlNodeFactory.createAndNode(joint); + // rewrite all nodes + if (identifiers.size() == 1) { + // we've already rewritten our one node + returnNode = extractChildren.get(0); + } else { + if (functionMetadata.name().equals(INCLUDE_REGEX)) { + if (arguments.get(0) instanceof ASTOrNode) { + returnNode = JexlNodeFactory.createOrNode(extractChildren); + } else { + returnNode = JexlNodeFactory.createAndNode(extractChildren); + } + } else { + // Follow DeMorgan's law when expanding negations + if (arguments.get(0) instanceof ASTOrNode) { + returnNode = JexlNodeFactory.createAndNode(extractChildren); + } else { + returnNode = JexlNodeFactory.createOrNode(extractChildren); + } + } + } } } - } - if (log.isTraceEnabled()) { - log.trace("Rewrote \"" + JexlStringBuildingVisitor.buildQueryWithoutParse(node) + "\" into \"" - + JexlStringBuildingVisitor.buildQueryWithoutParse(returnNode) + "\""); + if (log.isTraceEnabled()) { + log.trace("Rewrote \"" + JexlStringBuildingVisitor.buildQueryWithoutParse(node) + "\" into \"" + + JexlStringBuildingVisitor.buildQueryWithoutParse(returnNode) + "\""); + } } return returnNode; } @@ -144,21 +134,20 @@ public Object visit(ASTFunctionNode node, Object data) { */ private JexlNode buildRegexNode(ASTIdentifier identifier, String functionName, String regex) { String field = JexlASTHelper.deconstructIdentifier(identifier.getName()); - if (nonEventFields.contains(field.toUpperCase())) { - try { - JavaRegexAnalyzer jra = new JavaRegexAnalyzer(regex); - if (!jra.isNgram()) { - if (functionName.equals(INCLUDE_REGEX)) { - return JexlNodeFactory.buildERNode(field, regex); - } else { - return JexlNodeFactory.buildNRNode(field, regex); - } + try { + JavaRegexAnalyzer jra = new JavaRegexAnalyzer(regex); + if (!jra.isNgram()) { + if (functionName.equals(INCLUDE_REGEX)) { + return JexlNodeFactory.buildERNode(field, regex); + } else { + return JexlNodeFactory.buildNRNode(field, regex); } - } catch (JavaRegexParseException e) { - QueryException qe = new QueryException(DatawaveErrorCode.INVALID_REGEX); - throw new DatawaveFatalQueryException(qe); } + } catch (JavaRegexParseException e) { + QueryException qe = new QueryException(DatawaveErrorCode.INVALID_REGEX); + throw new DatawaveFatalQueryException(qe); } + return null; } } diff --git a/warehouse/query-core/src/test/java/datawave/query/CompositeFunctionsTest.java b/warehouse/query-core/src/test/java/datawave/query/CompositeFunctionsTest.java index e5103f6a605..46fb753936d 100644 --- a/warehouse/query-core/src/test/java/datawave/query/CompositeFunctionsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/CompositeFunctionsTest.java @@ -782,6 +782,22 @@ public void testFilterFunctionsInvalidatedByIndexOnlyFields() throws ParseExcept } } + @Test + public void testMultiFieldInclude() throws Exception { + eventQueryLogic.setParser(new LuceneToJexlQueryParser()); + Map extraParameters = new HashMap<>(); + // @formatter:off + String[] queryStrings = { + "UUID:SOPRANO AND #INCLUDE(LOCATION || POSIZIONE || NAME, 'newjersey')" + }; + @SuppressWarnings("unchecked") + List[] expectedLists = new List[] {Collections.singletonList("SOPRANO")}; + for (int i = 0; i < queryStrings.length; i++) { + runTestQuery(expectedLists[i], queryStrings[i], format.parse("20091231"), format.parse("20150101"), extraParameters); + } + + } + @Test public void testDelayedExceededValueThresholdRegexTFField() throws Exception { Map extraParameters = new HashMap(); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RegexFunctionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RegexFunctionVisitorTest.java index 08f8596cb45..6a40011bc5d 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RegexFunctionVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RegexFunctionVisitorTest.java @@ -70,11 +70,11 @@ public void testRewriteMultiFieldedIncludeRegex() throws ParseException { public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDB"); String query = "FOO == 'bar' && filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - String expected = "FOO == 'bar' && (FIELDB =~ 'ba.*' || filter:includeRegex(FIELDA || FIELDC, 'ba.*'))"; + String expected = "FOO == 'bar' && (FIELDA =~ 'ba.*' || FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDB =~ 'ba.*' || filter:includeRegex(FIELDA || FIELDC, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDA =~ 'ba.*' || FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -82,11 +82,11 @@ public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex() throws ParseExce public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex2() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDB", "FIELDC"); String query = "FOO == 'bar' && filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - String expected = "FOO == 'bar' && (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || filter:includeRegex(FIELDA, 'ba.*'))"; + String expected = "FOO == 'bar' && (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || FIELDA =~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || filter:includeRegex(FIELDA, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || FIELDA =~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -94,11 +94,11 @@ public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex2() throws ParseExc public void testRewriteHeteroIndexOnlyFieldsExcludeRegex() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDA", "FIELDB"); String query = "FOO == 'bar' && filter:excludeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - String expected = "FOO == 'bar' && FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*')"; + String expected = "FOO == 'bar' && FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && FIELDC !~ 'ba.*'"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:excludeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && FIELDC !~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -106,11 +106,11 @@ public void testRewriteHeteroIndexOnlyFieldsExcludeRegex() throws ParseException public void testANDIncludeRegex() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDA", "FIELDB"); String query = "FOO == 'bar' && filter:includeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')"; - String expected = "FOO == 'bar' && FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && filter:includeRegex(FIELDC, 'ba.*')"; + String expected = "FOO == 'bar' && FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && FIELDC =~ 'ba.*'"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:includeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && filter:includeRegex(FIELDC, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && FIELDC =~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -118,11 +118,11 @@ public void testANDIncludeRegex() throws ParseException { public void testANDExcludeRegex() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDA", "FIELDB"); String query = "FOO == 'bar' && filter:excludeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')"; - String expected = "FOO == 'bar' && FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*')"; + String expected = "FOO == 'bar' && (FIELDA !~ 'ba.*' || FIELDB !~ 'ba.*' || FIELDC !~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:excludeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' || FIELDB !~ 'ba.*' || FIELDC !~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -142,11 +142,11 @@ public void testRewriteMultiFieldedExcludeRegex() throws ParseException { public void testRewriteMultiHeteroFieldedExcludeRegex() throws ParseException { Set indexOnlyFields = Sets.newHashSet("FIELDA"); String query = "FOO == 'bar' && filter:excludeRegex(FIELDA||FIELDB, 'ba.*')"; - String expected = "FOO == 'bar' && (FIELDA !~ 'ba.*' && filter:excludeRegex(FIELDB, 'ba.*'))"; + String expected = "FOO == 'bar' && (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); query = "FOO == 'bar' || filter:excludeRegex(FIELDA||FIELDB, 'ba.*')"; - expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && filter:excludeRegex(FIELDB, 'ba.*'))"; + expected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*')"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -166,7 +166,7 @@ public void testEndWildCardNotIndexOnly() throws ParseException { public void testMixedEventNonEvent() throws ParseException { Set indexOnlyFields = Sets.newHashSet("NON_EVENT_FIELD"); String query = "filter:includeRegex(EVENT_FIELD || NON_EVENT_FIELD,'all_.*?')"; - String expected = " NON_EVENT_FIELD =~ 'all_.*?' || filter:includeRegex(EVENT_FIELD, 'all_.*?')"; + String expected = "EVENT_FIELD =~ 'all_.*?' || NON_EVENT_FIELD =~ 'all_.*?'"; assertVisitorResult(query, expected, indexOnlyFields); } @@ -186,6 +186,15 @@ public void testDoubleEndedWildCard2() throws ParseException { assertVisitorResult(query, query, indexOnlyFields); } + @Test + public void testMultiFieldDoubleEndedWildCard() throws ParseException { + // we cannot extract the index only field because we don't support double ended wildcards against the index. Queries like this will fail in a later + // visitor + Set indexOnlyFields = Sets.newHashSet("FIELDA"); + String query = "FOO == 'bar' && filter:includeRegex(FIELDA||FIELDB,'.*all_.*')"; + assertVisitorResult(query, query, indexOnlyFields); + } + @Test public void testBadRegex() throws ParseException { exception.expect(DatawaveFatalQueryException.class);