Skip to content

Commit 1c677b9

Browse files
committed
Merge remote-tracking branch 'upstream/main' into HEAD
2 parents 8151247 + fae0687 commit 1c677b9

File tree

422 files changed

+17569
-3096
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

422 files changed

+17569
-3096
lines changed

async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/LangType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
/** Language type accepted in async query apis. */
99
public enum LangType {
1010
SQL("sql"),
11-
PPL("ppl");
11+
PPL("ppl"),
12+
PROMQL("promql");
1213
private final String text;
1314

1415
LangType(String text) {

common/src/main/java/org/opensearch/sql/common/patterns/BrainLogParser.java

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,11 @@ public BrainLogParser(
172172
* @return list of tokens by splitting preprocessed log message
173173
*/
174174
public List<String> preprocess(String logMessage, String logId) {
175-
if (logMessage == null || logId == null) {
176-
throw new IllegalArgumentException("log message or logId must not be null");
175+
if (logId == null) {
176+
throw new IllegalArgumentException("logId must not be null");
177+
}
178+
if (logMessage == null) {
179+
return Arrays.asList("", logId);
177180
}
178181

179182
List<String> tokens = preprocess(logMessage, this.filterPatternVariableMap, this.delimiters);
@@ -224,7 +227,7 @@ public void processTokenHistogram(List<String> tokens) {
224227
* @return list of token lists
225228
*/
226229
public List<List<String>> preprocessAllLogs(List<String> logMessages) {
227-
List<List<String>> preprocessedLogs = new ArrayList<>();
230+
List<List<String>> preprocessedLogs = new ArrayList<>(logMessages.size());
228231

229232
for (int i = 0; i < logMessages.size(); i++) {
230233
String logId = String.valueOf(i);
@@ -291,7 +294,8 @@ public static List<String> parseLogPattern(
291294
String groupCandidateStr = logIdGroupCandidateMap.get(logId);
292295
String[] groupCandidate = groupCandidateStr.split(",");
293296
Long repFreq = Long.parseLong(groupCandidate[0]); // representative frequency of the group
294-
return IntStream.range(0, tokens.size() - 1)
297+
int tokenCapacity = Math.max(0, tokens.size() - 1);
298+
return IntStream.range(0, tokenCapacity)
295299
.mapToObj(i -> new AbstractMap.SimpleEntry<>(i, tokens.get(i)))
296300
.map(
297301
entry -> {
@@ -334,7 +338,7 @@ public static List<String> parseLogPattern(
334338
}
335339
return token;
336340
})
337-
.collect(Collectors.toList());
341+
.collect(Collectors.toCollection(() -> new ArrayList<>(tokenCapacity)));
338342
}
339343

340344
/**
@@ -349,7 +353,10 @@ public Map<String, Map<String, Object>> parseAllLogPatterns(
349353

350354
Map<String, Map<String, Object>> logPatternMap = new HashMap<>();
351355
for (int i = 0; i < processedMessages.size(); i++) {
352-
List<String> logPattern = this.parseLogPattern(processedMessages.get(i));
356+
List<String> logPattern =
357+
this.parseLogPattern(processedMessages.get(i)).stream()
358+
.map(BrainLogParser::collapseContinuousWildcards)
359+
.collect(Collectors.toList());
353360
String patternKey = String.join(" ", logPattern);
354361
String sampleLog = logMessages.get(i);
355362
logPatternMap.compute(
@@ -379,6 +386,29 @@ public Map<String, Map<String, Object>> parseAllLogPatterns(
379386
return logPatternMap;
380387
}
381388

389+
static String collapseContinuousWildcards(String part) {
390+
// The minimum of continuous wildcards are 6 characters: <*><*>
391+
if (part == null || part.length() < 6) {
392+
return part;
393+
}
394+
int tokenLen = VARIABLE_DENOTER.length();
395+
StringBuilder sb = new StringBuilder(part.length());
396+
int i = 0;
397+
while (i < part.length()) {
398+
int j = part.indexOf(VARIABLE_DENOTER, i);
399+
if (j < 0) {
400+
sb.append(part, i, part.length());
401+
break;
402+
}
403+
sb.append(part, i, j).append(VARIABLE_DENOTER);
404+
do {
405+
j += tokenLen;
406+
} while (j <= part.length() - tokenLen && part.startsWith(VARIABLE_DENOTER, j));
407+
i = j;
408+
}
409+
return sb.toString();
410+
}
411+
382412
private Map<Long, Integer> getWordOccurrences(List<String> tokens) {
383413
Map<Long, Integer> occurrences = new HashMap<>();
384414
for (int i = 0; i < tokens.size() - 1; i++) {

common/src/main/java/org/opensearch/sql/common/patterns/PatternUtils.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.sql.common.patterns;
77

8+
import com.google.common.collect.ImmutableList;
89
import java.util.ArrayList;
910
import java.util.List;
1011
import java.util.Map;
@@ -20,6 +21,8 @@ public final class PatternUtils {
2021
public static final Pattern WILDCARD_PATTERN = Pattern.compile("<\\*[^>]*>");
2122
public static final String TOKEN_PREFIX = "<token";
2223
public static final Pattern TOKEN_PATTERN = Pattern.compile("<token\\d+>");
24+
public static final List<String> VALID_BRAIN_PARAMETERS =
25+
ImmutableList.of("variable_count_threshold", "frequency_threshold_percentage");
2326

2427
public static Map<String, Map<String, Object>> mergePatternGroups(
2528
Map<String, Map<String, Object>> left,
@@ -55,6 +58,7 @@ public static Map<String, Map<String, Object>> mergePatternGroups(
5558
public static void extractVariables(
5659
ParseResult parseResult, String original, Map<String, List<String>> result, String prefix) {
5760
List<String> parts = parseResult.parts;
61+
List<Boolean> isToken = parseResult.isToken;
5862
List<String> tokenOrder = parseResult.tokenOrder;
5963

6064
if (parts.isEmpty()) {
@@ -67,7 +71,7 @@ public static void extractVariables(
6771

6872
while (i < parts.size()) {
6973
String currentPart = parts.get(i);
70-
if (currentPart.startsWith(prefix)) { // Process already labeled part
74+
if (isToken.get(i)) { // Process already labeled part
7175
String tokenKey = tokenOrder.get(tokenIndex++);
7276
if (i == parts.size() - 1) { // The last part
7377
String value = original.substring(pos);
@@ -97,19 +101,22 @@ public static void extractVariables(
97101
}
98102

99103
public static class ParseResult {
100-
List<String> parts;
101-
List<String> tokenOrder;
104+
final List<String> parts;
105+
final List<Boolean> isToken;
106+
final List<String> tokenOrder;
102107

103-
public ParseResult(List<String> parts, List<String> tokenOrder) {
108+
public ParseResult(List<String> parts, List<Boolean> isToken, List<String> tokenOrder) {
104109
this.parts = parts;
110+
this.isToken = isToken;
105111
this.tokenOrder = tokenOrder;
106112
}
107113

108114
public String toTokenOrderString(String prefix) {
109115
StringBuilder result = new StringBuilder();
110116
int tokenIndex = 0;
111-
for (String currentPart : parts) {
112-
if (currentPart.startsWith(prefix)) {
117+
for (int i = 0; i < parts.size(); i++) {
118+
String currentPart = parts.get(i);
119+
if (isToken.get(i)) {
113120
result.append(tokenOrder.get(tokenIndex++));
114121
} else {
115122
result.append(currentPart);
@@ -126,6 +133,7 @@ public String toTokenOrderString(String prefix) {
126133
*/
127134
public static ParseResult parsePattern(String pattern, Pattern compiledPattern) {
128135
List<String> parts = new ArrayList<>();
136+
List<Boolean> isToken = new ArrayList<>();
129137
List<String> tokenOrder = new ArrayList<>();
130138
Matcher matcher = compiledPattern.matcher(pattern);
131139
int lastEnd = 0;
@@ -137,20 +145,23 @@ public static ParseResult parsePattern(String pattern, Pattern compiledPattern)
137145
// Add static part before the found match if there is
138146
if (start > lastEnd) {
139147
parts.add(pattern.substring(lastEnd, start));
148+
isToken.add(false);
140149
}
141150
// Add matched wildcard part and generate token order key
142151
String wildcard = matcher.group();
143152
parts.add(wildcard);
153+
isToken.add(true);
144154
tokenOrder.add("<token" + tokenCount++ + ">");
145155
lastEnd = end;
146156
}
147157

148158
// Add static part at the end
149159
if (lastEnd < pattern.length()) {
150160
parts.add(pattern.substring(lastEnd));
161+
isToken.add(false);
151162
}
152163

153-
return new ParseResult(parts, tokenOrder);
164+
return new ParseResult(parts, isToken, tokenOrder);
154165
}
155166

156167
private static void addToResult(Map<String, List<String>> result, String key, String value) {

common/src/main/java/org/opensearch/sql/common/setting/Settings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public enum Key {
2929
PATTERN_MODE("plugins.ppl.pattern.mode"),
3030
PATTERN_MAX_SAMPLE_COUNT("plugins.ppl.pattern.max.sample.count"),
3131
PATTERN_BUFFER_LIMIT("plugins.ppl.pattern.buffer.limit"),
32+
PATTERN_SHOW_NUMBERED_TOKEN("plugins.ppl.pattern.show.numbered.token"),
3233
PPL_REX_MAX_MATCH_LIMIT("plugins.ppl.rex.max_match.limit"),
3334
PPL_VALUES_MAX_LIMIT("plugins.ppl.values.max.limit"),
3435
PPL_SYNTAX_LEGACY_PREFERRED("plugins.ppl.syntax.legacy.preferred"),

common/src/test/java/org/opensearch/sql/common/patterns/BrainLogParserTest.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import static org.junit.Assert.assertThrows;
1111
import static org.junit.Assert.assertTrue;
1212

13+
import com.google.common.collect.ImmutableList;
1314
import com.google.common.collect.ImmutableMap;
1415
import java.util.AbstractMap;
1516
import java.util.Arrays;
@@ -104,6 +105,15 @@ public void testPreprocess() {
104105
assertEquals(expectedResult, result);
105106
}
106107

108+
@Test
109+
public void testPreprocessNullString() {
110+
String logMessage = null;
111+
String logId = "log1";
112+
List<String> expectedResult = Arrays.asList("", "log1");
113+
List<String> result = parser.preprocess(logMessage, logId);
114+
assertEquals(expectedResult, result);
115+
}
116+
107117
@Test
108118
public void testPreprocessWithUUID() {
109119
String logMessage = "127.0.0.1 - 1234 something, user_id:c78ac970-f0c3-4954-8cf8-352a8458d01c";
@@ -124,15 +134,11 @@ public void testPreprocessWithUUID() {
124134
public void testPreprocessWithIllegalInput() {
125135
String logMessage = "127.0.0.1 - 1234 something";
126136
String logId = "log1";
127-
String exceptionMessage = "log message or logId must not be null";
137+
String exceptionMessage = "logId must not be null";
138+
assertEquals(ImmutableList.of("", logId), parser.preprocess(null, logId));
128139
Throwable throwable =
129-
assertThrows(IllegalArgumentException.class, () -> parser.preprocess(null, logId));
130-
assertEquals(exceptionMessage, throwable.getMessage());
131-
throwable =
132140
assertThrows(IllegalArgumentException.class, () -> parser.preprocess(logMessage, null));
133141
assertEquals(exceptionMessage, throwable.getMessage());
134-
throwable = assertThrows(IllegalArgumentException.class, () -> parser.preprocess(null, null));
135-
assertEquals(exceptionMessage, throwable.getMessage());
136142
}
137143

138144
@Test
@@ -209,6 +215,29 @@ public void testParseLogPattern() {
209215
assertEquals(expectedLogPattern, logPattern);
210216
}
211217

218+
@Test
219+
public void testParseAllLogPatternsWithNullInput() {
220+
List<String> messages =
221+
Arrays.asList(
222+
null,
223+
"PacketResponder failed for blk_6996194389878584395",
224+
"PacketResponder failed for blk_-1547954353065580372");
225+
Map<String, Map<String, Object>> logPatternMap = parser.parseAllLogPatterns(messages, 1);
226+
Map<String, Map<String, Object>> expectedResult =
227+
ImmutableMap.of(
228+
"",
229+
ImmutableMap.of("pattern_count", 1L, "pattern", "", "sample_logs", ImmutableList.of()),
230+
"PacketResponder failed for blk_<*>",
231+
ImmutableMap.of(
232+
"pattern_count",
233+
2L,
234+
"pattern",
235+
"PacketResponder failed for blk_<*>",
236+
"sample_logs",
237+
ImmutableList.of("PacketResponder failed for blk_6996194389878584395")));
238+
assertEquals(expectedResult, logPatternMap);
239+
}
240+
212241
@Test
213242
public void testParseAllLogPatterns() {
214243
Map<String, Map<String, Object>> logPatternMap = parser.parseAllLogPatterns(TEST_HDFS_LOGS, 2);
@@ -286,6 +315,19 @@ public void testParseLogPatternWhenHigherFrequencyTokenIsVariable() {
286315
assertTrue(parser.getGroupTokenSetMap().get("4-3,3-0").size() > 1);
287316
}
288317

318+
@Test
319+
public void testCollapseContinuousWildcards() {
320+
String correctTokenPattern =
321+
"BLOCK* NameSystem.allocateBlock: /user/root/_temporary/_task_<*>_r_<*>";
322+
String continuousTokenPattern =
323+
"BLOCK* NameSystem.allocateBlock: /user/root/_temporary/_task_<*><*>_r_<*><*><*>";
324+
325+
assertEquals(
326+
correctTokenPattern, BrainLogParser.collapseContinuousWildcards(continuousTokenPattern));
327+
assertEquals(
328+
correctTokenPattern, BrainLogParser.collapseContinuousWildcards(correctTokenPattern));
329+
}
330+
289331
private Map<String, Long> collectPatternByCountMap(
290332
Map<String, Map<String, Object>> logPatternMap) {
291333
return logPatternMap.entrySet().stream()

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
import org.opensearch.sql.ast.tree.Rename;
8787
import org.opensearch.sql.ast.tree.Reverse;
8888
import org.opensearch.sql.ast.tree.Rex;
89+
import org.opensearch.sql.ast.tree.SPath;
8990
import org.opensearch.sql.ast.tree.Search;
9091
import org.opensearch.sql.ast.tree.Sort;
9192
import org.opensearch.sql.ast.tree.Sort.SortOption;
@@ -95,6 +96,7 @@
9596
import org.opensearch.sql.ast.tree.Trendline;
9697
import org.opensearch.sql.ast.tree.UnresolvedPlan;
9798
import org.opensearch.sql.ast.tree.Values;
99+
import org.opensearch.sql.ast.tree.Window;
98100
import org.opensearch.sql.common.antlr.SyntaxCheckException;
99101
import org.opensearch.sql.data.model.ExprMissingValue;
100102
import org.opensearch.sql.data.type.ExprCoreType;
@@ -755,11 +757,21 @@ public LogicalPlan visitReverse(Reverse node, AnalysisContext context) {
755757
throw getOnlyForCalciteException("Reverse");
756758
}
757759

760+
@Override
761+
public LogicalPlan visitSpath(SPath node, AnalysisContext context) {
762+
throw getOnlyForCalciteException("Spath");
763+
}
764+
758765
@Override
759766
public LogicalPlan visitTimechart(Timechart node, AnalysisContext context) {
760767
throw getOnlyForCalciteException("Timechart");
761768
}
762769

770+
@Override
771+
public LogicalPlan visitWindow(Window node, AnalysisContext context) {
772+
throw getOnlyForCalciteException("Window");
773+
}
774+
763775
@Override
764776
public LogicalPlan visitRegex(Regex node, AnalysisContext context) {
765777
throw getOnlyForCalciteException("Regex");

core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.opensearch.sql.ast.expression.HighlightFunction;
3939
import org.opensearch.sql.ast.expression.In;
4040
import org.opensearch.sql.ast.expression.Interval;
41+
import org.opensearch.sql.ast.expression.LambdaFunction;
4142
import org.opensearch.sql.ast.expression.Literal;
4243
import org.opensearch.sql.ast.expression.Not;
4344
import org.opensearch.sql.ast.expression.Or;
@@ -479,6 +480,11 @@ public Expression visitInSubquery(InSubquery node, AnalysisContext context) {
479480
throw getOnlyForCalciteException("Subsearch");
480481
}
481482

483+
@Override
484+
public Expression visitLambdaFunction(LambdaFunction node, AnalysisContext context) {
485+
throw getOnlyForCalciteException("Lambda function");
486+
}
487+
482488
/**
483489
* If QualifiedName is actually a reserved metadata field, return the expr type associated with
484490
* the metadata field.

0 commit comments

Comments
 (0)