Skip to content

Commit 8036860

Browse files
grcevskicarsontham
authored andcommitted
Improve route harvesting (open-telemetry#635)
1 parent bf8e69f commit 8036860

File tree

3 files changed

+77
-15
lines changed

3 files changed

+77
-15
lines changed

pkg/components/transform/route/harvest/java.go

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ package harvest
55

66
import (
77
"bufio"
8+
"bytes"
9+
"io"
810
"log/slog"
911
"net/url"
1012
"regexp"
@@ -116,23 +118,44 @@ func (h *JavaRoutes) addRouteIfValid(line string, routes []string) []string {
116118
return routes
117119
}
118120

121+
func (h *JavaRoutes) processSymbolLine(lineBytes []byte, routes []string) []string {
122+
if len(lineBytes) > 0 {
123+
// Remove newline characters
124+
lineBytes = bytes.TrimRight(lineBytes, "\r\n")
125+
// Validate the line and sanitize
126+
line, ok := h.validLine(string(lineBytes))
127+
if ok {
128+
routes = h.addRouteIfValid(line, routes)
129+
}
130+
}
131+
132+
return routes
133+
}
134+
119135
func (h *JavaRoutes) ExtractRoutes(pid int32) (*RouteHarvesterResult, error) {
120136
routes := []string{}
121137
out, err := jvmAttachFunc(int(pid), []string{"jcmd", "VM.symboltable -verbose"}, h.log)
122138
if err != nil {
123139
return nil, err
124140
}
125141

126-
scanner := bufio.NewScanner(out)
127-
for scanner.Scan() {
128-
line := scanner.Text()
129-
line, ok := h.validLine(line)
142+
defer out.Close()
130143

131-
if !ok {
132-
continue
144+
reader := bufio.NewReader(out)
145+
for {
146+
// Read line by line, handling arbitrarily long lines
147+
line, err := reader.ReadBytes('\n')
148+
if err != nil {
149+
if err == io.EOF {
150+
// Process the last line if it doesn't end with newline
151+
routes = h.processSymbolLine(line, routes)
152+
break
153+
}
154+
h.log.Error("error reading line", "error", err)
155+
return nil, err
133156
}
134157

135-
routes = h.addRouteIfValid(line, routes)
158+
routes = h.processSymbolLine(line, routes)
136159
}
137160

138161
routes = h.sortRoutes(routes)

pkg/components/transform/route/part_matcher.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,27 +43,37 @@ func (rm *PartialRouteMatcher) Find(path string) string {
4343
}
4444

4545
tokens := tokenize(path)
46-
return rm.findCombined(tokens, 0, []string{})
46+
return rm.findCombined(tokens, 0, make([]string, len(tokens)), 0)
4747
}
4848

4949
// findCombined tries to match the full path using combinations of partial matches from different roots
50-
func (rm *PartialRouteMatcher) findCombined(tokens []string, startIdx int, matchedParts []string) string {
50+
func (rm *PartialRouteMatcher) findCombined(tokens []string, startIdx int, matchedParts []string, matchedLen int) string {
5151
// If we've consumed all tokens, we found a complete match
5252
if startIdx >= len(tokens) {
53-
if len(matchedParts) > 0 {
54-
return strings.Join(matchedParts, "")
53+
if matchedLen > 0 {
54+
return strings.Join(matchedParts[:matchedLen], "")
5555
}
5656
return ""
5757
}
5858

59+
newMatchedParts := matchedParts
60+
61+
// This check is here for ensuring that we don't run into unexpected condition where
62+
// the original tokens slice is shorter than the parts slice. In practice, this should
63+
// never happen, the tokens will always be >= to the number of parts. However, if we
64+
// encounter unexpected pattern and we don't expand the array we'll hit a panic.
65+
if matchedLen == len(matchedParts) {
66+
newMatchedParts = make([]string, len(matchedParts)+1)
67+
copy(newMatchedParts, matchedParts)
68+
}
69+
5970
// Try each root tree for partial matching from current position
6071
for _, root := range rm.roots {
6172
if partialMatch, consumed := rm.findPartial(tokens[startIdx:], root); partialMatch != "" && consumed > 0 {
6273
// Found a partial match, try to match the rest
63-
newMatchedParts := make([]string, len(matchedParts)+1)
64-
copy(newMatchedParts, matchedParts)
65-
newMatchedParts[len(matchedParts)] = partialMatch
66-
if result := rm.findCombined(tokens, startIdx+consumed, newMatchedParts); result != "" {
74+
newMatchedParts[matchedLen] = partialMatch
75+
76+
if result := rm.findCombined(tokens, startIdx+consumed, newMatchedParts, matchedLen+1); result != "" {
6777
return result
6878
}
6979
}

pkg/components/transform/route/part_matcher_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,32 @@ func BenchmarkPartialRouteMatcherComplex(b *testing.B) {
391391
}
392392
}
393393
}
394+
395+
// TestMatchedPartsSliceExpansion tests the specific condition where the matchedParts slice
396+
// needs to be expanded when matchedLen == len(matchedParts). This happens when we have
397+
// enough partial route matches to fill the initial slice and need to add more.
398+
func TestMatchedPartsSliceExpansion(t *testing.T) {
399+
// Create routes that can be combined into a long path requiring slice expansion
400+
routes := []string{
401+
"/api",
402+
"/v1",
403+
"/users",
404+
"/{id}",
405+
"/profile",
406+
"/settings",
407+
"/preferences",
408+
"/notifications",
409+
}
410+
411+
m := NewPartialRouteMatcher(routes)
412+
413+
// Create a path that will match multiple partial routes sequentially,
414+
// forcing the matchedParts slice to expand beyond its initial size
415+
testPath := "/api/v1/users/123/profile/settings/preferences/notifications"
416+
417+
// Purposefully call with empty parts slice to force the iteration to hit the copy
418+
tokens := tokenize(testPath)
419+
result := m.findCombined(tokens, 0, make([]string, 0), 0)
420+
421+
assert.Equal(t, "/api/v1/users/{id}/profile/settings/preferences/notifications", result)
422+
}

0 commit comments

Comments
 (0)