Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions internal/pkg/pipeline/task/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,7 @@ func (f *file) readFile(output chan<- *record.Record) error {

for _, path := range paths {

readerCloser, err := reader.read(path)
if err != nil {
return err
}
defer readerCloser.Close()

content, err := io.ReadAll(readerCloser)
content, err := readFileContent(reader, path)
if err != nil {
return err
}
Expand All @@ -147,6 +141,23 @@ func (f *file) readFile(output chan<- *record.Record) error {

}

// readFileContent reads everything available at path through r and returns
// the bytes. The open/read/close sequence lives in its own function so that
// the deferred Close fires as soon as this call returns; a caller looping
// over many paths therefore releases each reader before moving on to the
// next one instead of holding them all until the loop's function exits.
// On any failure the returned slice is nil and the error is passed through
// unchanged.
func readFileContent(r reader, path string) ([]byte, error) {
	src, openErr := r.read(path)
	if openErr != nil {
		return nil, openErr
	}
	// Runs on every exit path below, including a failed read.
	defer src.Close()

	data, readErr := io.ReadAll(src)
	if readErr != nil {
		// Discard whatever was read before the failure; data is only
		// handed back on a fully successful read.
		return nil, readErr
	}
	return data, nil
}

func (f *file) writeFile(input <-chan *record.Record) error {

for {
Expand Down
59 changes: 36 additions & 23 deletions internal/pkg/pipeline/task/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"net/http"
"strings"
"sync"
"time"

"github.com/patterninc/caterpillar/internal/pkg/config"
Expand All @@ -27,7 +28,11 @@ const (
)

var (
ctx = context.Background()
byteBufferPool = sync.Pool{
New: func() any {
return new(bytes.Buffer)
},
}
)

type oauth struct {
Expand Down Expand Up @@ -199,13 +204,24 @@ func (h *httpCore) processItem(rc *record.Record, output chan<- *record.Record)
if err != nil {
return err
}
nextPageInput, err := json.Marshal(result)
if err != nil {

// Use pooled buffer for temporary JSON encoding
buf := byteBufferPool.Get().(*bytes.Buffer)
buf.Reset()
if err := json.NewEncoder(buf).Encode(result); err != nil {
byteBufferPool.Put(buf)
return err
}
nextPageInput := buf.Bytes()
// trim trailing newline added by encoder
if len(nextPageInput) > 0 && nextPageInput[len(nextPageInput)-1] == '\n' {
nextPageInput = nextPageInput[:len(nextPageInput)-1]
}

nextPageData, err := nextPage.Execute(nextPageInput, map[string]any{
`page_id`: pageID,
})
byteBufferPool.Put(buf) // Return buffer after use

if err != nil {
return err
Expand Down Expand Up @@ -256,6 +272,21 @@ func (h *httpCore) processItem(rc *record.Record, output chan<- *record.Record)

func (h *httpCore) call(endpoint string) (*result, error) {

// Create HTTP client once, reuse for all retries to enable connection pooling
client := &http.Client{
Timeout: time.Duration(h.Timeout),
}

// Do we use proxy for this one?
if h.Proxy != nil {
transport, err := h.Proxy.getTransport()
if err != nil {
fmt.Printf("error configuring proxy: %s\n", err)
return nil, err
}
client.Transport = transport
}

var lastErr error
for attempt := 1; attempt <= h.MaxRetries; attempt++ {

Expand Down Expand Up @@ -292,24 +323,6 @@ func (h *httpCore) call(endpoint string) (*result, error) {
}
}

// Create HTTP client with proxy configuration if specified
client := &http.Client{
Timeout: time.Duration(h.Timeout),
}

// Do we use proxy for this one?
if h.Proxy != nil {
transport, err := h.Proxy.getTransport()
if err != nil {
lastErr = err
if attempt < h.MaxRetries {
continue
}
break
}
client.Transport = transport
}

response, err := client.Do(request)
if err != nil {
lastErr = err
Expand All @@ -320,9 +333,9 @@ func (h *httpCore) call(endpoint string) (*result, error) {
break
}

defer response.Body.Close()

// Read body and close immediately
body, err := io.ReadAll(response.Body)
response.Body.Close()
if err != nil {
lastErr = err
if attempt < h.MaxRetries {
Expand Down
4 changes: 4 additions & 0 deletions internal/pkg/pipeline/task/xpath/xpath.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ func (x *xpath) Run(ctx context.Context, input <-chan *record.Record, output cha
x.SendData(r.Meta, data, output)
}
}

// Release the original HTML payload so its buffer can be garbage collected
// once parsing and field extraction are complete.
r.Data = nil
}

return nil
Expand Down
Loading