-
Notifications
You must be signed in to change notification settings - Fork 2
Support for Archives(zip,tar) #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
75badd8
ff59ec8
0a6eeca
a52ae4f
e735358
1e654d2
073193a
9e0cf2b
7a94408
7c136db
8496ea5
297abdb
c3a74fa
829ac05
018786a
309c1dc
5b8246a
972476b
f3d603c
d6d4331
8430e13
782ab23
e2370c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,111 @@ | ||||||
| package archive | ||||||
|
|
||||||
| import ( | ||||||
| "fmt" | ||||||
|
|
||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/record" | ||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task" | ||||||
| ) | ||||||
|
|
||||||
| type actionType string | ||||||
|
|
||||||
| const ( | ||||||
| actionCompress actionType = `pack` | ||||||
| actionDecompress actionType = `unpack` | ||||||
| ) | ||||||
|
|
||||||
| const ( | ||||||
| defaultFormat = `zip` | ||||||
| defaultAction = `pack` | ||||||
| ) | ||||||
|
|
||||||
| type archiver interface { | ||||||
| Read(b []byte) | ||||||
| Write(b []byte) | ||||||
| } | ||||||
|
|
||||||
| type core struct { | ||||||
| task.Base `yaml:",inline" json:",inline"` | ||||||
| Format string `yaml:"format,omitempty" json:"format,omitempty"` | ||||||
| Action actionType `yaml:"action,omitempty" json:"action,omitempty"` | ||||||
| FileName string `yaml:"file_name,omitempty" json:"file_name,omitempty"` | ||||||
| } | ||||||
|
|
||||||
| func New() (task.Task, error) { | ||||||
| return &core{ | ||||||
| Format: defaultFormat, | ||||||
| Action: defaultAction, | ||||||
| }, nil | ||||||
| } | ||||||
|
|
||||||
| func (c *core) UnmarshalYAML(unmarshal func(interface{}) error) error { | ||||||
| type raw core | ||||||
| obj := raw{ | ||||||
| Format: defaultFormat, | ||||||
| Action: defaultAction, | ||||||
| } | ||||||
| if err := unmarshal(&obj); err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| if obj.Action != actionCompress && obj.Action != actionDecompress { | ||||||
| return fmt.Errorf("invalid action: %s (must be 'compress' or 'decompress')", obj.Action) | ||||||
|
||||||
| return fmt.Errorf("invalid action: %s (must be 'compress' or 'decompress')", obj.Action) | |
| return fmt.Errorf("invalid action: %s (must be 'pack' or 'unpack')", obj.Action) |
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Format field is not validated in UnmarshalYAML. Unlike the compress task which validates the format against a map of supported formats, this code only validates the action. Invalid formats like "rar" or "7z" would only be caught at runtime in the Run method's switch statement. Format validation should happen during configuration unmarshaling for early error detection.
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Read and Write calls don't capture or handle any errors because the archiver interface methods don't return errors. If an error occurs in Read or Write, it will either terminate the program via log.Fatal or be silently ignored, but cannot be properly handled by the Run method. This needs to be addressed by updating the archiver interface to return errors.
Copilot
AI
Jan 30, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new archive task implementation lacks unit tests. Given that the repository has comprehensive test coverage for other components (e.g., internal/pkg/pipeline/dag_test.go), unit tests should be added to verify the pack and unpack operations for both ZIP and TAR formats, error handling paths, and edge cases such as empty archives, corrupted archives, and files with various sizes.
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,65 @@ | ||||||||
| package archive | ||||||||
|
|
||||||||
| import ( | ||||||||
| "archive/tar" | ||||||||
| "bytes" | ||||||||
| "io" | ||||||||
| "log" | ||||||||
|
|
||||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/record" | ||||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task" | ||||||||
| ) | ||||||||
|
|
||||||||
| type tarArchive struct { | ||||||||
| *task.Base | ||||||||
| FileName string | ||||||||
| Record *record.Record | ||||||||
| OutputChan chan<- *record.Record | ||||||||
| } | ||||||||
|
|
||||||||
| func (t *tarArchive) Read(b []byte) { | ||||||||
| r := tar.NewReader(bytes.NewReader(b)) | ||||||||
|
|
||||||||
| for { | ||||||||
| header, err := r.Next() | ||||||||
| if err != nil { | ||||||||
| break | ||||||||
| } | ||||||||
|
|
||||||||
| // check the file type is regular file | ||||||||
| if header.Typeflag == tar.TypeReg { | ||||||||
| buf := make([]byte, header.Size) | ||||||||
| if _, err := io.ReadFull(r, buf); err != nil && err != io.EOF { | ||||||||
| log.Fatal(err) | ||||||||
|
||||||||
| } | ||||||||
| t.SendData(t.Record.Context, buf, t.OutputChan) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| func (t *tarArchive) Write(b []byte) { | ||||||||
|
|
||||||||
| if t.FileName == "" { | ||||||||
| log.Fatal("file name is required to create tar archive") | ||||||||
| } | ||||||||
|
|
||||||||
|
||||||||
| if t.FileName == "" { | |
| log.Fatal("file name is required to create tar archive") | |
| } |
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tar.NewWriter is incorrectly initialized with bytes.NewBuffer(b), which creates a new buffer containing a copy of the input data. This should be an empty buffer like bytes.NewBuffer(nil) or new(bytes.Buffer). The current implementation would prepend the raw input data before the tar archive data, resulting in a corrupted output.
Outdated
Copilot
AI
Jan 30, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using log.Fatal in library/task code will abruptly terminate the entire application, which is inappropriate for a pipeline task. Errors should be propagated back through the Run method's return value to allow the calling code to handle them gracefully. The archiver interface methods should return errors, and the Run method should handle those errors appropriately.
ma-gk marked this conversation as resolved.
Show resolved
Hide resolved
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using log.Fatal will terminate the entire program. These errors should be returned instead for proper pipeline error handling.
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After writing to the tar archive, this sends the original input data (b) instead of the tar archive buffer. This should send the tar writer's buffer contents (e.g., tarBuf.Bytes()) instead. The current implementation would send the uncompressed data rather than the tar archive.
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,66 @@ | ||||||||||||||||||||||||||||||||||
| package archive | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| import ( | ||||||||||||||||||||||||||||||||||
| "archive/zip" | ||||||||||||||||||||||||||||||||||
| "bytes" | ||||||||||||||||||||||||||||||||||
| "errors" | ||||||||||||||||||||||||||||||||||
| "io" | ||||||||||||||||||||||||||||||||||
| "log" | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/record" | ||||||||||||||||||||||||||||||||||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task" | ||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| type zipArchive struct { | ||||||||||||||||||||||||||||||||||
| *task.Base | ||||||||||||||||||||||||||||||||||
| FileName string | ||||||||||||||||||||||||||||||||||
| Record *record.Record | ||||||||||||||||||||||||||||||||||
| OutputChan chan<- *record.Record | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| func (z *zipArchive) Read(b []byte) { | ||||||||||||||||||||||||||||||||||
| r, err := zip.NewReader(bytes.NewReader(b), int64(len(b))) | ||||||||||||||||||||||||||||||||||
| if err != nil { | ||||||||||||||||||||||||||||||||||
| log.Fatal(err) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| for _, f := range r.File { | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| // check the file type is regular file | ||||||||||||||||||||||||||||||||||
| if f.FileInfo().Mode().IsRegular() { | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| rc, err := f.Open() | ||||||||||||||||||||||||||||||||||
| if err != nil { | ||||||||||||||||||||||||||||||||||
| log.Fatal(err) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| buf := make([]byte, f.FileInfo().Size()) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| _, err = rc.Read(buf) | ||||||||||||||||||||||||||||||||||
| if err != nil && !errors.Is(err, io.EOF) { | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
| buf := make([]byte, f.FileInfo().Size()) | |
| _, err = rc.Read(buf) | |
| if err != nil && !errors.Is(err, io.EOF) { | |
| buf, err := io.ReadAll(rc) | |
| if err != nil { |
Outdated
Copilot
AI
Jan 30, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using log.Fatal in library/task code will abruptly terminate the entire application, which is inappropriate for a pipeline task. Errors should be propagated back through the Run method's return value to allow the calling code to handle them gracefully. The archiver interface methods should return errors, and the Run method should handle those errors appropriately.
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using log.Fatal will terminate the entire program. This error should be returned instead. Additionally, rc.Close() on line 43 should be deferred to ensure the file is closed even if an error occurs during Read. The pattern should be similar to other tasks in the codebase that return errors for proper error handling.
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This error check should not use log.Fatal as it terminates the entire program. Additionally, the validation should occur in UnmarshalYAML during configuration parsing, not during runtime execution. This check is redundant since validation already happens in archive.go at line 55-58.
| if z.FileName == "" { | |
| log.Fatal("file name is required to create zip archive") | |
| } |
Outdated
Copilot
AI
Jan 29, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Multiple error handling issues: (1) The error from zipWriter.Create is being silently ignored with the blank identifier. (2) The error from w.Write is being ignored. (3) The error from zipWriter.Close is being ignored. All of these errors should be checked and handled appropriately, similar to how the compress task handles errors.
| w, _ := zipWriter.Create(z.FileName) | |
| w.Write(b) | |
| zipWriter.Close() | |
| w, err := zipWriter.Create(z.FileName) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| if _, err := w.Write(b); err != nil { | |
| log.Fatal(err) | |
| } | |
| if err := zipWriter.Close(); err != nil { | |
| log.Fatal(err) | |
| } |
ma-gk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -26,7 +26,7 @@ var ( | |||||
| return io.NopCloser(snappy.NewReader(r)), nil | ||||||
| }, | ||||||
| NewWriter: func(w io.Writer) io.WriteCloser { | ||||||
| return snappy.NewWriter(w) | ||||||
| return snappy.NewBufferedWriter(w) | ||||||
|
||||||
| return snappy.NewBufferedWriter(w) | |
| return snappy.NewWriter(w) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ import ( | |
| "gopkg.in/yaml.v3" | ||
|
|
||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task" | ||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task/archive" | ||
|
||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task/aws/parameter_store" | ||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task/compress" | ||
| "github.com/patterninc/caterpillar/internal/pkg/pipeline/task/converter" | ||
|
|
@@ -33,6 +34,7 @@ type tasks []task.Task | |
| var ( | ||
| validate = validator.New() | ||
| supportedTasks = map[string]func() (task.Task, error){ | ||
| `archive`: archive.New, | ||
| `aws_parameter_store`: parameter_store.New, | ||
| `compress`: compress.New, | ||
| `converter`: converter.New, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| Albatross | ||
| Acorn Woodpecker | ||
| American Kestrel | ||
| Anna's Hummingbird | ||
| Bald Eagle | ||
| Baltimore Oriole | ||
| Barn Swallow | ||
| Belted Kingfisher | ||
| Bicolored Antbird | ||
| Black Capped Chickadee | ||
| Black Skimmer | ||
| Blue Jay | ||
| Bluebird | ||
| Bobolink | ||
| Bohemian Waxwing | ||
| Brown Creeper | ||
| Brown Pelican | ||
| Burrowing Owl | ||
| California Condor | ||
| California Quail | ||
| Canada Goose | ||
| Cardinal | ||
| Caspian Tern | ||
| Cedar Waxwing | ||
| Chestnut Sided Warbler | ||
| Chimney Swift | ||
| Chipping Sparrow | ||
| Clark's Nutcracker | ||
| Clay Colored Sparrow | ||
| Cliff Swallow | ||
| Columbiformes | ||
| Common Eider | ||
| Common Goldeneye | ||
| Common Grackle | ||
| Common Loon | ||
| Common Merganser | ||
| Common Raven | ||
| Common Tern | ||
| Common Yellowthroat | ||
| Coopers Hawk | ||
| Cory's Shearwater | ||
| Crested Flycatcher | ||
| Curve Billed Thrasher | ||
| Dark Eyed Junco | ||
| Dickcissel | ||
| Dovekie | ||
| Downy Woodpecker | ||
| Drab Seedeater | ||
| Dunnock | ||
| Eastern Bluebird | ||
| Eastern Meadowlark | ||
| Eastern Phoebe | ||
| Eastern Screech Owl | ||
| Eastern Towhee | ||
| Eastern Wood Pewee | ||
| Eared Grebe | ||
| Egyptian Plover | ||
| Elanus leucurus | ||
| Evening Grosbeak | ||
| Eared Quetzal | ||
| Eurasian Wigeon | ||
| Eurpean Starling | ||
ma-gk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Fabulous Flamingo | ||
| Ferruginous Hawk | ||
| Fiscal Flycatcher | ||
| Flammulated Owl | ||
| Flatbill | ||
| Flesh Footed Shearwater | ||
| Florida Jay | ||
| Fringilla coelebs | ||
| Fulmar | ||
| Gadwall | ||
| Gambel's Quail | ||
| Gannet | ||
| Garden Warbler | ||
| Gnatcatcher | ||
| Godwit | ||
| Golden Eagle | ||
| Golden Winged Warbler | ||
| Goldeneye | ||
| Goldfinch | ||
| Goosander | ||
| Goshawk | ||
| Grace's Warbler | ||
| Grasshopper Sparrow | ||
| Gray Catbird | ||
| Great Black Backed Gull | ||
| Great Blue Heron | ||
| Great Crested Flycatcher | ||
| Great Horned Owl | ||
| Great Kiskadee | ||
| Great Spotted Woodpecker | ||
| Great Tit | ||
| Grebe | ||
| Greenbul | ||
| Green Heron | ||
| Green Tailed Towhee | ||
| Green Winged Teal | ||
| Greenlet | ||
| Grey Kingbird | ||
| Grey Owl | ||
| Grosbeaks | ||
| Grouse | ||
| Gull | ||
| Hairy Woodpecker | ||
| Hammond's Flycatcher | ||
| Harris Hawk | ||
| Harris Sparrow | ||
| Hawaiian Creeper | ||
| Hawaiian Goose | ||
| Hawfinch | ||
| Heathland Francolin | ||
| Herring Gull | ||
| Hoary Puffleg | ||
| Hooded Merganser | ||
| Hooded Oriole | ||
| Hooded Warbler | ||
| Hoopoe | ||
| Horned Auk | ||
| Horned Grebe | ||
| Horned Lark | ||
| House Finch | ||
| House Sparrow | ||
| House Wren | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The archiver interface methods Read and Write don't return errors, which prevents proper error handling. These methods should return error values so that errors can be propagated back through the Run method to the caller. Compare with the compress task where compress/decompress methods return errors.