-
Notifications
You must be signed in to change notification settings - Fork 464
fix: resolve race condition in file state verification and improve error handling #501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
fc571f4
ce235f8
ff65c41
97ffb2e
d1cc3f3
b036ac5
d12ed8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ import ( | |
| "github.com/uber/kraken/lib/store" | ||
| "github.com/uber/kraken/lib/torrent/scheduler" | ||
| "github.com/uber/kraken/tracker/announceclient" | ||
| "github.com/uber/kraken/utils/closers" | ||
| "github.com/uber/kraken/utils/handler" | ||
| "github.com/uber/kraken/utils/httputil" | ||
|
|
||
|
|
@@ -66,7 +67,6 @@ func New( | |
| tags tagclient.Client, | ||
| ac announceclient.Client, | ||
| containerRuntime containerruntime.Factory) *Server { | ||
|
|
||
| stats = stats.Tagged(map[string]string{ | ||
| "module": "agentserver", | ||
| }) | ||
|
|
@@ -118,13 +118,15 @@ func (s *Server) getTagHandler(w http.ResponseWriter, r *http.Request) error { | |
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| d, err := s.tags.Get(tag) | ||
| if err == tagclient.ErrTagNotFound { | ||
| return handler.ErrorStatus(http.StatusNotFound) | ||
| } | ||
| if err != nil { | ||
| if err == tagclient.ErrTagNotFound { | ||
| return handler.ErrorStatus(http.StatusNotFound) | ||
| } | ||
| return handler.Errorf("get tag: %s", err) | ||
| } | ||
|
|
||
| io.WriteString(w, d.String()) | ||
| return nil | ||
| } | ||
|
|
@@ -139,25 +141,42 @@ func (s *Server) downloadBlobHandler(w http.ResponseWriter, r *http.Request) err | |
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| f, err := s.cads.Cache().GetFileReader(d.Hex()) | ||
| if err != nil { | ||
| if os.IsNotExist(err) || s.cads.InDownloadError(err) { | ||
| if err := s.sched.Download(namespace, d); err != nil { | ||
| if err == scheduler.ErrTorrentNotFound { | ||
| return handler.ErrorStatus(http.StatusNotFound) | ||
| } | ||
| return handler.Errorf("download torrent: %s", err) | ||
| } | ||
| f, err = s.cads.Cache().GetFileReader(d.Hex()) | ||
| if err != nil { | ||
| return handler.Errorf("store: %s", err) | ||
| } | ||
| } else { | ||
| return handler.Errorf("store: %s", err) | ||
|
|
||
| // Happy path: file already exists in cache | ||
|
||
| if err == nil { | ||
| defer closers.Close(f) | ||
| if _, err := io.Copy(w, f); err != nil { | ||
| return fmt.Errorf("copy file: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // If error is not recoverable, return error | ||
| if !os.IsNotExist(err) && !s.cads.InDownloadError(err) { | ||
| return handler.Errorf("store: %s", err) | ||
hweawer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| // File doesn't exist or is in wrong state, trigger P2P download | ||
| if err := s.sched.Download(namespace, d); err != nil { | ||
| if err == scheduler.ErrTorrentNotFound { | ||
| return handler.ErrorStatus(http.StatusNotFound) | ||
| } | ||
| return handler.Errorf("download torrent: %s", err) | ||
hweawer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| // Get file reader after download completes | ||
| // Use Any() to check both download and cache directories, as the file | ||
| // might still be in the process of being moved from download to cache. | ||
| f, err = s.cads.Any().GetFileReader(d.Hex()) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
But |
||
| if err != nil { | ||
| return handler.Errorf("store: %s", err) | ||
hweawer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| defer closers.Close(f) | ||
|
|
||
| if _, err := io.Copy(w, f); err != nil { | ||
| return fmt.Errorf("copy file: %s", err) | ||
| return fmt.Errorf("copy file: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,46 +41,96 @@ func NewReadOnlyTransferer( | |
| cads *store.CADownloadStore, | ||
| tags tagclient.Client, | ||
| sched scheduler.Scheduler) *ReadOnlyTransferer { | ||
|
|
||
| stats = stats.Tagged(map[string]string{ | ||
| "module": "rotransferer", | ||
| }) | ||
|
|
||
| return &ReadOnlyTransferer{stats, cads, tags, sched} | ||
| } | ||
|
|
||
| // mapSchedulerError converts scheduler errors to appropriate transferer errors. | ||
| func mapSchedulerError(err error, d core.Digest) error { | ||
| // torrent not found → 404 | ||
| if err == scheduler.ErrTorrentNotFound { | ||
| return ErrBlobNotFound{ | ||
| Digest: d.Hex(), | ||
| Reason: "torrent not found in tracker", | ||
| } | ||
| } | ||
|
|
||
| // All other scheduler errors → 500 with context | ||
| return fmt.Errorf("download blob %s: %w", d.Hex(), err) | ||
| } | ||
|
|
||
| // Stat returns blob info from local cache, and triggers download if the blob is | ||
| // not available locally. | ||
| func (t *ReadOnlyTransferer) Stat(namespace string, d core.Digest) (*core.BlobInfo, error) { | ||
| fi, err := t.cads.Cache().GetFileStat(d.Hex()) | ||
| if os.IsNotExist(err) || t.cads.InDownloadError(err) { | ||
| if err := t.sched.Download(namespace, d); err != nil { | ||
| return nil, fmt.Errorf("scheduler: %s", err) | ||
| } | ||
| fi, err = t.cads.Cache().GetFileStat(d.Hex()) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("stat cache: %s", err) | ||
|
|
||
| // Happy path: file already exists in cache | ||
|
||
| if err == nil { | ||
| return core.NewBlobInfo(fi.Size()), nil | ||
| } | ||
|
|
||
| // If error is not recoverable, return error | ||
| if !os.IsNotExist(err) && !t.cads.InDownloadError(err) { | ||
| return nil, fmt.Errorf("stat cache: %w", err) | ||
| } | ||
|
|
||
| // File doesn't exist or is in wrong state, trigger P2P download | ||
| if err := t.sched.Download(namespace, d); err != nil { | ||
| return nil, mapSchedulerError(err, d) | ||
| } | ||
|
|
||
| // Stat file after download completes | ||
| // Use Any() to check both download and cache directories, as the file | ||
| // might still be in the process of being moved from download to cache. | ||
| fi, err = t.cads.Any().GetFileStat(d.Hex()) | ||
| if err == nil { | ||
| return core.NewBlobInfo(fi.Size()), nil | ||
| } | ||
| if os.IsNotExist(err) { | ||
| return nil, ErrBlobNotFound{ | ||
| Digest: d.Hex(), | ||
| Reason: "file not found after download", | ||
| } | ||
| } else if err != nil { | ||
| return nil, fmt.Errorf("stat cache: %s", err) | ||
| } | ||
| return core.NewBlobInfo(fi.Size()), nil | ||
| return nil, fmt.Errorf("stat cache after download: %w", err) | ||
| } | ||
|
|
||
| // Download downloads blobs as torrent. | ||
| func (t *ReadOnlyTransferer) Download(namespace string, d core.Digest) (store.FileReader, error) { | ||
| f, err := t.cads.Cache().GetFileReader(d.Hex()) | ||
| if os.IsNotExist(err) || t.cads.InDownloadError(err) { | ||
| if err := t.sched.Download(namespace, d); err != nil { | ||
| return nil, fmt.Errorf("scheduler: %s", err) | ||
| } | ||
| f, err = t.cads.Cache().GetFileReader(d.Hex()) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("cache: %s", err) | ||
|
|
||
| // Happy path: file already exists in cache | ||
| if err == nil { | ||
| return f, nil | ||
| } | ||
|
|
||
| // If error is not recoverable, return error | ||
| if !os.IsNotExist(err) && !t.cads.InDownloadError(err) { | ||
| return nil, fmt.Errorf("get cache file: %w", err) | ||
| } | ||
|
|
||
| // File doesn't exist or is in wrong state, trigger P2P download | ||
| if err := t.sched.Download(namespace, d); err != nil { | ||
| return nil, mapSchedulerError(err, d) | ||
| } | ||
|
|
||
| // Get file reader after download completes | ||
| // Use Any() to check both download and cache directories, as the file | ||
| // might still be in the process of being moved from download to cache. | ||
| f, err = t.cads.Any().GetFileReader(d.Hex()) | ||
| if err != nil { | ||
| if os.IsNotExist(err) { | ||
| return nil, ErrBlobNotFound{ | ||
| Digest: d.Hex(), | ||
| Reason: "file not found after download", | ||
hweawer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| } else if err != nil { | ||
| return nil, fmt.Errorf("cache: %s", err) | ||
| return nil, fmt.Errorf("get file reader after download: %w", err) | ||
| } | ||
|
|
||
| return f, nil | ||
| } | ||
|
|
||
|
|
@@ -92,15 +142,18 @@ func (t *ReadOnlyTransferer) Upload(namespace string, d core.Digest, blob store. | |
| // GetTag gets manifest digest for tag. | ||
| func (t *ReadOnlyTransferer) GetTag(tag string) (core.Digest, error) { | ||
| d, err := t.tags.Get(tag) | ||
| if err != nil { | ||
| if err == tagclient.ErrTagNotFound { | ||
| t.stats.Counter("tag_not_found").Inc(1) | ||
| return core.Digest{}, ErrTagNotFound | ||
| if err == nil { | ||
| return d, nil | ||
| } | ||
| if err == tagclient.ErrTagNotFound { | ||
| t.stats.Counter("tag_not_found").Inc(1) | ||
| return core.Digest{}, ErrTagNotFound{ | ||
| Tag: tag, | ||
| Reason: "not found in build-index", | ||
| } | ||
| t.stats.Counter("get_tag_error").Inc(1) | ||
| return core.Digest{}, fmt.Errorf("client get tag: %s", err) | ||
| } | ||
| return d, nil | ||
| t.stats.Counter("get_tag_error").Inc(1) | ||
| return core.Digest{}, fmt.Errorf("client get tag: %w", err) | ||
| } | ||
|
|
||
| // PutTag is not supported. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.