feat: more robust Reusable containers experience #2768
@@ -1122,29 +1122,80 @@ func (p *DockerProvider) CreateContainer(ctx context.Context, req ContainerReque

	req.LifecycleHooks = []ContainerLifecycleHooks{combineContainerHooks(defaultHooks, req.LifecycleHooks)}

	err = req.creatingHook(ctx)
	if err != nil {
		return nil, err
	if req.Reuse {
		// Remove the SessionID label from the request, as we don't want Ryuk to control
		// the container lifecycle in the case of reusing containers.
		delete(req.Labels, core.LabelSessionID)
Review comment: We discussed not removing the sessionID, but using a specific label instead when Ryuk is enabled. TBD.
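A minimal sketch of that alternative, assuming a hypothetical reuse label (the label key and the Ryuk wiring are not defined by this PR):

```go
	if req.Reuse {
		// Hypothetical alternative: keep core.LabelSessionID intact and mark the
		// container as reusable instead, so Ryuk (when enabled) can decide to skip it.
		// The label key below is illustrative only.
		req.Labels["org.testcontainers.reuse"] = "true"
	}
```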
	}

	var resp container.CreateResponse
	if req.Reuse {
Comment on lines +1129 to +1132
Review comment: suggestion: combine the two if's, if the above isn't removed, based on the previous question.
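A sketch of the suggested combination, keeping the behaviour of the two branches shown in this diff:

```go
	var resp container.CreateResponse
	if req.Reuse {
		// Remove the SessionID label so Ryuk does not control the lifecycle of
		// reused containers (subject to the label discussion above).
		delete(req.Labels, core.LabelSessionID)

		// protect the reusability of the container when invoked in parallel
		// executions, via ParallelContainers or t.Parallel()
		reuseContainerMx.Lock()
		defer reuseContainerMx.Unlock()

		// ... hash calculation and container lookup continue as in the diff below ...
	}
```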
		// we must protect the reusability of the container in the case it's invoked
		// in a parallel execution, via ParallelContainers or t.Parallel()
		reuseContainerMx.Lock()
		defer reuseContainerMx.Unlock()
Comment on lines +1135 to +1136
Review comment: question: This will lock for the duration of the rest of the call. Is that the intention?
		// calculate the hash, and add the labels, just before creating the container
		hash := req.hash()
		req.Labels[core.LabelContainerHash] = fmt.Sprintf("%d", hash.Hash)
		req.Labels[core.LabelCopiedFilesHash] = fmt.Sprintf("%d", hash.FilesHash)
Comment on lines +1140 to +1141
Review comment: question: do we need multiple hashes?
Reply: This is aligned with the Java implementation. We need these two in order to keep track of the files added before the container is started (the Files attribute in the container request).
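For context, a hedged sketch of the two-part hash this relies on; only the field names (Hash, FilesHash) come from this diff, the field types and hashing mechanism are assumptions:

```go
// containerHash pairs a hash of the container request configuration with a hash of
// the contents of the files copied into the container before start (req.Files),
// mirroring the Java implementation's approach.
type containerHash struct {
	Hash      uint64 // hash of the container request (image, env, cmd, ...)
	FilesHash uint64 // hash of the contents of the files copied before start
}
```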
		// in the case different test programs are creating a container with the same hash,
		// we must check if the container is already created. For that we wait up to 5 seconds
		// for the container to be created. If the error means the container is not found, we
		// can proceed with the creation of the container.
		// This is needed because we need to synchronize the creation of the container across
		// different test programs.
		c, err := p.waitContainerCreationInTimeout(ctx, hash, 5*time.Second)
Review comment: I see this as a potential configuration:
Review comment: Since the PR description says:
wouldn't we want to accept this limitation and allow for the race condition with the current implementation?
Reply: Indeed. Added that comment as an idea for a potential follow-up.
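One hypothetical shape for that follow-up (none of this is in the PR; the field name is illustrative): expose the synchronization window instead of hard-coding five seconds.

```go
	// Hypothetical: let callers tune how long to wait for a container with the same
	// hash to appear before creating a new one, defaulting to the current 5 seconds.
	reuseTimeout := 5 * time.Second
	if req.ReuseTimeout > 0 { // ReuseTimeout is an illustrative, not-yet-existing field
		reuseTimeout = req.ReuseTimeout
	}
	c, err := p.waitContainerCreationInTimeout(ctx, hash, reuseTimeout)
```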
		if err != nil && !errdefs.IsNotFound(err) {
			// another error occurred different from not found, so we return the error
			return nil, err
Review comment: suggestion: Can we wrap the error so we know where it came from?
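A minimal sketch of the suggested wrapping, following the wrapping style already used in this function (the message text is illustrative):

```go
	if err != nil && !errdefs.IsNotFound(err) {
		// another error occurred different from not found, so we return it wrapped
		return nil, fmt.Errorf("wait container creation: %w", err)
	}
```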
		}

		// If a container was found by hash, reuse it instead of creating a new one
		if c != nil {
			resp.ID = c.ID

			// replace the logging messages for reused containers:
			// we know the first lifecycle hook is the logger hook,
			// so it's safe to replace its first message for reused containers.
Comment on lines +1159 to +1161
Review comment: question: Is that always valid? Could a user not have customised this?
Reply: There are default hooks and user-defined hooks.
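If the "first hook is the logger hook" assumption ever needs guarding, a defensive variant might look like this (purely illustrative; the PR's comment states the first lifecycle hook is the logger hook):

```go
	if len(req.LifecycleHooks) > 0 && len(req.LifecycleHooks[0].PreCreates) > 0 {
		req.LifecycleHooks[0].PreCreates[0] = func(ctx context.Context, req ContainerRequest) error {
			Logger.Printf("🔥 Reusing container: %s", resp.ID[:12])
			return nil
		}
	}
```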
			req.LifecycleHooks[0].PreCreates[0] = func(ctx context.Context, req ContainerRequest) error {
				Logger.Printf("🔥 Reusing container: %s", resp.ID[:12])
				return nil
			}
			req.LifecycleHooks[0].PostCreates[0] = func(ctx context.Context, c Container) error {
				Logger.Printf("🔥 Container reused: %s", resp.ID[:12])
				return nil
			}
		}
	}

	resp, err := p.client.ContainerCreate(ctx, dockerInput, hostConfig, networkingConfig, platform, req.Name)
	if err != nil {
		return nil, fmt.Errorf("container create: %w", err)
	}

	// #248: If there is more than one network specified in the request attach newly created container to them one by one
	if len(req.Networks) > 1 {
		for _, n := range req.Networks[1:] {
			nw, err := p.GetNetwork(ctx, NetworkRequest{
				Name: n,
			})
			if err == nil {
				endpointSetting := network.EndpointSettings{
					Aliases: req.NetworkAliases[n],
				}
				err = p.client.NetworkConnect(ctx, nw.ID, resp.ID, &endpointSetting)
				if err != nil {
					return nil, fmt.Errorf("network connect: %w", err)
	// If the container was not found by hash, create a new one
	if resp.ID == "" {
Review comment: suggestion: this function is getting too large; we should look to split it out into more consumable pieces.
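A sketch of one possible extraction, rearranging only code already present in this diff (the helper name is illustrative, not from the PR):

```go
// findReusableContainer applies the hash labels to the request and waits briefly for a
// container with the same hash to appear, returning nil if none is found in time.
func (p *DockerProvider) findReusableContainer(ctx context.Context, req ContainerRequest) (*types.Container, error) {
	hash := req.hash()
	req.Labels[core.LabelContainerHash] = fmt.Sprintf("%d", hash.Hash)
	req.Labels[core.LabelCopiedFilesHash] = fmt.Sprintf("%d", hash.FilesHash)

	c, err := p.waitContainerCreationInTimeout(ctx, hash, 5*time.Second)
	if err != nil && !errdefs.IsNotFound(err) {
		return nil, fmt.Errorf("wait container creation: %w", err)
	}
	return c, nil
}
```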
		err = req.creatingHook(ctx)
		if err != nil {
			return nil, err
		}

		resp, err = p.client.ContainerCreate(ctx, dockerInput, hostConfig, networkingConfig, platform, req.Name)
		if err != nil {
			return nil, fmt.Errorf("container create: %w", err)
		}

		// #248: If there is more than one network specified in the request attach newly created container to them one by one
		if len(req.Networks) > 1 {
			for _, n := range req.Networks[1:] {
				nw, err := p.GetNetwork(ctx, NetworkRequest{
					Name: n,
				})
				if err == nil {
					endpointSetting := network.EndpointSettings{
						Aliases: req.NetworkAliases[n],
					}
					err = p.client.NetworkConnect(ctx, nw.ID, resp.ID, &endpointSetting)
					if err != nil {
						return nil, fmt.Errorf("network connect: %w", err)
					}
				}
			}
		}

@@ -1194,10 +1245,35 @@ func (p *DockerProvider) findContainerByName(ctx context.Context, name string) (
	return nil, nil
}

func (p *DockerProvider) waitContainerCreation(ctx context.Context, name string) (*types.Container, error) {
func (p *DockerProvider) findContainerByHash(ctx context.Context, ch containerHash) (*types.Container, error) {
	filter := filters.NewArgs(
		filters.Arg("label", fmt.Sprintf("%s=%d", core.LabelContainerHash, ch.Hash)),
		filters.Arg("label", fmt.Sprintf("%s=%d", core.LabelCopiedFilesHash, ch.FilesHash)),
	)

	containers, err := p.client.ContainerList(ctx, container.ListOptions{Filters: filter})
	if err != nil {
		return nil, err
	}
	defer p.Close()
Review comment: question: It seems odd to close the client here; could you clarify why that's needed?

	if len(containers) > 0 {
		return &containers[0], nil
	}
	return nil, nil
Review comment: suggestion: return a not-found error.
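A sketch of the suggested change, reusing the errdefs helper already used elsewhere in this file:

```go
	if len(containers) > 0 {
		return &containers[0], nil
	}

	return nil, errdefs.NotFound(fmt.Errorf("container with hash %v not found", ch))
```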
}

func (p *DockerProvider) waitContainerCreation(ctx context.Context, hash containerHash) (*types.Container, error) {
	return p.waitContainerCreationInTimeout(ctx, hash, 5*time.Second)
}

func (p *DockerProvider) waitContainerCreationInTimeout(ctx context.Context, hash containerHash, timeout time.Duration) (*types.Container, error) {
	exp := backoff.NewExponentialBackOff()
	exp.MaxElapsedTime = timeout

	return backoff.RetryNotifyWithData(
		func() (*types.Container, error) {
			c, err := p.findContainerByName(ctx, name)
			c, err := p.findContainerByHash(ctx, hash)
			if err != nil {
				if !errdefs.IsNotFound(err) && isPermanentClientError(err) {
					return nil, backoff.Permanent(err)

@@ -1206,11 +1282,11 @@ func (p *DockerProvider) waitContainerCreation(ctx context.Context, name string)
			}

			if c == nil {
				return nil, errdefs.NotFound(fmt.Errorf("container %s not found", name))
				return nil, errdefs.NotFound(fmt.Errorf("container %v not found", hash))
			}
			return c, nil
		},
		backoff.WithContext(backoff.NewExponentialBackOff(), ctx),
		backoff.WithContext(exp, ctx),
		func(err error, duration time.Duration) {
			if errdefs.IsNotFound(err) {
				return

@@ -1220,8 +1296,10 @@ func (p *DockerProvider) waitContainerCreation(ctx context.Context, name string)
	)
}

// Deprecated: it will be removed in the next major release.
func (p *DockerProvider) ReuseOrCreateContainer(ctx context.Context, req ContainerRequest) (Container, error) {
	c, err := p.findContainerByName(ctx, req.Name)
	hash := req.hash()
	c, err := p.findContainerByHash(ctx, hash)
	if err != nil {
		return nil, err
	}

@@ -1233,7 +1311,7 @@ func (p *DockerProvider) ReuseOrCreateContainer(ctx context.Context, req Contain
		if !createContainerFailDueToNameConflictRegex.MatchString(err.Error()) {
			return nil, err
		}
		c, err = p.waitContainerCreation(ctx, req.Name)
		c, err = p.waitContainerCreation(ctx, hash)
		if err != nil {
			return nil, err
		}
Review comment: question: Not sure about this. I would expect reused containers to still only persist while in use; is that the intent?
The edge case of shutdown while still in use is addressed by the reaper rework PRs, so it should be safe to remove this if the desired behaviour is to share containers between test runs that are within a reasonable window.
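For reference, a hedged usage sketch of how a test might opt into reuse, assuming the existing GenericContainerRequest.Reuse flag remains the entry point and that, with this PR, containers are matched by request hash rather than by name (the image and test details are illustrative):

```go
package example_test

import (
	"context"
	"testing"

	"github.com/testcontainers/testcontainers-go"
)

func TestReusableRedis(t *testing.T) {
	ctx := context.Background()

	// Two test programs issuing an equivalent request would share the same
	// container, matched by the request hash rather than by an explicit name.
	ctr, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
		ContainerRequest: testcontainers.ContainerRequest{
			Image: "redis:7",
		},
		Started: true,
		Reuse:   true,
	})
	if err != nil {
		t.Fatal(err)
	}
	_ = ctr
}
```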