-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
[server] Fix file copy for objects larger than 5GB #7992
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,13 @@ import ( | |
| "time" | ||
| ) | ||
|
|
||
| const () | ||
| const ( | ||
| // AWS S3 CopyObject has a 5GB limit. For larger files, use multipart copy | ||
| maxCopyObjectSize = 5 * 1024 * 1024 * 1024 // 5GB in bytes | ||
| // Each part in multipart copy should be at least 5MB and can be up to 5GB (AWS requirement) | ||
| // Using maximum 5GB part size to minimize number of parts and API calls | ||
| multipartCopyPartSize = 5 * 1024 * 1024 * 1024 // 5GB in bytes (AWS max) | ||
| ) | ||
|
|
||
| type FileCopyController struct { | ||
| S3Config *s3config.S3Config | ||
|
|
@@ -190,6 +196,12 @@ func (fc *FileCopyController) createCopy(c *gin.Context, fcInternal fileCopyInte | |
|
|
||
| // Helper function for S3 object copying. | ||
| func copyS3Object(s3Client *s3.S3, bucket *string, req *copyS3ObjectReq) error { | ||
| // For files larger than 5GB, use multipart copy | ||
| if req.SourceS3Object.FileSize > maxCopyObjectSize { | ||
| return copyS3ObjectMultipart(s3Client, bucket, req) | ||
| } | ||
|
|
||
| // For files <= 5GB, use simple CopyObject | ||
| copySource := fmt.Sprintf("%s/%s", *bucket, req.SourceS3Object.ObjectKey) | ||
| copyInput := &s3.CopyObjectInput{ | ||
| Bucket: bucket, | ||
|
|
@@ -205,3 +217,100 @@ func copyS3Object(s3Client *s3.S3, bucket *string, req *copyS3ObjectReq) error { | |
| logrus.WithField("duration", elapsed).WithField("size", req.SourceS3Object.FileSize).Infof("copied (%s) from %s to %s", req.SourceS3Object.Type, copySource, req.DestObjectKey) | ||
| return nil | ||
| } | ||
|
|
||
| // copyS3ObjectMultipart copies large S3 objects (>5GB) using multipart upload | ||
| func copyS3ObjectMultipart(s3Client *s3.S3, bucket *string, req *copyS3ObjectReq) error { | ||
| copySource := fmt.Sprintf("%s/%s", *bucket, req.SourceS3Object.ObjectKey) | ||
| start := time.Now() | ||
|
|
||
| // Step 1: Initiate multipart upload | ||
| createOutput, err := s3Client.CreateMultipartUpload(&s3.CreateMultipartUploadInput{ | ||
| Bucket: bucket, | ||
| Key: &req.DestObjectKey, | ||
| }) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to initiate multipart upload for %s: %w", req.DestObjectKey, err) | ||
| } | ||
| uploadID := createOutput.UploadId | ||
|
|
||
| // Ensure we abort the multipart upload if something goes wrong | ||
| defer func() { | ||
| if err != nil { | ||
| abortInput := &s3.AbortMultipartUploadInput{ | ||
| Bucket: bucket, | ||
| Key: &req.DestObjectKey, | ||
| UploadId: uploadID, | ||
| } | ||
| if _, abortErr := s3Client.AbortMultipartUpload(abortInput); abortErr != nil { | ||
| logrus.WithError(abortErr).Errorf("failed to abort multipart upload %s", *uploadID) | ||
| } | ||
| } | ||
| }() | ||
|
|
||
| // Step 2: Upload parts | ||
| fileSize := req.SourceS3Object.FileSize | ||
| numParts := (fileSize + multipartCopyPartSize - 1) / multipartCopyPartSize | ||
| completedParts := make([]*s3.CompletedPart, numParts) | ||
|
|
||
|
Comment on lines
+251
to
+254
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The new multipart helper declares Useful? React with 👍 / 👎. |
||
| logrus.WithFields(logrus.Fields{ | ||
| "size": fileSize, | ||
| "numParts": numParts, | ||
| "partSize": multipartCopyPartSize, | ||
| }).Infof("starting multipart copy for (%s) from %s to %s", req.SourceS3Object.Type, copySource, req.DestObjectKey) | ||
|
|
||
| for i := int64(0); i < numParts; i++ { | ||
| partNumber := i + 1 | ||
| startByte := i * multipartCopyPartSize | ||
| endByte := startByte + multipartCopyPartSize - 1 | ||
| if endByte >= fileSize { | ||
| endByte = fileSize - 1 | ||
| } | ||
|
|
||
| copyRange := fmt.Sprintf("bytes=%d-%d", startByte, endByte) | ||
| uploadPartCopyInput := &s3.UploadPartCopyInput{ | ||
| Bucket: bucket, | ||
| CopySource: &copySource, | ||
| CopySourceRange: &copyRange, | ||
| Key: &req.DestObjectKey, | ||
| PartNumber: &partNumber, | ||
| UploadId: uploadID, | ||
| } | ||
|
|
||
| uploadPartOutput, uploadErr := s3Client.UploadPartCopy(uploadPartCopyInput) | ||
| if uploadErr != nil { | ||
| err = fmt.Errorf("failed to upload part %d for %s: %w", partNumber, req.DestObjectKey, uploadErr) | ||
| return err | ||
| } | ||
|
|
||
| completedParts[i] = &s3.CompletedPart{ | ||
| ETag: uploadPartOutput.CopyPartResult.ETag, | ||
|
Comment on lines
+283
to
+286
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Inside the multipart copy loop Useful? React with 👍 / 👎. |
||
| PartNumber: &partNumber, | ||
| } | ||
|
|
||
| logrus.Debugf("uploaded part %d/%d for %s", partNumber, numParts, req.DestObjectKey) | ||
| } | ||
|
|
||
| // Step 3: Complete multipart upload | ||
| completeInput := &s3.CompleteMultipartUploadInput{ | ||
| Bucket: bucket, | ||
| Key: &req.DestObjectKey, | ||
| UploadId: uploadID, | ||
| MultipartUpload: &s3.CompletedMultipartUpload{ | ||
| Parts: completedParts, | ||
| }, | ||
| } | ||
|
|
||
| _, err = s3Client.CompleteMultipartUpload(completeInput) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to complete multipart upload for %s: %w", req.DestObjectKey, err) | ||
| } | ||
|
|
||
| elapsed := time.Since(start) | ||
| logrus.WithFields(logrus.Fields{ | ||
| "duration": elapsed, | ||
| "size": fileSize, | ||
| "numParts": numParts, | ||
| }).Infof("completed multipart copy for (%s) from %s to %s", req.SourceS3Object.Type, copySource, req.DestObjectKey) | ||
|
|
||
| return nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new multipart copy implementation declares
`numParts := (fileSize + multipartCopyPartSize - 1) / multipartCopyPartSize` as an `int64`, and then uses it both as the length argument to `make` and as the index variable (`completedParts[i]`). Go’s `make` and slice indices require `int`, so this file no longer compiles (`cannot use numParts (type int64) as type int`). Until `numParts` (and the loop index) are converted to `int`, `copyS3ObjectMultipart` cannot even build, preventing the server from compiling at all. Useful? React with 👍 / 👎.