Skip to content

Commit d861874

Browse files
author
averiewang
committed
Merge branch 'fix/averiewang/uploadfile-by-io' into 'main' (merge request !76)
fix: upload file by io.Reader
2 parents 6840f9f + d0b9135 commit d861874

File tree

6 files changed

+162
-49
lines changed

6 files changed

+162
-49
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## v1.6.2
4+
* fix: upload file by io.Reader
5+
36
## v1.6.1
47
* feat: support configuring autoId for the primary key in collection, which can only be set to uuid
58
* feat: support a new field indexType json, allowing for one layer of JSON Fields. JSON keys must be string, while values can be string, uint64 or array

example/collection_upload_file_demo/main.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,17 @@ func (d *Demo) CreateDBAndCollection(ctx context.Context, database, collection s
8282
func (d *Demo) UploadFile(ctx context.Context, database, collection, localFilePath string) error {
8383
appendKeywordsToChunk := false
8484
appendTitleToChunk := false
85+
86+
// filename := filepath.Base(localFilePath)
87+
// fd, err := os.Open(localFilePath)
88+
// if err != nil {
89+
// return err
90+
// }
91+
8592
param := tcvectordb.UploadFileParams{
8693
LocalFilePath: localFilePath,
94+
//FileName: filename,
95+
//Reader: fd,
8796
SplitterPreprocess: ai_document_set.DocumentSplitterPreprocess{
8897
AppendKeywordsToChunk: &appendKeywordsToChunk,
8998
AppendTitleToChunk: &appendTitleToChunk,
@@ -160,7 +169,6 @@ func main() {
160169

161170
ctx := context.Background()
162171
testVdb, err := NewDemo("vdb http url or ip and port", "vdb username", "key get from web console")
163-
printErr(err)
164172
err = testVdb.CreateDBAndCollection(ctx, database, collectionName)
165173
printErr(err)
166174
err = testVdb.UploadFile(ctx, database, collectionName, localFilePath)

tcvectordb/ai_document_sets.go

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package tcvectordb
2020

2121
import (
22+
"bytes"
2223
"context"
2324
"encoding/base64"
2425
"encoding/json"
@@ -607,10 +608,11 @@ func (i *implementerAIDocumentSets) LoadAndSplitText(ctx context.Context, param
607608
return nil, BaseDbTypeError
608609
}
609610

610-
size, err := i.loadAndSplitTextCheckParams(&param)
611+
size, reader, err := i.loadAndSplitTextCheckParams(&param)
611612
if err != nil {
612613
return nil, err
613614
}
615+
defer reader.Close()
614616

615617
res, err := i.GetCosTmpSecret(ctx, GetCosTmpSecretParams{
616618
DocumentSetName: param.DocumentSetName,
@@ -666,33 +668,48 @@ func (i *implementerAIDocumentSets) LoadAndSplitText(ctx context.Context, param
666668
return nil, fmt.Errorf("cos header for param MetaData is too large, it can not be more than 2k")
667669
}
668670

669-
opt := &cos.MultiUploadOptions{
670-
OptIni: &cos.InitiateMultipartUploadOptions{
671-
nil,
672-
&cos.ObjectPutHeaderOptions{
673-
XCosMetaXXX: &header,
674-
//Listener: &cos.DefaultProgressListener{},
671+
if param.LocalFilePath != "" {
672+
// upload file by reading local file path, which supports multi parts uploading
673+
opt := &cos.MultiUploadOptions{
674+
OptIni: &cos.InitiateMultipartUploadOptions{
675+
nil,
676+
&cos.ObjectPutHeaderOptions{
677+
XCosMetaXXX: &header,
678+
//Listener: &cos.DefaultProgressListener{},
679+
},
675680
},
676-
},
677-
// Whether to enable resume from breakpoint, default is false
678-
CheckPoint: true,
679-
PartSize: 5,
680-
}
681+
// Whether to enable resume from breakpoint, default is false
682+
CheckPoint: true,
683+
PartSize: 5,
684+
}
681685

682-
_, _, err = c.Object.Upload(ctx, res.UploadPath, param.LocalFilePath, opt)
683-
if err != nil {
684-
return nil, err
686+
_, _, err = c.Object.Upload(ctx, res.UploadPath, param.LocalFilePath, opt)
687+
if err != nil {
688+
return nil, err
689+
}
690+
} else {
691+
// upload file by io.reader
692+
opt := &cos.ObjectPutOptions{
693+
ObjectPutHeaderOptions: &cos.ObjectPutHeaderOptions{
694+
ContentLength: size,
695+
XCosMetaXXX: &header,
696+
},
697+
}
698+
_, err = c.Object.Put(ctx, res.UploadPath, reader, opt)
699+
if err != nil {
700+
return nil, err
701+
}
685702
}
686703

687704
result = new(LoadAndSplitTextResult)
688705
result.GetCosTmpSecretResult = *res
689706
return result, nil
690707
}
691708

692-
func (i *implementerAIDocumentSets) loadAndSplitTextCheckParams(param *LoadAndSplitTextParams) (size int64, err error) {
709+
func (i *implementerAIDocumentSets) loadAndSplitTextCheckParams(param *LoadAndSplitTextParams) (size int64, reader io.ReadCloser, err error) {
693710
if param.DocumentSetName == "" {
694711
if param.LocalFilePath == "" {
695-
return 0, errors.New("need param: DocumentSetName")
712+
return 0, nil, errors.New("need param: DocumentSetName")
696713
}
697714
param.DocumentSetName = filepath.Base(param.LocalFilePath)
698715
}
@@ -706,17 +723,33 @@ func (i *implementerAIDocumentSets) loadAndSplitTextCheckParams(param *LoadAndSp
706723
"because only markdown filetype supports defining ChunkSplitter")
707724
}
708725

709-
fileInfo, err := os.Stat(param.LocalFilePath)
710-
if err != nil {
711-
return 0, errors.Errorf("get file size failed. err: %v", err.Error())
726+
if param.LocalFilePath != "" {
727+
fd, err := os.Open(param.LocalFilePath)
728+
if err != nil {
729+
return 0, nil, err
730+
}
731+
reader = fd
732+
fstat, err := fd.Stat()
733+
if err != nil {
734+
return 0, nil, err
735+
}
736+
size = fstat.Size()
737+
} else {
738+
bytesBuf := bytes.NewBuffer(nil)
739+
written, err := io.Copy(bytesBuf, param.Reader)
740+
if err != nil {
741+
return 0, nil, err
742+
}
743+
744+
size = written
745+
reader = io.NopCloser(bytesBuf)
712746
}
713-
size = fileInfo.Size()
714747

715748
if size == 0 {
716-
return 0, errors.New("file size cannot be 0")
749+
return 0, nil, errors.New("file size cannot be 0")
717750
}
718751

719-
return size, nil
752+
return size, reader, nil
720753
}
721754

722755
func (i *implementerAIDocumentSets) toDocumentSet(item ai_document_set.QueryDocumentSet) *AIDocumentSet {

tcvectordb/base_flat.go

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
package tcvectordb
2020

2121
import (
22+
"bytes"
2223
"context"
2324
"encoding/base64"
2425
"encoding/json"
2526
"fmt"
27+
"io"
2628
"log"
2729
"net/http"
2830
"net/url"
@@ -91,7 +93,6 @@ type FlatInterface interface {
9193
// [ChangePassword] changes the password for the specific user.
9294
ChangePassword(ctx context.Context, param ChangePasswordParams) error
9395

94-
// []
9596
UploadFile(ctx context.Context, databaseName, collectionName string, param UploadFileParams) (result *UploadFileResult, err error)
9697

9798
GetImageUrl(ctx context.Context, databaseName, collectionName string,
@@ -334,6 +335,7 @@ func (i *implementerFlatDocument) ChangePassword(ctx context.Context, param Chan
334335
type UploadFileParams struct {
335336
FileName string
336337
LocalFilePath string
338+
Reader io.Reader
337339
SplitterPreprocess ai_document_set.DocumentSplitterPreprocess
338340
EmbeddingModel string
339341
ParsingProcess *api.ParsingProcess
@@ -355,10 +357,10 @@ func (i *implementerFlatDocument) UploadFile(ctx context.Context, databaseName,
355357
return uploadFile(ctx, i, databaseName, collectionName, param)
356358
}
357359

358-
func checkUploadFileParam(ctx context.Context, param *UploadFileParams) (size int64, err error) {
360+
func checkUploadFileParam(ctx context.Context, param *UploadFileParams) (size int64, reader io.ReadCloser, err error) {
359361
if param.FileName == "" {
360362
if param.LocalFilePath == "" {
361-
return 0, errors.New("need param: FileName or LocalFilePath")
363+
return 0, nil, errors.New("need param: FileName or LocalFilePath")
362364
}
363365
param.FileName = filepath.Base(param.LocalFilePath)
364366
}
@@ -371,24 +373,41 @@ func checkUploadFileParam(ctx context.Context, param *UploadFileParams) (size in
371373
log.Printf("[Warning] %s", "param SplitterPreprocess.ChunkSplitter will be ommitted, "+
372374
"because only markdown filetype supports defining ChunkSplitter")
373375
}
376+
if param.LocalFilePath != "" {
377+
fd, err := os.Open(param.LocalFilePath)
378+
if err != nil {
379+
return 0, nil, err
380+
}
381+
reader = fd
382+
fstat, err := fd.Stat()
383+
if err != nil {
384+
return 0, nil, err
385+
}
386+
size = fstat.Size()
387+
} else {
388+
bytesBuf := bytes.NewBuffer(nil)
389+
written, err := io.Copy(bytesBuf, param.Reader)
390+
if err != nil {
391+
return 0, nil, err
392+
}
374393

375-
fileInfo, err := os.Stat(param.LocalFilePath)
376-
if err != nil {
377-
return 0, errors.Errorf("get file size failed. err: %v", err.Error())
394+
size = written
395+
reader = io.NopCloser(bytesBuf)
378396
}
379-
size = fileInfo.Size()
380397

381398
if size == 0 {
382-
return 0, errors.New("file size cannot be 0")
399+
return 0, nil, errors.New("file size cannot be 0")
383400
}
384-
return size, nil
401+
402+
return size, reader, nil
385403
}
386404
func uploadFile(ctx context.Context, cli SdkClient, databaseName, collectionName string,
387405
param UploadFileParams) (result *UploadFileResult, err error) {
388-
size, err := checkUploadFileParam(ctx, &param)
406+
size, reader, err := checkUploadFileParam(ctx, &param)
389407
if err != nil {
390408
return nil, err
391409
}
410+
defer reader.Close()
392411

393412
req := new(document.UploadUrlReq)
394413
req.Database = databaseName
@@ -457,22 +476,36 @@ func uploadFile(ctx context.Context, cli SdkClient, databaseName, collectionName
457476
return nil, fmt.Errorf("cos header for param MetaData is too large, it can not be more than 2k")
458477
}
459478

460-
opt := &cos.MultiUploadOptions{
461-
OptIni: &cos.InitiateMultipartUploadOptions{
462-
nil,
463-
&cos.ObjectPutHeaderOptions{
464-
XCosMetaXXX: &header,
465-
//Listener: &cos.DefaultProgressListener{},
479+
if param.LocalFilePath != "" {
480+
opt := &cos.MultiUploadOptions{
481+
OptIni: &cos.InitiateMultipartUploadOptions{
482+
nil,
483+
&cos.ObjectPutHeaderOptions{
484+
XCosMetaXXX: &header,
485+
//Listener: &cos.DefaultProgressListener{},
486+
},
466487
},
467-
},
468-
// Whether to enable resume from breakpoint, default is false
469-
CheckPoint: true,
470-
PartSize: 5,
471-
}
488+
// Whether to enable resume from breakpoint, default is false
489+
CheckPoint: true,
490+
PartSize: 5,
491+
}
472492

473-
_, _, err = c.Object.Upload(ctx, res.UploadPath, param.LocalFilePath, opt)
474-
if err != nil {
475-
return nil, err
493+
_, _, err = c.Object.Upload(ctx, res.UploadPath, param.LocalFilePath, opt)
494+
if err != nil {
495+
return nil, err
496+
}
497+
} else {
498+
499+
opt := &cos.ObjectPutOptions{
500+
ObjectPutHeaderOptions: &cos.ObjectPutHeaderOptions{
501+
ContentLength: size,
502+
XCosMetaXXX: &header,
503+
},
504+
}
505+
_, err = c.Object.Put(ctx, res.UploadPath, reader, opt)
506+
if err != nil {
507+
return nil, err
508+
}
476509
}
477510

478511
return result, nil

tcvectordb/version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@
1818

1919
package tcvectordb
2020

21-
const SDKVersion = "v1.6.1"
21+
const SDKVersion = "v1.6.2"

test/aidb_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"encoding/json"
2323
"fmt"
2424
"log"
25+
"os"
2526
"strings"
2627
"testing"
2728
"time"
@@ -171,6 +172,41 @@ func TestLoadAndSplitText(t *testing.T) {
171172
t.Logf("%+v", result)
172173
}
173174

175+
func TestLoadAndSplitTextByReader(t *testing.T) {
176+
defer cli.Close()
177+
178+
col := cli.AIDatabase(aiDatabase).CollectionView(collectionViewName)
179+
180+
metaData := map[string]interface{}{
181+
// 元数据只支持string、uint64类型的值
182+
"author_name": "sam",
183+
"fileKey": 1024}
184+
185+
appendTitleToChunk := false
186+
appendKeywordsToChunk := true
187+
chunkSplitter := "\n\n"
188+
189+
fd, err := os.Open("../example/demo_files/tcvdb.md")
190+
printErr(err)
191+
192+
result, err := col.LoadAndSplitText(ctx, tcvectordb.LoadAndSplitTextParams{
193+
DocumentSetName: "tcvdb.md",
194+
Reader: fd,
195+
//LocalFilePath: "../example/demo_files/tcvdb.md",
196+
MetaData: metaData,
197+
SplitterPreprocess: ai_document_set.DocumentSplitterPreprocess{
198+
ChunkSplitter: &chunkSplitter,
199+
AppendTitleToChunk: &appendTitleToChunk,
200+
AppendKeywordsToChunk: &appendKeywordsToChunk,
201+
},
202+
ParsingProcess: &api.ParsingProcess{
203+
ParsingType: string(tcvectordb.VisionModelParsing),
204+
},
205+
})
206+
printErr(err)
207+
t.Logf("%+v", result)
208+
}
209+
174210
func TestAIGetDocumentSet(t *testing.T) {
175211
time.Sleep(10 * time.Second)
176212
col := cli.AIDatabase(aiDatabase).CollectionView(collectionViewName)

0 commit comments

Comments (0)