Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions ext/sheets/larkdrive/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package larkdrive

import (
"context"
"fmt"
"regexp"

"github.com/goto/optimus/ext/sheets/lark"

Check failure on line 8 in ext/sheets/larkdrive/client.go

View workflow job for this annotation

GitHub Actions / lint

File is not properly formatted (gci)
"github.com/goto/optimus/internal/errors"
"github.com/goto/salt/log"

larksdk "github.com/larksuite/oapi-sdk-go/v3"
larkdrive "github.com/larksuite/oapi-sdk-go/v3/service/drive/v1"
)

// EntityLarkDrive is the entity name this package passes to the errors
// helpers so that failures can be attributed to the Lark Drive integration.
const EntityLarkDrive = "larkdrive"

// driveIDRegexp matches a Lark Drive folder URL and captures the folder token
// (a run of ASCII letters and digits) as submatch 1. The pattern is not
// anchored, so it matches anywhere in the input, including behind a subdomain.
var driveIDRegexp = regexp.MustCompile(`larksuite\.com\/drive\/folder\/([a-zA-Z0-9]+)`)

// SheetCallback is the function IterateSheet invokes for each sheet it finds.
// Returning a non-nil error stops the iteration and is returned to the caller
// of IterateSheet.
type SheetCallback func(file *Sheet) error

// Client lists the contents of Lark Drive folders and wraps the sheets found
// there so they can be read through the Lark Sheets client.
type Client struct {
// client is the Lark SDK client used for the Drive file-list calls.
client *larksdk.Client
// sheetClient is handed to NewSheet for every listed file.
sheetClient *lark.Client
}

// GetFolderToken extracts the folder token from a Lark Drive folder URL.
//
// The URL must contain "larksuite.com/drive/folder/<token>", where <token> is
// a run of ASCII letters and digits. The pattern is unanchored, so URLs with a
// subdomain in front of larksuite.com are accepted as well.
//
// It returns the captured token, or an empty string together with a not-found
// error when the URL does not match.
func (*Client) GetFolderToken(url string) (string, error) {
	// One FindStringSubmatch call both validates the URL and captures the
	// token: a successful match always yields the full match plus the single
	// capture group, so no separate MatchString pre-check is needed.
	matches := driveIDRegexp.FindStringSubmatch(url)
	if len(matches) < 2 {
		return "", errors.NotFound(EntityLarkDrive, "invalid Lark Drive URL format")
	}

	return matches[1], nil
}

// IterateSheet lists the files of the Lark Drive folder identified by
// folderToken, page by page, and invokes callback once for every listed file
// that NewSheet accepts.
//
// Files that NewSheet rejects are skipped silently. Iteration stops at the
// first non-nil error returned by callback, and that error is returned
// unchanged. A transport error or an unsuccessful API response aborts the
// iteration with an error tagged with EntityLarkDrive.
func (c *Client) IterateSheet(ctx context.Context, folderToken string, callback SheetCallback) error {
	builder := larkdrive.NewListFileReqBuilder().
		PageSize(100).
		FolderToken(folderToken).
		OrderBy("EditedTime").
		Direction("DESC")

	for {
		resp, err := c.client.Drive.File.List(ctx, builder.Build())
		if err != nil {
			return errors.AddErrContext(err, EntityLarkDrive, "failed to list files from lark folder")
		}

		if !resp.Success() {
			return errors.NewError(ErrCodeToErrorType(resp.CodeError), EntityLarkDrive, fmt.Sprintf("failed to list files with error: %s", resp.ErrorResp()))
		}

		// Data is a pointer; a successful response without a payload has
		// nothing to iterate and no next page.
		data := resp.Data
		if data == nil {
			return nil
		}

		for _, file := range data.Files {
			sheet, err := NewSheet(c.sheetClient, file)
			if err != nil {
				// Best effort by design: entries NewSheet rejects are
				// skipped rather than failing the whole listing.
				continue
			}

			if err := callback(sheet); err != nil {
				return err
			}
		}

		// Continue only when the API reports more data AND supplies a usable
		// token. Without this, the last page would dereference a nil token or
		// re-request the same page forever.
		hasMore := data.HasMore != nil && *data.HasMore
		if !hasMore || data.NextPageToken == nil || *data.NextPageToken == "" {
			return nil
		}

		builder.PageToken(*data.NextPageToken)
	}
}

func (c *Client) GetRevisionID(ctx context.Context, url string) (int, error) {
folderToken, err := c.GetFolderToken(url)
if err != nil {
return 0, err
}

builder := larkdrive.NewListFileReqBuilder().
PageSize(100).
FolderToken(folderToken).
OrderBy("EditedTime").
Direction("DESC")
revisionNumbers := make([]int, 0)

for {
req := builder.Build()

resp, err := c.client.Drive.File.List(ctx, req)
if err != nil {
return 0, errors.AddErrContext(err, EntityLarkDrive, "failed to list files from lark folder")
}

if !resp.Success() {
return 0, errors.NewError(ErrCodeToErrorType(resp.CodeError), EntityLarkDrive, fmt.Sprintf("failed to list files with error: %s", resp.ErrorResp()))
}

for _, file := range resp.Data.Files {
sheet, err := NewSheet(c.sheetClient, file)
if err != nil {
continue
}

revisionId, err := sheet.GetRevisionID(ctx, *sheet.Url)

Check failure on line 117 in ext/sheets/larkdrive/client.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: var revisionId should be revisionID (revive)
if err != nil {
return 0, errors.AddErrContext(err, EntityLarkDrive, fmt.Sprintf("failed to get revision ID for file: %s", *file.Name))
}
revisionNumbers = append(revisionNumbers, revisionId)
}

hasMoreData := resp.Data.HasMore != nil && *resp.Data.HasMore
hasNextPageToken := resp.Data.NextPageToken == nil
hasNextPage := hasMoreData && hasNextPageToken
if hasNextPage {
break
}

builder.PageToken(*resp.Data.NextPageToken)
}

revisionNumber := GenerateRevNumForDrive(revisionNumbers)
return revisionNumber, nil
}

// NewClient builds a Client from a JSON secret holding the Lark application
// credentials (see NewCredentialFromSecret). The supplied logger is adapted
// with NewLogger and installed as the Lark SDK logger.
//
// It returns an error when the secret cannot be parsed.
//
// NOTE(review): only the SDK client is populated here; sheetClient stays nil
// even though IterateSheet and GetRevisionID pass it to NewSheet. Confirm
// whether it should be initialised in this constructor.
func NewClient(secret string, logger log.Logger) (*Client, error) {
	credential, err := NewCredentialFromSecret(secret)
	if err != nil {
		return nil, err
	}

	sdkClient := larksdk.NewClient(
		credential.AppID,
		credential.AppSecret,
		larksdk.WithLogger(NewLogger(logger)),
	)

	return &Client{client: sdkClient}, nil
}
70 changes: 70 additions & 0 deletions ext/sheets/larkdrive/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package larkdrive

Check failure on line 1 in ext/sheets/larkdrive/client_test.go

View workflow job for this annotation

GitHub Actions / lint

package should be `larkdrive_test` instead of `larkdrive` (testpackage)

import (
"testing"

"github.com/goto/optimus/ext/sheets/lark"

larksdk "github.com/larksuite/oapi-sdk-go/v3"
)

// TestClient_GetFolderToken checks that GetFolderToken extracts the folder
// token from Lark Drive folder URLs (with and without a subdomain) and rejects
// URLs that do not match the expected format, including the empty string.
func TestClient_GetFolderToken(t *testing.T) {
	type fields struct {
		client      *larksdk.Client
		sheetClient *lark.Client
	}
	type args struct {
		url string
	}
	tests := []struct {
		name    string
		fields  fields
		args    args
		want    string
		wantErr bool
	}{
		{
			name:   "Extract folder token from valid URL without subdomain",
			fields: fields{},
			args: args{
				url: "https://larksuite.com/drive/folder/1234567890abcdef",
			},
			want:    "1234567890abcdef",
			wantErr: false,
		},
		{
			name:   "Extract folder token from valid URL with subdomain",
			fields: fields{},
			args: args{
				url: "https://subdomain.larksuite.com/drive/folder/1234567890abcdef",
			},
			want:    "1234567890abcdef",
			wantErr: false,
		},
		{
			name:   "Invalid URL format",
			fields: fields{},
			args: args{
				url: "https://drive.google.com/drive/folder/1234567890abcdef",
			},
			want:    "",
			wantErr: true,
		},
		{
			name:   "Empty URL",
			fields: fields{},
			args: args{
				url: "",
			},
			want:    "",
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// GetFolderToken does not touch the client fields, so zero
			// values are sufficient here.
			cl := &Client{
				client:      tt.fields.client,
				sheetClient: tt.fields.sheetClient,
			}
			got, err := cl.GetFolderToken(tt.args.url)
			if (err != nil) != tt.wantErr {
				t.Errorf("GetFolderToken() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("GetFolderToken() got = %v, want %v", got, tt.want)
			}
		})
	}
}
41 changes: 41 additions & 0 deletions ext/sheets/larkdrive/dto.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package larkdrive

import (
"encoding/json"

"github.com/goto/optimus/internal/errors"

Check failure on line 6 in ext/sheets/larkdrive/dto.go

View workflow job for this annotation

GitHub Actions / lint

File is not properly formatted (gci)

larkcore "github.com/larksuite/oapi-sdk-go/v3/core"
)

// Numeric error codes that ErrCodeToErrorType translates into internal error
// types. NOTE(review): presumably these are Lark Drive API error codes —
// confirm the values against the Lark Open Platform error-code reference.
const (
// ErrCodeInvalidParam is mapped to errors.ErrInvalidArgument.
ErrCodeInvalidParam = 1061002
// ErrCodeNotFound is mapped to errors.ErrNotFound.
ErrCodeNotFound = 1061003
// ErrCodeForbidden is mapped to errors.ErrForbidden.
ErrCodeForbidden = 1061004
)

// ErrCodeToErrorType translates the numeric code carried by a Lark SDK
// CodeError into the matching internal error type. Codes without a dedicated
// mapping are reported as errors.ErrInternalError.
func ErrCodeToErrorType(codeError larkcore.CodeError) errors.ErrorType {
	var mapped errors.ErrorType

	switch codeError.Code {
	case ErrCodeForbidden:
		mapped = errors.ErrForbidden
	case ErrCodeNotFound:
		mapped = errors.ErrNotFound
	case ErrCodeInvalidParam:
		mapped = errors.ErrInvalidArgument
	default:
		mapped = errors.ErrInternalError
	}

	return mapped
}

// Credential holds the Lark application credentials decoded from a JSON
// secret by NewCredentialFromSecret.
type Credential struct {
// AppID is read from the "app_id" JSON key.
AppID string `json:"app_id"`
// AppSecret is read from the "app_secret" JSON key.
AppSecret string `json:"app_secret"`
}

// NewCredentialFromSecret decodes secret, which must be a JSON document with
// the keys "app_id" and "app_secret", into a Credential.
//
// It returns an invalid-argument error tagged with EntityLarkDrive when the
// secret is not valid JSON for that shape. Missing keys are not rejected; they
// simply leave the corresponding field empty.
func NewCredentialFromSecret(secret string) (*Credential, error) {
	var parsed Credential

	err := json.Unmarshal([]byte(secret), &parsed)
	if err != nil {
		return nil, errors.AddErrContextWithType(err, errors.ErrInvalidArgument, EntityLarkDrive, "invalid secret format, expected JSON")
	}

	return &parsed, nil
}
76 changes: 76 additions & 0 deletions ext/sheets/larkdrive/file.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package larkdrive

import (
"bytes"
"encoding/binary"
"hash/crc64"
"sort"

Check failure on line 8 in ext/sheets/larkdrive/file.go

View workflow job for this annotation

GitHub Actions / lint

File is not properly formatted (gci)
larksheet "github.com/goto/optimus/ext/sheets/lark"
"github.com/goto/optimus/internal/errors"

larkdrive "github.com/larksuite/oapi-sdk-go/v3/service/drive/v1"
)

// Lark file type values. For the full list, see the data.files[].type field in
// https://open.larksuite.com/document/server-docs/docs/drive-v1/folder/list

const (
// LarkFileTypeSheet is the type value IsLarkSheet compares a file's Type against.
LarkFileTypeSheet = "sheet"
)

// Sheet represents a Lark Drive file of type "sheet". It embeds both the Drive
// file metadata and a Lark Sheets client, so the fields of the file and the
// methods of the client are promoted onto Sheet.
type Sheet struct {
*larksheet.Client
*larkdrive.File
}

// NewSheet wraps a Lark Drive file and a Lark Sheets client into a Sheet.
//
// The file is validated first, and the first failing check decides the
// returned invalid-argument error: the file must be non-nil, must carry a
// non-empty token and a non-empty name, and must be of type "sheet"
// (see IsLarkSheet). The client is stored as given and is not validated.
func NewSheet(client *larksheet.Client, file *larkdrive.File) (*Sheet, error) {
	// Cases are evaluated top to bottom, so the nil check guards the field
	// accesses in the cases that follow.
	switch {
	case file == nil:
		return nil, errors.InvalidArgument(EntityLarkDrive, "file cannot be nil")
	case file.Token == nil || *file.Token == "":
		return nil, errors.InvalidArgument(EntityLarkDrive, "file token cannot be empty")
	case file.Name == nil || *file.Name == "":
		return nil, errors.InvalidArgument(EntityLarkDrive, "file name cannot be empty")
	case !IsLarkSheet(file):
		return nil, errors.InvalidArgument(EntityLarkDrive, "file is not a Lark Sheet")
	}

	sheet := &Sheet{Client: client, File: file}
	return sheet, nil
}

// IsLarkSheet reports whether file is a Lark Drive entry of type "sheet".
// A nil file, or a file without a type, is reported as not being a sheet.
func IsLarkSheet(file *larkdrive.File) bool {
	// The nil-file guard keeps this exported helper safe to call on its own,
	// not only after NewSheet has already validated the file.
	return file != nil && file.Type != nil && *file.Type == LarkFileTypeSheet
}

// GenerateRevNumForDrive derives a single revision number from a set of
// individual revision numbers.
//
// The revisions are sorted on a private copy (the caller's slice is left
// untouched), each value is encoded as a big-endian uint32, and the
// CRC-64/ECMA checksum of the concatenated bytes is returned converted to int.
// Sorting makes the result independent of the order in which the revisions
// were collected, so the same set always yields the same number.
//
// An empty or nil slice yields 0. Values outside the uint32 range are
// truncated by the encoding, and the uint64 checksum may come out negative
// after the conversion to int.
func GenerateRevNumForDrive(revisions []int) int {
	if len(revisions) == 0 {
		return 0
	}

	// Sort a copy so the caller does not observe its slice being reordered.
	sorted := make([]int, len(revisions))
	copy(sorted, revisions)
	sort.Ints(sorted)

	var buf bytes.Buffer
	buf.Grow(4 * len(sorted))

	var word [4]byte
	for _, v := range sorted {
		binary.BigEndian.PutUint32(word[:], uint32(v))
		// bytes.Buffer.Write always returns a nil error.
		buf.Write(word[:])
	}

	return int(crc64.Checksum(buf.Bytes(), crc64.MakeTable(crc64.ECMA)))
}
38 changes: 38 additions & 0 deletions ext/sheets/larkdrive/logger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package larkdrive

import (
"context"
"fmt"

"github.com/goto/salt/log"

larkcore "github.com/larksuite/oapi-sdk-go/v3/core"
)

// Logger adapts a salt log.Logger so it can be handed to the Lark SDK as a
// larkcore.Logger (see NewLogger). The embedded logger receives every message.
type Logger struct {
log.Logger
}

func (l *Logger) toMessage(i ...interface{}) string {

Check failure on line 16 in ext/sheets/larkdrive/logger.go

View workflow job for this annotation

GitHub Actions / lint

unused-receiver: method receiver 'l' is not referenced in method's body, consider removing or renaming it as _ (revive)
return fmt.Sprintf("%v", i...)
}

// Debug formats the arguments into one message and forwards it to the embedded
// logger at debug level. The context is ignored.
func (l *Logger) Debug(_ context.Context, i ...interface{}) {
	msg := l.toMessage(i...)
	l.Logger.Debug(msg)
}

// Info formats the arguments into one message and forwards it to the embedded
// logger at info level. The context is ignored.
func (l *Logger) Info(_ context.Context, i ...interface{}) {
	msg := l.toMessage(i...)
	l.Logger.Info(msg)
}

// Warn formats the arguments into one message and forwards it to the embedded
// logger at warn level. The context is ignored.
func (l *Logger) Warn(_ context.Context, i ...interface{}) {
	msg := l.toMessage(i...)
	l.Logger.Warn(msg)
}

// Error formats the arguments into one message and forwards it to the embedded
// logger at error level. The context is ignored.
func (l *Logger) Error(_ context.Context, i ...interface{}) {
	msg := l.toMessage(i...)
	l.Logger.Error(msg)
}

// NewLogger wraps logger in a Logger adapter and returns it as a
// larkcore.Logger, ready to be installed on the Lark SDK client.
func NewLogger(logger log.Logger) larkcore.Logger {
	adapter := new(Logger)
	adapter.Logger = logger
	return adapter
}
2 changes: 1 addition & 1 deletion ext/store/maxcompute/external_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func (e ExternalTableHandle) enrichRoleToAssume(ctx context.Context, et *Externa

func (e ExternalTableHandle) getLocation(ctx context.Context, et *ExternalTable, res *resource.Resource) (string, error) {
switch et.Source.SourceType {
case GoogleSheet, GoogleDrive, LarkSheet:
case GoogleSheet, GoogleDrive, LarkSheet, LarkDrive:
loc := et.Source.Location
if loc == "" {
tenantWithDetails, err := e.tenantDetailsGetter.GetDetails(ctx, res.Tenant())
Expand Down
1 change: 1 addition & 0 deletions ext/store/maxcompute/external_table_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
GoogleSheet ExternalTableSourceType = "GOOGLE_SHEETS"
GoogleDrive ExternalTableSourceType = "GOOGLE_DRIVE"
LarkSheet ExternalTableSourceType = "LARK_SHEET"
LarkDrive ExternalTableSourceType = "LARK_DRIVE"
OSS ExternalTableSourceType = "OSS"
)

Expand Down
Loading
Loading