diff --git a/app/api/oaiapi/assistant.go b/app/api/oaiapi/assistant.go new file mode 100644 index 0000000..532030c --- /dev/null +++ b/app/api/oaiapi/assistant.go @@ -0,0 +1,26 @@ +package oaiapi + +import ( + "github.com/jlewi/foyle/app/api" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + AssistantGVK = schema.FromAPIVersionAndKind(OAIGroup+"/"+api.Version, "Assistant") +) + +// Assistant based off https://platform.openai.com/docs/api-reference/assistants/create +type Assistant struct { + Metadata api.Metadata `json:"metadata" yaml:"metadata"` + Spec AssistantSpec `json:"spec" yaml:"spec"` +} + +type AssistantSpec struct { + // Model is the name of the model to use + Model string `json:"model" yaml:"model"` + // Instructions is the instructions for the assistant + Instructions string `json:"instructions" yaml:"instructions"` + // VectorStoreIDs is the IDs of the vector stores to use + VectorStoreIDs []string `json:"vectorStoreIDs" yaml:"vectorStoreIDs"` + Description string `json:"description" yaml:"description"` // Description is sent as the description in the create assistant request +} diff --git a/app/api/oaiapi/const.go b/app/api/oaiapi/const.go new file mode 100644 index 0000000..fa510f5 --- /dev/null +++ b/app/api/oaiapi/const.go @@ -0,0 +1,7 @@ +package oaiapi + +import "github.com/jlewi/foyle/app/api" + +const ( + OAIGroup = "oai." 
+ api.Group +) diff --git a/app/api/oaiapi/filesync.go b/app/api/oaiapi/filesync.go new file mode 100644 index 0000000..242e40b --- /dev/null +++ b/app/api/oaiapi/filesync.go @@ -0,0 +1,25 @@ +package oaiapi + +import ( + "github.com/jlewi/foyle/app/api" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + FileSyncGVK = schema.FromAPIVersionAndKind(OAIGroup+"/"+api.Version, "FileSync") +) + +// FileSync based off https://platform.openai.com/docs/api-reference/vector-stores/create +type FileSync struct { + Metadata api.Metadata `json:"metadata" yaml:"metadata"` + Spec FileSyncSpec `json:"spec" yaml:"spec"` +} + +type FileSyncSpec struct { + // Source is the source glob to match. NOTE(review): FileSyncer passes it to filepath.Walk and keeps the *.md files, so today it is treated as a directory rather than a glob. + Source string `json:"source" yaml:"source"` + // VectorStoreID is the ID of the vector store to sync the files to + VectorStoreID string `json:"vectorStoreID" yaml:"vectorStoreID"` + VectorStoreName string `json:"vectorStoreName" yaml:"vectorStoreName"` // VectorStoreName is not read by FileSyncer in this change; presumably an alternative to VectorStoreID - TODO confirm + BaseURL string `json:"baseURL" yaml:"baseURL"` // BaseURL is prepended to a file's path relative to Source to form the name of the uploaded file +} diff --git a/app/api/oaiapi/vectorstore.go b/app/api/oaiapi/vectorstore.go new file mode 100644 index 0000000..dc2af50 --- /dev/null +++ b/app/api/oaiapi/vectorstore.go @@ -0,0 +1,20 @@ +package oaiapi + +import ( + "github.com/jlewi/foyle/app/api" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + VectorStoreGVK = schema.FromAPIVersionAndKind(OAIGroup+"/"+api.Version, "VectorStore") +) + +// VectorStore based off https://platform.openai.com/docs/api-reference/vector-stores/create +type VectorStore struct { + Metadata api.Metadata `json:"metadata" yaml:"metadata"` + Spec VectorStoreSpec `json:"spec" yaml:"spec"` +} + +type VectorStoreSpec struct { + // TODO(jeremy): Should add actual fields. 
+} diff --git a/app/go.mod b/app/go.mod index 156b422..fad0468 100644 --- a/app/go.mod +++ b/app/go.mod @@ -4,9 +4,10 @@ go 1.22.1 replace ( github.com/jlewi/foyle/protos/go => ../protos/go + // TODO(jeremy): Remove this if https://github.com/sashabaranov/go-openai/pull/919 ever gets merged + github.com/sashabaranov/go-openai => github.com/jlewi/go-openai v0.0.0-20250102163401-3f27fc7109d1 // TODO(jeremy): We can get rid of this replace; we should no longer need to use a jlewi branch. github.com/stateful/runme/v3 => github.com/jlewi/runme/v3 v3.0.0-20240524044247-2657f0b08e0f - k8s.io/client-go => k8s.io/client-go v0.27.3 ) @@ -37,7 +38,7 @@ require ( github.com/oklog/ulid/v2 v2.1.0 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c github.com/pkg/errors v0.9.1 - github.com/sashabaranov/go-openai v1.30.3 + github.com/sashabaranov/go-openai v1.36.1 github.com/spf13/cobra v1.8.0 github.com/spf13/viper v1.18.2 github.com/stateful/runme/v3 v3.3.1-0.20240515132033-7fd1591498c6 @@ -157,6 +158,7 @@ require ( github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect + github.com/openai/openai-go v0.1.0-alpha.41 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect @@ -184,6 +186,10 @@ require ( github.com/spf13/cast v1.6.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect + github.com/tidwall/gjson v1.14.4 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect github.com/timtadh/data-structures v0.6.1 // indirect github.com/tklauser/go-sysconf v0.3.13 // indirect github.com/tklauser/numcpus v0.7.0 // indirect diff --git a/app/go.sum b/app/go.sum index 
a000413..1024323 100644 --- a/app/go.sum +++ b/app/go.sum @@ -390,6 +390,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/jlewi/go-openai v0.0.0-20250102163401-3f27fc7109d1 h1:+YZfNrujCqjwWmfBxU0x/d0MpZb5tP5oCyVhBbZVS9I= +github.com/jlewi/go-openai v0.0.0-20250102163401-3f27fc7109d1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/jlewi/hydros v0.0.7-0.20240503183011-8f99ead373fb h1:2G2k606S3Qcg40czr7gnkeIG5KgQ2wXJ1BMxAuC+P3I= github.com/jlewi/hydros v0.0.7-0.20240503183011-8f99ead373fb/go.mod h1:4fV+JUCnexPY2ZbKzdfV/RsyrfralN832MsUSq/7FqE= github.com/jlewi/monogo v0.0.0-20240123191147-401afe194d74 h1:pbOw/rOMs0AZ494bGnI6DieGKwqoJQEjHWaJZrvxsJo= @@ -494,6 +496,8 @@ github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU= github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= +github.com/openai/openai-go v0.1.0-alpha.41 h1:OPRT5YfNKlENfipMtolMWnKbCR1iQDc9hCRsUkhMaK8= +github.com/openai/openai-go v0.1.0-alpha.41/go.mod h1:3SdE6BffOX9HPEQv8IL/fi3LYZ5TUpRYaqGQZbyk11A= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= @@ -624,6 +628,16 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT 
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= +github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/timtadh/data-structures v0.6.1 h1:76eDpwngj2rEi9r/qvdH6YL7wMXGsoFFzhEylo/IacA= github.com/timtadh/data-structures v0.6.1/go.mod h1:uYUnI1cQi/5yMCc7s23I+x8Mn8BCMf4WgK+7/4QSEk4= github.com/timtadh/getopt v1.0.0/go.mod h1:L3EL6YN2G0eIAhYBo9b7SB9d/kEQmdnwthIlMJfj210= diff --git a/app/pkg/agent/agent_test.go b/app/pkg/agent/agent_test.go index 48f090e..80400af 100644 --- a/app/pkg/agent/agent_test.go +++ b/app/pkg/agent/agent_test.go @@ -182,7 +182,7 @@ func Test_StreamingClient(t *testing.T) { log := zapr.NewLogger(newLog) // This is code to help us test streaming with the connect protocol addr := "http://127.0.0.1:8877/api" - //addr := "http://127.0.0.1:9977/api" + //addr :=x "http://127.0.0.1:9977/api" log.Info("Server started") if err := runClient(addr); err != nil { diff --git a/app/pkg/application/app.go b/app/pkg/application/app.go index cb523b2..a997606 
100644
--- a/app/pkg/application/app.go
+++ b/app/pkg/application/app.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"database/sql"
 	"fmt"
+	"github.com/jlewi/foyle/app/api/oaiapi"
 	"io"
 	"net/http"
 	"os"
@@ -271,6 +272,29 @@ func (a *App) SetupRegistry() error {
 		return err
 	}
 
+	vs, err := oai.NewVSController(*a.Config)
+	if err != nil {
+		return err
+	}
+	if err := a.Registry.Register(oaiapi.VectorStoreGVK, vs); err != nil {
+		return err
+	}
+
+	fileSyncer, err := oai.NewFileSyncer(*a.Config)
+	if err != nil {
+		return err
+	}
+	if err := a.Registry.Register(oaiapi.FileSyncGVK, fileSyncer); err != nil {
+		return err
+	}
+
+	assistant, err := oai.NewAssistantController(*a.Config)
+	if err != nil {
+		return err
+	}
+	if err := a.Registry.Register(oaiapi.AssistantGVK, assistant); err != nil {
+		return err
+	}
 	return nil
 }
 
diff --git a/app/pkg/oai/assistant.go b/app/pkg/oai/assistant.go
new file mode 100644
index 0000000..3ab919e
--- /dev/null
+++ b/app/pkg/oai/assistant.go
@@ -0,0 +1,98 @@
+package oai
+
+import (
+	"context"
+
+	"github.com/jlewi/foyle/app/api/oaiapi"
+	"github.com/jlewi/foyle/app/pkg/config"
+	"github.com/jlewi/foyle/app/pkg/logs"
+	"github.com/pkg/errors"
+	"github.com/sashabaranov/go-openai"
+	"google.golang.org/protobuf/proto"
+	"sigs.k8s.io/kustomize/kyaml/yaml"
+)
+
+// AssistantController is a controller for OpenAI assistant
+type AssistantController struct {
+	cfg    config.Config
+	client *openai.Client
+}
+
+// NewAssistantController creates a new controller for OpenAI assistant
+func NewAssistantController(cfg config.Config) (*AssistantController, error) {
+	// Delay creation of the client so that we don't create it just to register the controller
+	return &AssistantController{cfg: cfg}, nil
+}
+
+// ReconcileNode reconciles the state of the resource.
+func (a *AssistantController) ReconcileNode(ctx context.Context, node *yaml.RNode) error {
+	s := &oaiapi.Assistant{}
+	if err := node.YNode().Decode(s); err != nil {
+		return errors.Wrap(err, "Failed to decode Assistant")
+	}
+
+	return a.Apply(ctx, s)
+}
+
+// Apply creates an OpenAI assistant from the resource. The assistant is given
+// the file search tool and the vector stores listed in the spec.
+//
+// N.B. CreateAssistant is called unconditionally, so applying the same
+// resource twice issues two create requests.
+func (a *AssistantController) Apply(ctx context.Context, s *oaiapi.Assistant) error {
+	log := logs.FromContext(ctx)
+	if a.client == nil {
+		client, err := NewClient(a.cfg)
+		if err != nil {
+			return errors.Wrap(err, "Failed to create OpenAI client")
+		}
+		a.client = client
+	}
+
+	tools := []openai.AssistantTool{
+		{
+			Type: openai.AssistantToolTypeFileSearch,
+		},
+	}
+	req := openai.AssistantRequest{
+		Model:        s.Spec.Model,
+		Name:         proto.String(s.Metadata.Name),
+		Description:  proto.String(s.Spec.Description),
+		Instructions: proto.String(s.Spec.Instructions),
+		Tools:        tools,
+		ToolResources: &openai.AssistantToolResource{
+			FileSearch: &openai.AssistantToolFileSearch{VectorStoreIDs: s.Spec.VectorStoreIDs},
+		},
+	}
+	resp, err := a.client.CreateAssistant(ctx, req)
+	if err != nil {
+		return errors.Wrapf(err, "Failed to create assistant %v", s.Metadata.Name)
+	}
+
+	log.Info("Created assistant", "name", s.Metadata.Name, "id", resp.ID)
+	return nil
+}
+
+// Assistant is a work in progress. Its only method, Assist, is not
+// implemented yet.
+type Assistant struct {
+	cfg    config.Config
+	client *openai.Client
+}
+
+// Assist is not implemented yet. It creates the OpenAI client on first use and
+// then returns an error.
+//
+// TODO: Build an openai.CreateThreadAndRunRequest and pass it, together with
+// ctx, to client.CreateThreadAndRun.
+func (a *Assistant) Assist(ctx context.Context) error {
+	if a.client == nil {
+		client, err := NewClient(a.cfg)
+		if err != nil {
+			return errors.Wrap(err, "Failed to create OpenAI client")
+		}
+		a.client = client
+	}
+
+	return errors.New("Assist is not implemented")
+}
diff --git a/app/pkg/oai/filesyncer.go b/app/pkg/oai/filesyncer.go
new file mode 100644
index 0000000..fa19617
--- /dev/null
+++ b/app/pkg/oai/filesyncer.go
@@ -0,0 +1,235 @@
+package oai
+
+import (
+	"context"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"github.com/jlewi/foyle/app/api/oaiapi"
+	"github.com/jlewi/foyle/app/pkg/config"
+	"github.com/jlewi/foyle/app/pkg/logs"
+	"github.com/jlewi/monogo/files"
+	openaico "github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+	"github.com/pkg/errors"
+	"github.com/sashabaranov/go-openai"
+	"sigs.k8s.io/kustomize/kyaml/yaml"
+)
+
+// FileSyncer is a controller to sync files to an assistant.
+// https://platform.openai.com/docs/api-reference/files
+type FileSyncer struct {
+	cfg    config.Config
+	client *openai.Client
+
+	// oClient is a client from the github.com/openai/openai-go SDK. Apply
+	// creates it, but nothing in this file calls it yet.
+	oClient *openaico.Client
+}
+
+// NewFileSyncer creates a new controller for OpenAI file sync
+func NewFileSyncer(cfg config.Config) (*FileSyncer, error) {
+	// Delay creation of the client so that we don't create it just to register the controller
+	return &FileSyncer{cfg: cfg}, nil
+}
+
+// ReconcileNode reconciles the state of the resource.
+func (f *FileSyncer) ReconcileNode(ctx context.Context, node *yaml.RNode) error {
+	s := &oaiapi.FileSync{}
+	if err := node.YNode().Decode(s); err != nil {
+		return errors.Wrap(err, "Failed to decode FileSync")
+	}
+
+	return f.Apply(ctx, s)
+}
+
+// Apply uploads the markdown files under s.Spec.Source that have not been
+// uploaded yet, adds every matched file to the vector store
+// s.Spec.VectorStoreID and then removes all other files from that store.
+//
+// Files are matched by name. The name of a file is s.Spec.BaseURL followed by
+// the file's path relative to s.Spec.Source. Apply returns the first error.
+func (f *FileSyncer) Apply(ctx context.Context, s *oaiapi.FileSync) error {
+	log := logs.FromContext(ctx)
+	if f.client == nil {
+		client, err := NewClient(f.cfg)
+		if err != nil {
+			return errors.Wrap(err, "Failed to create OpenAI client")
+		}
+		f.client = client
+	}
+
+	if f.oClient == nil {
+		apiKey, err := files.Read(f.cfg.OpenAI.APIKeyFile)
+		if err != nil {
+			return errors.Wrap(err, "Failed to read OpenAI API key")
+		}
+		f.oClient = openaico.NewClient(option.WithAPIKey(string(apiKey)))
+	}
+	// TODO(jeremy): We shouldn't assume we only want to match markdown files.
+	// How can we support a suitable glob like syntax?
+	mdFiles, err := findMarkdownFiles(s.Spec.Source)
+	if err != nil {
+		return errors.Wrapf(err, "Failed to find markdown files in %v", s.Spec.Source)
+	}
+
+	client := f.client
+
+	// N.B. The result is not named files because that would shadow the files package.
+	uploaded, err := client.ListFiles(ctx)
+	if err != nil {
+		return errors.Wrap(err, "Failed to list files")
+	}
+
+	alreadyUploaded := make(map[string]string, len(uploaded.Files))
+	for _, u := range uploaded.Files {
+		alreadyUploaded[u.FileName] = u.ID
+	}
+
+	fileIDs := make([]string, 0, len(mdFiles))
+
+	// Files that should be in the vector store
+	// We use this to remove files that are no longer in the vector store
+	expectedIDS := make(map[string]string, len(mdFiles))
+
+	for _, mdFile := range mdFiles {
+		relPath, err := filepath.Rel(s.Spec.Source, mdFile)
+		if err != nil {
+			return errors.Wrapf(err, "Failed to get relative path for %v", mdFile)
+		}
+
+		// N.B. We don't use the Hugo Link because we need the file extension
+		// relativeUrl := convertFilePathToHugoURL(relPath)
+		// fileName := s.Spec.BaseURL + relativeUrl
+		// ToSlash keeps the name URL-like where the path separator is not "/".
+		fileName := s.Spec.BaseURL + filepath.ToSlash(relPath)
+
+		if fid, ok := alreadyUploaded[fileName]; ok {
+			log.Info("File already uploaded", "path", relPath, "fileName", fileName, "id", fid)
+			fileIDs = append(fileIDs, fid)
+			expectedIDS[fid] = fileName
+			continue
+		}
+
+		fileData, err := os.ReadFile(mdFile)
+		if err != nil {
+			return errors.Wrapf(err, "Failed to read file %v", mdFile)
+		}
+		req := openai.FileBytesRequest{
+			Name:    fileName,
+			Bytes:   fileData,
+			Purpose: openai.PurposeAssistants,
+		}
+
+		newFile, err := client.CreateFileBytes(ctx, req)
+		if err != nil {
+			return errors.Wrapf(err, "Failed to create file %v", mdFile)
+		}
+		log.Info("Uploaded file", "path", mdFile, "id", newFile.ID, "fileName", newFile.FileName)
+		fileIDs = append(fileIDs, newFile.ID)
+		expectedIDS[newFile.ID] = fileName
+	}
+
+	req := openai.VectorStoreFileBatchRequest{
+		FileIDs: fileIDs,
+	}
+	log.Info("Creating vector store file batch", "numFileIDs", len(fileIDs))
+	resp, err := client.CreateVectorStoreFileBatch(ctx, s.Spec.VectorStoreID, req)
+	if err != nil {
+		return errors.Wrapf(err, "Failed to create vector store file batch in %v", s.Spec.VectorStoreID)
+	}
+
+	log.Info("Created vector store file batch", "id", resp.ID, "numFileIDs", len(fileIDs))
+
+	if err := f.pruneFilesInVectorStore(ctx, s.Spec.VectorStoreID, expectedIDS); err != nil {
+		return errors.Wrapf(err, "Failed to prune files in vector store %v", s.Spec.VectorStoreID)
+	}
+	return nil
+}
+
+// pruneFilesInVectorStore removes every file whose ID is not a key of
+// expectedIDS from the vector store. A failure to list the files is returned.
+// A failure to remove a single file is logged and pruning carries on.
+func (f *FileSyncer) pruneFilesInVectorStore(ctx context.Context, vectorStoreID string, expectedIDS map[string]string) error {
+	log := logs.FromContext(ctx)
+	client := f.client
+	limit := 100
+	pagination := openai.Pagination{
+		Limit: &limit,
+	}
+
+	totalFiles := 0
+	numPruned := 0
+	for {
+		fList, err := client.ListVectorStoreFiles(ctx, vectorStoreID, pagination)
+		if err != nil {
+			return errors.Wrap(err, "Failed to list files")
+		}
+
+		totalFiles += len(fList.VectorStoreFiles)
+		for _, vsFile := range fList.VectorStoreFiles {
+			if _, ok := expectedIDS[vsFile.ID]; ok {
+				continue
+			}
+			log.Info("Removing file from vector store", "id", vsFile.ID)
+			if err := client.DeleteVectorStoreFile(ctx, vectorStoreID, vsFile.ID); err != nil {
+				log.Error(err, "Failed to remove file from vector store", "id", vsFile.ID)
+				continue
+			}
+			numPruned++
+		}
+		if !fList.HasMore {
+			log.Info("Pruned vector store files", "numFiles", totalFiles, "numPruned", numPruned)
+			return nil
+		}
+
+		pagination.After = fList.LastID
+	}
+}
+
+// findMarkdownFiles walks dir and returns the path of every file whose name ends in ".md".
+func findMarkdownFiles(dir string) ([]string, error) {
+	var markdownFiles []string
+
+	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if !info.IsDir() && strings.HasSuffix(info.Name(), ".md") {
+			markdownFiles = append(markdownFiles, path)
+		}
+		return nil
+	})
+
+	return markdownFiles, err
+}
+
+// convertFilePathToHugoURL takes a file path and converts it into a URL for a Hugo static site.
+// The result always ends in a slash; a top level _index.md maps to "/".
+func convertFilePathToHugoURL(filePath string) string {
+	// Extract the directory and filename without the extension
+	dir, file := path.Split(filePath)
+	ext := path.Ext(file)
+	fileName := strings.TrimSuffix(file, ext)
+
+	// Replace spaces with hyphens and convert to lowercase
+	sanitizedFileName := strings.ReplaceAll(strings.ToLower(fileName), " ", "-")
+	sanitizedDir := strings.ReplaceAll(strings.ToLower(dir), " ", "-")
+
+	// _index.md files should be treated as directories
+	if sanitizedFileName == "_index" {
+		sanitizedFileName = ""
+	}
+
+	// Construct the URL path
+	urlPath := path.Join(sanitizedDir, sanitizedFileName)
+
+	// path.Join returns "" when both parts are empty, so the check also turns
+	// the top level _index.md into "/" and never doubles an existing slash.
+	if !strings.HasSuffix(urlPath, "/") {
+		urlPath += "/"
+	}
+	return urlPath
+}
diff --git a/app/pkg/oai/filesyncer_test.go b/app/pkg/oai/filesyncer_test.go
new file mode 100644
index 0000000..db32da1
--- /dev/null
+++ b/app/pkg/oai/filesyncer_test.go
@@ -0,0 +1,76 @@
+package oai
+
+import (
+	"context"
+	"github.com/jlewi/foyle/app/pkg/config"
+	"github.com/sashabaranov/go-openai"
+	"os"
+	"testing"
+)
+
+func Test_FileSyncer(t *testing.T) {
+	if os.Getenv("GITHUB_ACTIONS") != "" {
+		t.Skipf("Test_FileSyncer is a manual test that is skipped in CICD")
+	}
+	if err := config.InitViper(nil); err != nil {
+		t.Fatalf("Error initializing viper: %v", err)
+	}
+	cfg := config.GetConfig()
+
+	client, err := NewClient(*cfg)
+	if err != nil {
+		t.Fatalf("Error creating client: %v", err)
+	}
+
+	ctx := context.Background()
+	//file, err := client.GetFile(ctx, "file-9ik4Eous1jaJ16QRkXwfMZ")
+	//if err != nil {
+	//	t.Fatalf("Error getting file: %v", err)
+	//}
+	//t.Logf("FileName: %v", file.FileName)
+	vectorStoreID := "vs_YOUtN6oGx9LPCWuFECXXrdw2"
+	req := &openai.VectorStoreFileBatchRequest{
+		FileIDs: []string{"file-9SUyizQYBygnxhRxdzzk9K"},
+	}
+	//log.Info("Creating vector store file batch", "numFileIDs", len(fileIDs))
+	_, err = client.CreateVectorStoreFileBatch(ctx, vectorStoreID, *req)
+	if err != nil {
+		t.Fatalf("Failed to create vector store file batch: %v", err)
+	}
+
+}
+
+func Test_convertFilePathToHugoURL(t *testing.T) {
+	type testCase struct {
+		name     string
+		path     string
+		expected string
+	}
+
+	cases := []testCase{
+		{
+			name:     "basic",
+			path:     `content/docs/runbooks/api/Oncall Foo Issues Runbook.md`,
+			expected: "content/docs/runbooks/api/oncall-foo-issues-runbook/",
+		},
+		{
+			name:     "index",
+			path:     `docs/content/_index.md`,
+			expected: "docs/content/",
+		},
+		{
+			name:     "root-index",
+			path:     `_index.md`,
+			expected: "/",
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			actual := convertFilePathToHugoURL(c.path)
+			if actual != c.expected {
+				t.Fatalf("Expected %v, got %v", c.expected, actual)
+			}
+		})
+	}
+}
diff --git a/app/pkg/oai/vectorstore.go b/app/pkg/oai/vectorstore.go
new file mode 100644
index 0000000..32183b0
--- /dev/null
+++ b/app/pkg/oai/vectorstore.go
@@ -0,0 +1,60 @@
+package oai
+
+import (
+	"context"
+
+	"github.com/jlewi/foyle/app/api/oaiapi"
+	"github.com/jlewi/foyle/app/pkg/config"
+	"github.com/jlewi/foyle/app/pkg/logs"
+	"github.com/pkg/errors"
+	"github.com/sashabaranov/go-openai"
+	"sigs.k8s.io/kustomize/kyaml/yaml"
+)
+
+// VSController is a controller for OpenAI vector store
+type VSController struct {
+	cfg    config.Config
+	client *openai.Client
+}
+
+// NewVSController creates a new controller for OpenAI vector store
+func NewVSController(cfg config.Config) (*VSController, error) {
+	// Delay creation of the client so that we don't create it just to register the controller
+	return &VSController{cfg: cfg}, nil
+}
+
+// ReconcileNode reconciles the state of the resource.
+func (v *VSController) ReconcileNode(ctx context.Context, node *yaml.RNode) error {
+	s := &oaiapi.VectorStore{}
+	if err := node.YNode().Decode(s); err != nil {
+		return errors.Wrap(err, "Failed to decode VectorStore")
+	}
+
+	return v.Apply(ctx, s)
+}
+
+// Apply creates a vector store named after the resource's metadata name.
+//
+// N.B. CreateVectorStore is called unconditionally, so applying the same
+// resource twice issues two create requests.
+func (v *VSController) Apply(ctx context.Context, s *oaiapi.VectorStore) error {
+	log := logs.FromContext(ctx)
+
+	if v.client == nil {
+		client, err := NewClient(v.cfg)
+		if err != nil {
+			return errors.Wrap(err, "Failed to create OpenAI client")
+		}
+		v.client = client
+	}
+
+	request := openai.VectorStoreRequest{
+		Name: s.Metadata.Name,
+	}
+	response, err := v.client.CreateVectorStore(ctx, request)
+	if err != nil {
+		return errors.Wrapf(err, "Failed to create vector store %v", s.Metadata.Name)
+	}
+	log.Info("Created vector store", "name", s.Metadata.Name, "id", response.ID)
+	return nil
+}