Skip to content

Commit

Permalink
feat: integrate with azure cosmos db (#1444)
Browse files Browse the repository at this point in the history
Co-authored-by: Wassim Chegham <[email protected]>
Co-authored-by: Alex Yang <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent 69f3095 commit 396b1e1
Show file tree
Hide file tree
Showing 11 changed files with 1,671 additions and 565 deletions.
5 changes: 5 additions & 0 deletions .changeset/slow-pets-push.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"llamaindex": patch
---

feat: add Azure Cosmos DB DocumentStore, IndexStore, KVStore, update vectorStore and examples
71 changes: 71 additions & 0 deletions examples/azure-cosmosdb.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import "dotenv/config";

import {
DefaultAzureCredential,
getBearerTokenProvider,
} from "@azure/identity";
import {
AzureCosmosDBNoSqlVectorStore,
AzureCosmosNoSqlDocumentStore,
AzureCosmosNoSqlIndexStore,
Document,
OpenAI,
OpenAIEmbedding,
Settings,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
/**
 * This example demonstrates how to use Azure CosmosDB with LlamaIndex.
 * It uses Azure CosmosDB as IndexStore, DocumentStore, and VectorStore.
 *
 * To run this example, create a .env file under /examples and set the following environment variables:
 *
 * AZURE_OPENAI_ENDPOINT="https://AOAI-ACCOUNT.openai.azure.com" // Sample Azure OpenAI endpoint.
 * AZURE_DEPLOYMENT_NAME="gpt-4o" // Sample Azure OpenAI deployment name.
 * EMBEDDING_MODEL="text-embedding-3-large" // Sample Azure OpenAI embedding model.
 * AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT="https://DB-ACCOUNT.documents.azure.com:443/" // Sample CosmosDB account endpoint.
 *
 * This example uses managed identity to authenticate with Azure CosmosDB and Azure OpenAI. Make sure to assign the required roles to the managed identity.
 * You can also use a connection string for Azure CosmosDB and keys for Azure OpenAI for authentication.
 */
async function main(): Promise<void> {
  // One Azure AD token provider, scoped to Cognitive Services, is shared by
  // the LLM and the embedding model.
  const credential = new DefaultAzureCredential();
  const azureADTokenProvider = getBearerTokenProvider(
    credential,
    "https://cognitiveservices.azure.com/.default",
  );

  const azure = {
    azureADTokenProvider,
    deployment: process.env.AZURE_DEPLOYMENT_NAME,
  };
  Settings.llm = new OpenAI({ azure });
  Settings.embedModel = new OpenAIEmbedding({
    model: process.env.EMBEDDING_MODEL,
    azure: {
      ...azure,
      // The embedding deployment is addressed by the embedding model name,
      // overriding the chat deployment spread in above.
      deployment: process.env.EMBEDDING_MODEL,
    },
  });

  // The factories are called without arguments, so endpoint and credentials
  // are left to each store's own defaults.
  const docStore = AzureCosmosNoSqlDocumentStore.fromAadToken();
  console.log({ docStore });
  const indexStore = AzureCosmosNoSqlIndexStore.fromAadToken();
  console.log({ indexStore });
  const vectorStore = AzureCosmosDBNoSqlVectorStore.fromUriAndManagedIdentity();
  console.log({ vectorStore });
  const storageContext = await storageContextFromDefaults({
    docStore,
    indexStore,
    vectorStore,
  });
  console.log({ storageContext });

  const document = new Document({ text: "Test Text" });
  const index = await VectorStoreIndex.fromDocuments([document], {
    storageContext,
    logProgress: true,
  });

  console.log({ index });
}

// Handle rejections explicitly instead of leaving the promise floating, so a
// failed credential/network/database call is reported and the process exits
// with a non-zero code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
73 changes: 50 additions & 23 deletions examples/cosmosdb/loadVectorData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,21 @@ import {
} from "@llamaindex/readers/cosmosdb";
import * as dotenv from "dotenv";
import {
AzureCosmosDBNoSqlVectorStore,
AzureCosmosDBNoSQLConfig,
OpenAI,
OpenAIEmbedding,
Settings,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import {
createStoresFromConnectionString,
createStoresFromManagedIdentity,
} from "./utils";
// Load environment variables from local .env file
dotenv.config();

const cosmosEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT!;
const cosmosEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT!;
const cosmosConnectionString =
process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING!;
const databaseName =
Expand All @@ -26,7 +30,7 @@ const collectionName =
const vectorCollectionName =
process.env.AZURE_COSMOSDB_VECTOR_CONTAINER_NAME || "vectorContainer";

// This exampple uses Azure OpenAI llm and embedding models
// This example uses Azure OpenAI llm and embedding models
const llmInit = {
azure: {
apiVersion: process.env.AZURE_OPENAI_LLM_API_VERSION,
Expand All @@ -46,24 +50,48 @@ const embedModelInit = {
Settings.llm = new OpenAI(llmInit);
Settings.embedModel = new OpenAIEmbedding(embedModelInit);

async function loadVectorData() {
if (!cosmosConnectionString && !cosmosEndpoint) {
throw new Error(
"Azure CosmosDB connection string or endpoint must be set.",
);
}

let cosmosClient: CosmosClient;
// initialize the cosmos client
// Initialize the CosmosDB client
async function initializeCosmosClient() {
if (cosmosConnectionString) {
cosmosClient = new CosmosClient(cosmosConnectionString);
return new CosmosClient(cosmosConnectionString);
} else {
cosmosClient = new CosmosClient({
const credential = new DefaultAzureCredential();
return new CosmosClient({
endpoint: cosmosEndpoint,
aadCredentials: new DefaultAzureCredential(),
aadCredentials: credential,
});
}
}

// Build the CosmosDB-backed vectorStore, docStore, and indexStore used by this example
async function initializeStores() {
  // Settings for the Azure CosmosDB NoSQL Vector Store
  const vectorStoreConfig: AzureCosmosDBNoSQLConfig = {
    databaseName,
    containerName: vectorCollectionName,
    flatMetadata: false,
  };

  // No connection string configured: authenticate against the account
  // endpoint with managed identity instead.
  if (!cosmosConnectionString) {
    return createStoresFromManagedIdentity(
      cosmosEndpoint,
      new DefaultAzureCredential(),
      vectorStoreConfig,
    );
  }

  return createStoresFromConnectionString(
    cosmosConnectionString,
    vectorStoreConfig,
  );
}

async function loadVectorData() {
if (!cosmosConnectionString && !cosmosEndpoint) {
throw new Error(
"Azure CosmosDB connection string or endpoint must be set.",
);
}
const cosmosClient = await initializeCosmosClient();
const reader = new SimpleCosmosDBReader(cosmosClient);
// create a configuration object for the reader
const simpleCosmosReaderConfig: SimpleCosmosDBReaderLoaderConfig = {
Expand All @@ -76,16 +104,15 @@ async function loadVectorData() {

// load objects from cosmos and convert them into LlamaIndex Document objects
const documents = await reader.loadData(simpleCosmosReaderConfig);
// create Azure CosmosDB as a vector store
const vectorStore = new AzureCosmosDBNoSqlVectorStore({
client: cosmosClient,
databaseName,
containerName: vectorCollectionName,
flatMetadata: false,
});

// use Azure CosmosDB as a vectorStore, docStore, and indexStore
const { vectorStore, docStore, indexStore } = await initializeStores();
// Store the embeddings in the CosmosDB container
const storageContext = await storageContextFromDefaults({ vectorStore });
const storageContext = await storageContextFromDefaults({
vectorStore,
docStore,
indexStore,
});
await VectorStoreIndex.fromDocuments(documents, { storageContext });
console.log(
`Successfully created embeddings in the CosmosDB container ${vectorCollectionName}.`,
Expand Down
42 changes: 38 additions & 4 deletions examples/cosmosdb/queryVectorData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ import { DefaultAzureCredential } from "@azure/identity";
import * as dotenv from "dotenv";
import {
AzureCosmosDBNoSQLConfig,
AzureCosmosDBNoSqlVectorStore,
OpenAI,
OpenAIEmbedding,
Settings,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import {
createStoresFromConnectionString,
createStoresFromManagedIdentity,
} from "./utils";

// Load environment variables from local .env file
dotenv.config();

const cosmosEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT!;
const cosmosEndpoint = process.env.AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT!;
const cosmosConnectionString =
process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING!;
const databaseName =
Expand All @@ -40,6 +44,27 @@ const embedModelInit = {
Settings.llm = new OpenAI(llmInit);
Settings.embedModel = new OpenAIEmbedding(embedModelInit);

/**
 * Creates the vectorStore, docStore, and indexStore backed by Azure CosmosDB.
 * A configured connection string takes precedence; otherwise the account
 * endpoint is used together with managed identity.
 */
async function initializeStores() {
  // Configuration for the Azure CosmosDB NoSQL Vector Store
  const vectorStoreConfig: AzureCosmosDBNoSQLConfig = {
    databaseName,
    containerName,
    flatMetadata: false,
  };

  return cosmosConnectionString
    ? createStoresFromConnectionString(
        cosmosConnectionString,
        vectorStoreConfig,
      )
    : createStoresFromManagedIdentity(
        cosmosEndpoint,
        // Managed identity authentication with Azure CosmosDB
        new DefaultAzureCredential(),
        vectorStoreConfig,
      );
}

async function query() {
if (!cosmosConnectionString && !cosmosEndpoint) {
throw new Error(
Expand All @@ -65,10 +90,19 @@ async function query() {
containerName,
flatMetadata: false,
};
const store = new AzureCosmosDBNoSqlVectorStore(dbConfig);

// use Azure CosmosDB as a vectorStore, docStore, and indexStore
const { vectorStore, docStore, indexStore } = await initializeStores();

// Store the embeddings in the CosmosDB container
const storageContext = await storageContextFromDefaults({
vectorStore,
docStore,
indexStore,
});

// create an index from the Azure CosmosDB NoSQL Vector Store
const index = await VectorStoreIndex.fromVectorStore(store);
const index = await VectorStoreIndex.init({ storageContext });

// create a retriever and a query engine from the index
const retriever = index.asRetriever({ similarityTopK: 20 });
Expand Down
51 changes: 51 additions & 0 deletions examples/cosmosdb/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { TokenCredential } from "@azure/identity";
import {
AzureCosmosDBNoSQLConfig,
AzureCosmosDBNoSqlVectorStore,
AzureCosmosNoSqlDocumentStore,
AzureCosmosNoSqlIndexStore,
} from "llamaindex";

/**
 * Builds the Azure CosmosDB vectorStore, docStore, and indexStore from a
 * single connection string.
 *
 * `dbConfig` is forwarded to the vector store only; the document and index
 * stores receive just the connection string.
 *
 * @param connectionString Azure CosmosDB connection string.
 * @param dbConfig Vector store configuration, spread after the connection string.
 * @returns The three stores as `{ vectorStore, docStore, indexStore }`.
 */
export const createStoresFromConnectionString = (
  connectionString: string,
  dbConfig: AzureCosmosDBNoSQLConfig,
) => ({
  vectorStore: AzureCosmosDBNoSqlVectorStore.fromConnectionString({
    connectionString,
    ...dbConfig,
  }),
  // Each factory gets its own options literal; the objects are intentionally
  // not shared between stores.
  docStore: AzureCosmosNoSqlDocumentStore.fromConnectionString({
    connectionString,
  }),
  indexStore: AzureCosmosNoSqlIndexStore.fromConnectionString({
    connectionString,
  }),
});

/**
 * Builds the Azure CosmosDB vectorStore, docStore, and indexStore from an
 * account endpoint and a token credential (managed identity / AAD).
 *
 * `dbConfig` is forwarded to the vector store only; the document and index
 * stores receive just the endpoint and credential.
 *
 * @param endpoint Azure CosmosDB account endpoint.
 * @param credential Token credential used to authenticate.
 * @param dbConfig Vector store configuration, spread after endpoint and credential.
 * @returns The three stores as `{ vectorStore, docStore, indexStore }`.
 */
export const createStoresFromManagedIdentity = (
  endpoint: string,
  credential: TokenCredential,
  dbConfig: AzureCosmosDBNoSQLConfig,
) => ({
  vectorStore: AzureCosmosDBNoSqlVectorStore.fromUriAndManagedIdentity({
    endpoint,
    credential,
    ...dbConfig,
  }),
  // Each factory gets its own options literal; the objects are intentionally
  // not shared between stores.
  docStore: AzureCosmosNoSqlDocumentStore.fromAadToken({
    endpoint,
    credential,
  }),
  indexStore: AzureCosmosNoSqlIndexStore.fromAadToken({
    endpoint,
    credential,
  }),
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import {
AzureCosmosNoSqlKVStore,
type AadTokenOptions,
type AccountAndKeyOptions,
type ConnectionStringOptions,
} from "../kvStore/AzureCosmosNoSqlKVStore.js";
import { KVDocumentStore } from "./KVDocumentStore.js";

// Database name the static factories fall back to when `options.dbName` is not set.
const DEFAULT_DATABASE = "DocumentStoreDB";
// Container name the static factories fall back to when `options.containerName` is not set.
const DEFAULT_CONTAINER = "DocumentStoreContainer";

/**
 * Constructor arguments for AzureCosmosNoSqlDocumentStore.
 */
export interface AzureCosmosNoSqlDocumentStoreArgs {
// KV store instance passed to the KVDocumentStore base constructor.
azureCosmosNoSqlKVStore: AzureCosmosNoSqlKVStore;
// Optional namespace passed to the base constructor; the static factories
// set it to `<dbName>.<containerName>`.
namespace?: string;
}

/**
 * Document store backed by Azure Cosmos DB for NoSQL.
 *
 * Wraps an AzureCosmosNoSqlKVStore in a KVDocumentStore. The static factories
 * apply this store's default database/container names and use
 * `<dbName>.<containerName>` as the namespace.
 *
 * The factories never modify the `options` object passed in, so the same
 * object can safely be reused to create other stores.
 */
export class AzureCosmosNoSqlDocumentStore extends KVDocumentStore {
  constructor({
    azureCosmosNoSqlKVStore,
    namespace,
  }: AzureCosmosNoSqlDocumentStoreArgs) {
    super(azureCosmosNoSqlKVStore, namespace);
  }

  /**
   * Static method for creating an instance using a connection string.
   * If no connection string is provided, it will attempt to use the env variable `AZURE_COSMOSDB_NOSQL_CONNECTION_STRING` as connection string.
   * @param options Connection options; missing `dbName` / `containerName` fall back to the document store defaults.
   * @returns Instance of AzureCosmosNoSqlDocumentStore
   */
  static fromConnectionString(options: ConnectionStringOptions = {}) {
    // Resolve defaults on a copy so the caller's object is left untouched.
    const resolved: ConnectionStringOptions = {
      ...options,
      dbName: options.dbName || DEFAULT_DATABASE,
      containerName: options.containerName || DEFAULT_CONTAINER,
    };

    const azureCosmosNoSqlKVStore =
      AzureCosmosNoSqlKVStore.fromConnectionString(resolved);
    const namespace = `${resolved.dbName}.${resolved.containerName}`;
    return new AzureCosmosNoSqlDocumentStore({
      azureCosmosNoSqlKVStore,
      namespace,
    });
  }

  /**
   * Static method for creating an instance using an account endpoint and key.
   * If no endpoint and key is provided, it will attempt to use the env variable `AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT` as endpoint and `AZURE_COSMOSDB_NOSQL_ACCOUNT_KEY` as key.
   * @param options Account options; missing `dbName` / `containerName` fall back to the document store defaults.
   * @returns Instance of AzureCosmosNoSqlDocumentStore
   */
  static fromAccountAndKey(options: AccountAndKeyOptions = {}) {
    // Resolve defaults on a copy so the caller's object is left untouched.
    const resolved: AccountAndKeyOptions = {
      ...options,
      dbName: options.dbName || DEFAULT_DATABASE,
      containerName: options.containerName || DEFAULT_CONTAINER,
    };

    const azureCosmosNoSqlKVStore =
      AzureCosmosNoSqlKVStore.fromAccountAndKey(resolved);
    const namespace = `${resolved.dbName}.${resolved.containerName}`;
    return new AzureCosmosNoSqlDocumentStore({
      azureCosmosNoSqlKVStore,
      namespace,
    });
  }

  /**
   * Static method for creating an instance using AAD token.
   * If no endpoint and credentials are provided, it will attempt to use the env variable `AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT` as endpoint and use DefaultAzureCredential() as credentials.
   * @param options AAD options; missing `dbName` / `containerName` fall back to the document store defaults.
   * @returns Instance of AzureCosmosNoSqlDocumentStore
   */
  static fromAadToken(options: AadTokenOptions = {}) {
    // Resolve defaults on a copy so the caller's object is left untouched.
    const resolved: AadTokenOptions = {
      ...options,
      dbName: options.dbName || DEFAULT_DATABASE,
      containerName: options.containerName || DEFAULT_CONTAINER,
    };

    const azureCosmosNoSqlKVStore =
      AzureCosmosNoSqlKVStore.fromAadToken(resolved);
    const namespace = `${resolved.dbName}.${resolved.containerName}`;
    return new AzureCosmosNoSqlDocumentStore({
      azureCosmosNoSqlKVStore,
      namespace,
    });
  }
}
Loading

0 comments on commit 396b1e1

Please sign in to comment.