diff --git a/apps/next/src/content/docs/llamaindex/_static/concepts/indexing.jpg b/apps/next/src/content/docs/llamaindex/_static/concepts/indexing.jpg
new file mode 100644
index 0000000000..8672967213
Binary files /dev/null and b/apps/next/src/content/docs/llamaindex/_static/concepts/indexing.jpg differ
diff --git a/apps/next/src/content/docs/llamaindex/_static/concepts/querying.jpg b/apps/next/src/content/docs/llamaindex/_static/concepts/querying.jpg
new file mode 100644
index 0000000000..3c241bdda5
Binary files /dev/null and b/apps/next/src/content/docs/llamaindex/_static/concepts/querying.jpg differ
diff --git a/apps/next/src/content/docs/llamaindex/_static/concepts/rag.jpg b/apps/next/src/content/docs/llamaindex/_static/concepts/rag.jpg
new file mode 100644
index 0000000000..b68eca2564
Binary files /dev/null and b/apps/next/src/content/docs/llamaindex/_static/concepts/rag.jpg differ
diff --git a/apps/next/src/content/docs/llamaindex/examples/agent.mdx b/apps/next/src/content/docs/llamaindex/examples/agent.mdx
new file mode 100644
index 0000000000..84b4fb29f3
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/agent.mdx
@@ -0,0 +1,12 @@
+---
+title: Agents
+---
+
+A built-in agent that can make decisions and reason based on the tools provided to it.
+
+## OpenAI Agent
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/agent/openai";
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
diff --git a/apps/next/src/content/docs/llamaindex/examples/agent_gemini.mdx b/apps/next/src/content/docs/llamaindex/examples/agent_gemini.mdx
new file mode 100644
index 0000000000..38bc4ef72d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/agent_gemini.mdx
@@ -0,0 +1,8 @@
+---
+title: Gemini Agent
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSourceGemini from "!raw-loader!../../../../../../../examples/gemini/agent.ts";
+
+<DynamicCodeBlock lang="ts" code={CodeSourceGemini} />
diff --git a/apps/next/src/content/docs/llamaindex/examples/chat_engine.mdx b/apps/next/src/content/docs/llamaindex/examples/chat_engine.mdx
new file mode 100644
index 0000000000..ac4528951a
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/chat_engine.mdx
@@ -0,0 +1,10 @@
+---
+title: Chat Engine
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/chatEngine";
+
+The chat engine is a class that lets you create a chatbot from a retriever. It wraps the retriever so you can chat with your data in a conversational, multi-turn manner.
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
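+
+For reference, here's a minimal sketch of the idea, using `ContextChatEngine` to wrap an index's retriever (the document text and question are just placeholders):
+
+```typescript
+import { ContextChatEngine, Document, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // index a small document so there is something to retrieve from
+  const index = await VectorStoreIndex.fromDocuments([
+    new Document({ text: "LlamaIndex.TS is a data framework for LLM applications." }),
+  ]);
+
+  // wrap the index's retriever in a chat engine
+  const chatEngine = new ContextChatEngine({ retriever: index.asRetriever() });
+
+  // the chat engine keeps the conversation history, so follow-up questions have context
+  const response = await chatEngine.chat({ message: "What is LlamaIndex.TS?" });
+  console.log(response.response);
+}
+
+main().catch(console.error);
+```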
diff --git a/apps/next/src/content/docs/llamaindex/examples/context_aware_agent.mdx b/apps/next/src/content/docs/llamaindex/examples/context_aware_agent.mdx
new file mode 100644
index 0000000000..6aa5c5a4fb
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/context_aware_agent.mdx
@@ -0,0 +1,61 @@
+---
+title: Context-Aware Agent
+---
+
+The Context-Aware Agent enhances the capabilities of standard LLM agents by incorporating relevant context from a retriever for each query. This allows the agent to provide more informed and specific responses based on the available information.
+
+## Usage
+
+Here's a simple example of how to use the Context-Aware Agent:
+
+```typescript
+import {
+ Document,
+ VectorStoreIndex,
+ OpenAIContextAwareAgent,
+ OpenAI,
+} from "llamaindex";
+
+async function createContextAwareAgent() {
+ // Create and index some documents
+ const documents = [
+ new Document({
+ text: "LlamaIndex is a data framework for LLM applications.",
+ id_: "doc1",
+ }),
+ new Document({
+ text: "The Eiffel Tower is located in Paris, France.",
+ id_: "doc2",
+ }),
+ ];
+
+ const index = await VectorStoreIndex.fromDocuments(documents);
+ const retriever = index.asRetriever({ similarityTopK: 1 });
+
+ // Create the Context-Aware Agent
+ const agent = new OpenAIContextAwareAgent({
+ llm: new OpenAI({ model: "gpt-3.5-turbo" }),
+ contextRetriever: retriever,
+ });
+
+ // Use the agent to answer queries
+ const response = await agent.chat({
+ message: "What is LlamaIndex used for?",
+ });
+
+ console.log("Agent Response:", response.response);
+}
+
+createContextAwareAgent().catch(console.error);
+```
+
+In this example, the Context-Aware Agent uses the retriever to fetch relevant context for each query, allowing it to provide more accurate and informed responses based on the indexed documents.
+
+## Key Components
+
+- `contextRetriever`: A retriever (e.g., from a VectorStoreIndex) that fetches relevant documents or passages for each query.
+
+## Available Context-Aware Agents
+
+- `OpenAIContextAwareAgent`: A context-aware agent using OpenAI's models.
+- `AnthropicContextAwareAgent`: A context-aware agent using Anthropic's models.
diff --git a/apps/next/src/content/docs/llamaindex/examples/local_llm.mdx b/apps/next/src/content/docs/llamaindex/examples/local_llm.mdx
new file mode 100644
index 0000000000..031019fabe
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/local_llm.mdx
@@ -0,0 +1,79 @@
+---
+title: Local LLMs
+---
+
+LlamaIndex.TS supports OpenAI and [other remote LLM APIs](other_llms). You can also run a local LLM on your machine!
+
+## Using a local model via Ollama
+
+The easiest way to run a local LLM is via the great work of our friends at [Ollama](https://ollama.com/), who provide a simple to use client that will download, install and run a [growing range of models](https://ollama.com/library) for you.
+
+### Install Ollama
+
+They provide a one-click installer for Mac, Linux and Windows on their [home page](https://ollama.com/).
+
+### Pick and run a model
+
+Since we're going to be doing agentic work, we'll need a very capable model, but the largest models are hard to run on a laptop. We think `mixtral 8x7b` is a good balance between power and resources, but `llama3` is another great option. You can run Mixtral by running
+
+```bash
+ollama run mixtral:8x7b
+```
+
+The first time you run it, it will also automatically download and install the model for you.
+
+### Switch the LLM in your code
+
+To tell LlamaIndex to use a local LLM, use the `Settings` object:
+
+```javascript
+import { Ollama, Settings } from "llamaindex";
+
+Settings.llm = new Ollama({
+ model: "mixtral:8x7b",
+});
+```
+
+### Use local embeddings
+
+If you're doing retrieval-augmented generation, LlamaIndex.TS will also call out to OpenAI to index and embed your data. To be entirely local, you can use a local embedding model like this:
+
+```javascript
+import { HuggingFaceEmbedding, Settings } from "llamaindex";
+
+Settings.embedModel = new HuggingFaceEmbedding({
+ modelType: "BAAI/bge-small-en-v1.5",
+ quantized: false,
+});
+```
+
+The first time this runs, it will download the embedding model so it can run locally.
+
+### Try it out
+
+With a local LLM and local embeddings in place, you can perform RAG as usual and everything will happen on your machine without calling an API:
+
+```typescript
+import fs from "node:fs/promises";
+import { Document, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+ // Load essay from abramov.txt in Node
+ const path = "node_modules/llamaindex/examples/abramov.txt";
+
+ const essay = await fs.readFile(path, "utf-8");
+
+ // Create Document object with essay
+ const document = new Document({ text: essay, id_: path });
+
+ // Split text and create embeddings. Store them in a VectorStoreIndex
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // Query the index
+ const queryEngine = index.asQueryEngine();
+
+ const response = await queryEngine.query({
+ query: "What did the author do in college?",
+ });
+
+ // Output response
+ console.log(response.toString());
+}
+
+main().catch(console.error);
+```
+
+You can see the [full example file](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/vectorIndexLocal.ts).
diff --git a/apps/next/src/content/docs/llamaindex/examples/meta.json b/apps/next/src/content/docs/llamaindex/examples/meta.json
new file mode 100644
index 0000000000..f432a3b772
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/meta.json
@@ -0,0 +1,15 @@
+{
+ "title": "Examples",
+ "pages": [
+ "more_examples",
+ "chat_engine",
+ "vector_index",
+ "summary_index",
+ "save_load_index",
+ "context_aware_agent",
+ "agent",
+ "agent_gemini",
+ "local_llm",
+ "other_llms"
+ ]
+}
diff --git a/apps/next/src/content/docs/llamaindex/examples/more_examples.mdx b/apps/next/src/content/docs/llamaindex/examples/more_examples.mdx
new file mode 100644
index 0000000000..ef7a02754d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/more_examples.mdx
@@ -0,0 +1,21 @@
+---
+title: See all examples
+---
+
+Our GitHub repository has a wealth of examples to explore and try out. You can check out our [examples folder](https://github.com/run-llama/LlamaIndexTS/tree/main/examples) to see them all at once, or browse the pages in this section for some selected highlights.
+
+## Check out all examples
+
+It may be useful to check out all the examples at once so you can try them out locally. To copy them into a folder called `my-new-project`, run these commands:
+
+```bash npm2yarn
+npx degit run-llama/LlamaIndexTS/examples my-new-project
+cd my-new-project
+npm install
+```
+
+Then you can run any example in the folder with `tsx`, e.g.:
+
+```bash npm2yarn
+npx tsx ./vectorIndex.ts
+```
diff --git a/apps/next/src/content/docs/llamaindex/examples/other_llms.mdx b/apps/next/src/content/docs/llamaindex/examples/other_llms.mdx
new file mode 100644
index 0000000000..5fdc6bbaa0
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/other_llms.mdx
@@ -0,0 +1,43 @@
+---
+title: Using other LLM APIs
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/mistral";
+
+By default LlamaIndex.TS uses OpenAI's LLMs and embedding models, but we support [lots of other LLMs](../modules/llms) including models from Mistral (Mistral, Mixtral), Anthropic (Claude) and Google (Gemini).
+
+If you don't want to use an API at all, you can [run a local model](../../examples/local_llm).
+
+## Using another LLM
+
+You can specify what LLM LlamaIndex.TS will use on the `Settings` object, like this:
+
+```typescript
+import { MistralAI, Settings } from "llamaindex";
+
+Settings.llm = new MistralAI({
+ model: "mistral-tiny",
+ apiKey: "",
+});
+```
+
+You can see examples of other APIs we support by checking out "Available LLMs" in the sidebar of our [LLMs section](../modules/llms).
+
+## Using another embedding model
+
+A frequent gotcha when using a different API for your LLM is that LlamaIndex will, by default, still index and embed your data using OpenAI's embeddings. To completely switch away from OpenAI you will need to set your embedding model as well, for example:
+
+```typescript
+import { MistralAIEmbedding, Settings } from "llamaindex";
+
+Settings.embedModel = new MistralAIEmbedding();
+```
+
+We support [many different embeddings](../modules/embeddings).
+
+## Full example
+
+This example uses Mistral's `mistral-tiny` model as the LLM and Mistral for embeddings as well.
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
diff --git a/apps/next/src/content/docs/llamaindex/examples/save_load_index.mdx b/apps/next/src/content/docs/llamaindex/examples/save_load_index.mdx
new file mode 100644
index 0000000000..bce10b9db6
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/save_load_index.mdx
@@ -0,0 +1,8 @@
+---
+title: Save/Load an Index
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/storageContext";
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
diff --git a/apps/next/src/content/docs/llamaindex/examples/summary_index.mdx b/apps/next/src/content/docs/llamaindex/examples/summary_index.mdx
new file mode 100644
index 0000000000..344ce6fe84
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/summary_index.mdx
@@ -0,0 +1,8 @@
+---
+title: Summary Index
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/summaryIndex";
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
diff --git a/apps/next/src/content/docs/llamaindex/examples/vector_index.mdx b/apps/next/src/content/docs/llamaindex/examples/vector_index.mdx
new file mode 100644
index 0000000000..03c16fd956
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/examples/vector_index.mdx
@@ -0,0 +1,8 @@
+---
+title: Vector Index
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/vectorIndex";
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/concepts.mdx b/apps/next/src/content/docs/llamaindex/getting_started/concepts.mdx
new file mode 100644
index 0000000000..4189c3204b
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/concepts.mdx
@@ -0,0 +1,76 @@
+---
+title: Concepts
+---
+
+LlamaIndex.TS helps you build LLM-powered applications (e.g. Q&A, chatbot) over custom data.
+
+In this high-level concepts guide, you will learn:
+
+- how an LLM can answer questions using your own data.
+- key concepts and modules in LlamaIndex.TS for composing your own query pipeline.
+
+## Answering Questions Across Your Data
+
+LlamaIndex uses a two-stage method when using an LLM with your data:
+
+1. **indexing stage**: preparing a knowledge base, and
+2. **querying stage**: retrieving relevant context from the knowledge base to assist the LLM in responding to a question
+
+![Retrieval Augmented Generation (RAG)](../_static/concepts/rag.jpg)
+
+This process is also known as Retrieval Augmented Generation (RAG).
+
+LlamaIndex.TS provides the essential toolkit for making both steps super easy.
+
+Let's explore each stage in detail.
+
+### Indexing Stage
+
+LlamaIndex.TS helps you prepare the knowledge base with a suite of data connectors and indexes.
+
+![Indexing stage](../_static/concepts/indexing.jpg)
+
+[**Data Loaders**](/docs/llamaindex/modules/data_loaders/index):
+A data connector (i.e. `Reader`) ingests data from different data sources and data formats into a simple `Document` representation (text and simple metadata).
+
+[**Documents / Nodes**](/docs/llamaindex/modules/documents_and_nodes/index): A `Document` is a generic container around any data source - for instance, a PDF, an API output, or retrieved data from a database. A `Node` is the atomic unit of data in LlamaIndex and represents a "chunk" of a source `Document`. It's a rich representation that includes metadata and relationships (to other nodes) to enable accurate and expressive retrieval operations.
+
+[**Data Indexes**](/docs/llamaindex/modules/data_index):
+Once you've ingested your data, LlamaIndex helps you index data into a format that's easy to retrieve.
+
+Under the hood, LlamaIndex parses the raw documents into intermediate representations, calculates vector embeddings, and stores your data in-memory or to disk.
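+
+As a rough sketch of the indexing stage (the text here is a placeholder for data loaded by a `Reader`):
+
+```typescript
+import { Document, VectorStoreIndex } from "llamaindex";
+
+// wrap raw text from any source in a Document
+const document = new Document({ text: "Text pulled from your data source." });
+
+// parse into nodes, compute embeddings, and store them (in memory by default)
+const index = await VectorStoreIndex.fromDocuments([document]);
+```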
+
+### Querying Stage
+
+In the querying stage, the query pipeline retrieves the most relevant context given a user query,
+and passes that to the LLM (along with the query) to synthesize a response.
+
+This gives the LLM up-to-date knowledge that is not in its original training data,
+which also reduces hallucination.
+
+The key challenge in the querying stage is retrieval, orchestration, and reasoning over (potentially many) knowledge bases.
+
+LlamaIndex provides composable modules that help you build and integrate RAG pipelines for Q&A (query engine), chatbot (chat engine), or as part of an agent.
+
+These building blocks can be customized to reflect ranking preferences, as well as composed to reason over multiple knowledge bases in a structured way.
+
+![Querying stage](../_static/concepts/querying.jpg)
+
+#### Building Blocks
+
+[**Retrievers**](/docs/llamaindex/modules/retriever):
+A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
+The specific retrieval logic differs for different indices, the most popular being dense retrieval against a vector index.
+
+[**Response Synthesizers**](/docs/llamaindex/modules/response_synthesizer):
+A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.
+
+#### Pipelines
+
+[**Query Engines**](/docs/llamaindex/modules/query_engines):
+A query engine is an end-to-end pipeline that allows you to ask questions over your data.
+It takes in a natural language query and returns a response, along with the reference context that was retrieved and passed to the LLM.
+
+[**Chat Engines**](/docs/llamaindex/modules/chat_engine):
+A chat engine is an end-to-end pipeline for having a conversation with your data
+(multiple back-and-forth instead of a single question & answer).
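+
+Tying these together, a minimal querying sketch over the `index` built in the indexing example above might look like this:
+
+```typescript
+// a retriever fetches the most relevant nodes from the index for a given query
+const retriever = index.asRetriever({ similarityTopK: 3 });
+
+// a query engine pairs that retriever with a response synthesizer
+const queryEngine = index.asQueryEngine({ retriever });
+
+const response = await queryEngine.query({
+  query: "What does my data say about this topic?",
+});
+console.log(response.toString());
+```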
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/environments.mdx b/apps/next/src/content/docs/llamaindex/getting_started/environments.mdx
new file mode 100644
index 0000000000..87530ffde6
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/environments.mdx
@@ -0,0 +1,20 @@
+---
+title: Environments
+---
+
+We support Node.js versions 18, 20 and 22, with experimental support for Deno, Bun and Vercel Edge functions.
+
+## NextJS
+
+If you're using NextJS, you'll need to add `withLlamaIndex` to your `next.config.js` file. This will add the necessary configuration for the included third-party libraries to your build:
+
+```js
+// next.config.js
+const withLlamaIndex = require("llamaindex/next");
+
+module.exports = withLlamaIndex({
+ // your next.js config
+});
+```
+
+For details, check the latest [withLlamaIndex](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/llamaindex/src/next.ts) implementation.
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/index.mdx b/apps/next/src/content/docs/llamaindex/getting_started/index.mdx
new file mode 100644
index 0000000000..f77fa845a0
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/index.mdx
@@ -0,0 +1,35 @@
+---
+title: Getting Started with LlamaIndex.TS
+description: Install llamaindex by running a single command.
+---
+
+import { Tab, Tabs } from "fumadocs-ui/components/tabs";
+
+
+ ```shell tab="npm"
+ npm install llamaindex
+ ```
+
+ ```shell tab="yarn"
+ yarn add llamaindex
+ ```
+
+ ```shell tab="pnpm"
+ pnpm add llamaindex
+ ```
+
+
+## What's next?
+
+
+
+
+
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/meta.json b/apps/next/src/content/docs/llamaindex/getting_started/meta.json
new file mode 100644
index 0000000000..ed2c8903e1
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/meta.json
@@ -0,0 +1,4 @@
+{
+ "title": "Getting Started",
+ "pages": ["index", "setup", "starter_tutorial", "environments", "concepts"]
+}
diff --git a/apps/next/src/content/docs/llamaindex/setup/cloudflare.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/cloudflare.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/cloudflare.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/cloudflare.mdx
diff --git a/apps/next/src/content/docs/llamaindex/setup/getting-started.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/getting-started.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/getting-started.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/getting-started.mdx
diff --git a/apps/next/src/content/docs/llamaindex/setup/meta.json b/apps/next/src/content/docs/llamaindex/getting_started/setup/meta.json
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/meta.json
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/meta.json
diff --git a/apps/next/src/content/docs/llamaindex/setup/next.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/next.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/next.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/next.mdx
diff --git a/apps/next/src/content/docs/llamaindex/setup/node.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/node.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/node.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/node.mdx
diff --git a/apps/next/src/content/docs/llamaindex/setup/typescript.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/typescript.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/typescript.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/typescript.mdx
diff --git a/apps/next/src/content/docs/llamaindex/setup/vite.mdx b/apps/next/src/content/docs/llamaindex/getting_started/setup/vite.mdx
similarity index 100%
rename from apps/next/src/content/docs/llamaindex/setup/vite.mdx
rename to apps/next/src/content/docs/llamaindex/getting_started/setup/vite.mdx
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/agent.mdx b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/agent.mdx
new file mode 100644
index 0000000000..ffa84aa94c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/agent.mdx
@@ -0,0 +1,47 @@
+---
+title: Agent tutorial
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/agent/openai";
+
+We have a comprehensive, step-by-step [guide to building agents in LlamaIndex.TS](../../guides/agents/setup) that we recommend if you want to learn what agents are and how to build them for production. But building a basic agent is simple:
+
+## Set up
+
+In a new folder:
+
+```bash npm2yarn
+npm init
+npm install -D typescript @types/node
+```
+
+## Run agent
+
+Create the file `example.ts`. This code will:
+
+- Create two tools for use by the agent:
+ - A `sumNumbers` tool that adds two numbers
+ - A `divideNumbers` tool that divides numbers
+- Create an agent with access to those tools
+- Ask the agent a question that requires using the tools
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+To run the code:
+
+```bash
+npx tsx example.ts
+```
+
+You should expect output something like:
+
+```
+{
+ content: 'The sum of 5 + 5 is 10. When you divide 10 by 2, you get 5.',
+ role: 'assistant',
+ options: {}
+}
+Done
+```
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/chatbot.mdx b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/chatbot.mdx
new file mode 100644
index 0000000000..8672118933
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/chatbot.mdx
@@ -0,0 +1,25 @@
+---
+title: Chatbot tutorial
+---
+
+Once you've mastered basic [retrieval-augmented generation](retrieval_augmented_generation) you may want to create an interface to chat with your data. You can do this step-by-step, but we recommend getting started quickly using `create-llama`.
+
+## Using create-llama
+
+`create-llama` is a powerful but easy-to-use command-line tool that generates a working, full-stack web application that lets you chat with your data. You can learn more about it on [the `create-llama` README page](https://www.npmjs.com/package/create-llama).
+
+Run it once and it will ask you a series of questions about the kind of application you want to generate. Then you can customize your application to suit your use-case. To get started, run:
+
+```bash npm2yarn
+npx create-llama@latest
+```
+
+Once your app is generated, `cd` into your app directory and run
+
+```bash npm2yarn
+npm run dev
+```
+
+to start the development server. You can then visit [http://localhost:3000](http://localhost:3000) to see your app, which should look something like this:
+
+![The create-llama chat interface](./images/create_llama.png)
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/images/create_llama.png b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/images/create_llama.png
new file mode 100644
index 0000000000..0dd4daddb7
Binary files /dev/null and b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/images/create_llama.png differ
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/meta.json b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/meta.json
new file mode 100644
index 0000000000..1ea6d9295c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/meta.json
@@ -0,0 +1,9 @@
+{
+ "title": "Starter Tutorials",
+ "pages": [
+ "retrieval_augmented_generation",
+ "chatbot",
+ "structured_data_extraction",
+ "agent"
+ ]
+}
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/retrieval_augmented_generation.mdx b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/retrieval_augmented_generation.mdx
new file mode 100644
index 0000000000..2da654d1e8
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/retrieval_augmented_generation.mdx
@@ -0,0 +1,56 @@
+---
+title: Retrieval Augmented Generation (RAG) Tutorial
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/vectorIndex";
+import TSConfigSource from "!!raw-loader!../../../../../../../../examples/tsconfig.json";
+
+One of the most common use-cases for LlamaIndex is Retrieval-Augmented Generation or RAG, in which your data is indexed and selectively retrieved to be given to an LLM as source material for responding to a query. You can learn more about the [concepts behind RAG](../concepts).
+
+## Set up the project
+
+In a new folder, run:
+
+```bash npm2yarn
+npm init
+npm install -D typescript @types/node
+```
+
+Then, check out the [installation](../installation) steps to install LlamaIndex.TS and prepare an OpenAI key.
+
+You can use [other LLMs](../../examples/other_llms) via their APIs; if you would prefer to use local models check out our [local LLM example](../../examples/local_llm).
+
+## Run queries
+
+Create the file `example.ts`. This code will
+
+- load an example file
+- convert it into a Document object
+- index it (which creates embeddings using OpenAI)
+- create a query engine to answer questions about the data
+
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+Create a `tsconfig.json` file in the same folder:
+
+<DynamicCodeBlock lang="json" code={TSConfigSource} />
+
+Now you can run the code with
+
+```bash
+npx tsx example.ts
+```
+
+You should expect output something like:
+
+```
+In college, the author studied subjects like linear algebra and physics, but did not find them particularly interesting. They started slacking off, skipping lectures, and eventually stopped attending classes altogether. They also had a negative experience with their English classes, where they were required to pay for catch-up training despite getting verbal approval to skip most of the classes. Ultimately, the author lost motivation for college due to their job as a software developer and stopped attending classes, only returning years later to pick up their papers.
+
+0: Score: 0.8305309270895813 - I started this decade as a first-year college stud...
+
+
+1: Score: 0.8286388215713089 - A short digression. I’m not saying colleges are wo...
+```
+
+Once you've mastered basic RAG, you may want to consider [chatting with your data](chatbot).
diff --git a/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/structured_data_extraction.mdx b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/structured_data_extraction.mdx
new file mode 100644
index 0000000000..e925192cd1
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/getting_started/starter_tutorial/structured_data_extraction.mdx
@@ -0,0 +1,50 @@
+---
+title: Structured data extraction tutorial
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/jsonExtract";
+
+Make sure you have installed LlamaIndex.TS and have an OpenAI key. If you haven't, check out the [installation](../installation) guide.
+
+You can use [other LLMs](../../examples/other_llms) via their APIs; if you would prefer to use local models check out our [local LLM example](../../examples/local_llm).
+
+## Set up
+
+In a new folder:
+
+```bash npm2yarn
+npm init
+npm install -D typescript @types/node
+```
+
+## Extract data
+
+Create the file `example.ts`. This code will:
+
+- Set up an LLM connection to GPT-4
+- Give an example of the data structure we wish to generate
+- Prompt the LLM with instructions and the example, plus a sample transcript
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+To run the code:
+
+```bash
+npx tsx example.ts
+```
+
+You should expect output something like:
+
+```json
+{
+ "summary": "Sarah from XYZ Company called John to introduce the XYZ Widget, a tool designed to automate tasks and improve productivity. John expressed interest and requested case studies and a product demo. Sarah agreed to send the information and follow up to schedule the demo.",
+ "products": ["XYZ Widget"],
+ "rep_name": "Sarah",
+ "prospect_name": "John",
+ "action_items": [
+ "Send case studies and additional product information to John",
+ "Follow up with John to schedule a product demo"
+ ]
+}
+```
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/1_setup.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/1_setup.mdx
new file mode 100644
index 0000000000..770fb26c87
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/1_setup.mdx
@@ -0,0 +1,39 @@
+---
+title: Agent tutorial
+---
+
+In this guide we'll walk you through the process of building an Agent in JavaScript using the LlamaIndex.TS library, starting from nothing and adding complexity in stages.
+
+## What is an Agent?
+
+In LlamaIndex, an agent is a semi-autonomous piece of software powered by an LLM that is given a task and executes a series of steps towards solving that task. It is given a set of tools, which can be anything from arbitrary functions up to full LlamaIndex query engines, and it selects the best available tool to complete each step. When each step is completed, the agent judges whether the task is now complete, in which case it returns a result to the user, or whether it needs to take another step, in which case it loops back to the start.
+
+![The agent flow](./images/agent_flow.png)
+
+## Install LlamaIndex.TS
+
+You'll need to have a recent version of [Node.js](https://nodejs.org/en) installed. Then you can install LlamaIndex.TS by running
+
+```bash
+npm install llamaindex
+```
+
+## Choose your model
+
+By default we'll be using OpenAI with GPT-4, as it's a powerful model and easy to get started with. If you'd prefer to run a local model, see [using a local model](local_model).
+
+## Get an OpenAI API key
+
+If you don't already have one, you can sign up for an [OpenAI API key](https://platform.openai.com/api-keys). You should then put the key in a `.env` file in the root of the project; the file should look like
+
+```
+OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXXXXXX
+```
+
+We'll use `dotenv` to pull the API key out of that .env file, so also run:
+
+```bash
+npm install dotenv
+```
+
+Now you're ready to [create your agent](create_agent).
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/2_create_agent.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/2_create_agent.mdx
new file mode 100644
index 0000000000..909250a2b2
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/2_create_agent.mdx
@@ -0,0 +1,181 @@
+---
+title: Create a basic agent
+---
+
+We want to use `await` so we're going to wrap all of our code in a `main` function, like this:
+
+```typescript
+// Your imports go here
+
+async function main() {
+ // the rest of your code goes here
+}
+
+main().catch(console.error);
+```
+
+For the rest of this guide we'll assume your code is wrapped like this so we can use `await`. You can run the code this way:
+
+```bash
+npx tsx example.ts
+```
+
+### Load your dependencies
+
+First we'll need to pull in our dependencies. These are:
+
+- The OpenAI class to use the OpenAI LLM
+- FunctionTool to provide tools to our agent
+- OpenAIAgent to create the agent itself
+- Settings to define some global settings for the library
+- Dotenv to load our API key from the .env file
+
+```javascript
+import { OpenAI, FunctionTool, OpenAIAgent, Settings } from "llamaindex";
+import "dotenv/config";
+```
+
+### Initialize your LLM
+
+We need to tell our OpenAI class where its API key is, and which of OpenAI's models to use. We'll be using `gpt-4o`, which is capable while still being pretty cheap. This is a global setting, so any part of the library that needs an LLM will use the same model.
+
+```javascript
+Settings.llm = new OpenAI({
+ apiKey: process.env.OPENAI_API_KEY,
+ model: "gpt-4o",
+});
+```
+
+### Turn on logging
+
+We want to see what our agent is up to, so we're going to hook into some events that the library generates and print them out. There are several events possible, but we'll specifically tune in to `llm-tool-call` (fired when a tool is called) and `llm-tool-result` (fired when a tool returns its result).
+
+```javascript
+Settings.callbackManager.on("llm-tool-call", (event) => {
+ console.log(event.detail);
+});
+Settings.callbackManager.on("llm-tool-result", (event) => {
+ console.log(event.detail);
+});
+```
+
+### Create a function
+
+We're going to create a very simple function that adds two numbers together. This will be the tool we ask our agent to use.
+
+```javascript
+const sumNumbers = ({ a, b }) => {
+ return `${a + b}`;
+};
+```
+
+Note that we're passing in an object with two named parameters, `a` and `b`. This is a little unusual, but important for defining a tool that an LLM can use.
+
+### Turn the function into a tool for the agent
+
+This is the most complicated part of creating an agent. We need to define a `FunctionTool`. We have to pass in:
+
+- The function itself (`sumNumbers`)
+- A name for the function, which the LLM will use to call it
+- A description of the function. The LLM will read this description to figure out what the tool does, and if it needs to call it
+- A schema for the function's parameters. We tell the LLM that the parameter is an `object`, and we tell it about the two named parameters we gave it, `a` and `b`. We describe each parameter as a `number`, and we say that both are required.
+- You can see [more examples of function schemas](https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models).
+
+```javascript
+const tool = FunctionTool.from(sumNumbers, {
+ name: "sumNumbers",
+ description: "Use this function to sum two numbers",
+ parameters: {
+ type: "object",
+ properties: {
+ a: {
+ type: "number",
+ description: "First number to sum",
+ },
+ b: {
+ type: "number",
+ description: "Second number to sum",
+ },
+ },
+ required: ["a", "b"],
+ },
+});
+```
+
+We then wrap up the tools into an array. We could provide lots of tools this way, but for this example we're just using the one.
+
+```javascript
+const tools = [tool];
+```
+
+### Create the agent
+
+With your LLM already set up and your tools defined, creating an agent is simple:
+
+```javascript
+const agent = new OpenAIAgent({ tools });
+```
+
+### Ask the agent a question
+
+We can use the `chat` interface to ask our agent a question, and it will use the tools we've defined to find an answer.
+
+```javascript
+let response = await agent.chat({
+ message: "Add 101 and 303",
+});
+
+console.log(response);
+```
+
+Let's see what running this looks like using `npx tsx agent.ts`:
+
+**_Output_**
+
+```javascript
+{
+ toolCall: {
+ id: 'call_ze6A8C3mOUBG4zmXO8Z4CPB5',
+ name: 'sumNumbers',
+ input: { a: 101, b: 303 }
+ },
+ toolResult: {
+ tool: FunctionTool { _fn: [Function: sumNumbers], _metadata: [Object] },
+ input: { a: 101, b: 303 },
+ output: '404',
+ isError: false
+ }
+}
+```
+
+```javascript
+{
+ response: {
+ raw: {
+ id: 'chatcmpl-9KwauZku3QOvH78MNvxJs81mDvQYK',
+ object: 'chat.completion',
+ created: 1714778824,
+ model: 'gpt-4-turbo-2024-04-09',
+ choices: [Array],
+ usage: [Object],
+ system_fingerprint: 'fp_ea6eb70039'
+ },
+ message: {
+ content: 'The sum of 101 and 303 is 404.',
+ role: 'assistant',
+ options: {}
+ }
+ },
+ sources: [Getter]
+}
+```
+
+We're seeing two pieces of output here. The first is our callback firing when the tool is called. You can see in `toolResult` that the LLM has correctly passed `101` and `303` to our `sumNumbers` function, which adds them up and returns `404`.
+
+The second piece of output is the response from the LLM itself, where the `message.content` key is giving us the answer.
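+
+If you only want the final answer text rather than the whole object, you can log that field directly (a sketch based on the response shape shown above):
+
+```typescript
+// drill into the response object printed above to get just the text
+console.log(response.response.message.content);
+// -> "The sum of 101 and 303 is 404."
+```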
+
+Great! We've built an agent with tool use! Next you can:
+
+- [See the full code](https://github.com/run-llama/ts-agents/blob/main/1_agent/agent.ts)
+- [Switch to a local LLM](local_model)
+- Move on to [add Retrieval-Augmented Generation to your agent](agentic_rag)
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/3_local_model.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/3_local_model.mdx
new file mode 100644
index 0000000000..0c649dfe37
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/3_local_model.mdx
@@ -0,0 +1,92 @@
+---
+title: Using a local model via Ollama
+---
+
+If you're happy using OpenAI, you can skip this section, but many people are interested in using models they run themselves. The easiest way to do this is via the great work of our friends at [Ollama](https://ollama.com/), who provide a simple to use client that will download, install and run a [growing range of models](https://ollama.com/library) for you.
+
+### Install Ollama
+
+They provide a one-click installer for Mac, Linux and Windows on their [home page](https://ollama.com/).
+
+### Pick and run a model
+
+Since we're going to be doing agentic work, we'll need a very capable model, but the largest models are hard to run on a laptop. We think `mixtral 8x7b` is a good balance between power and resources, but `llama3` is another great option. You can run Mixtral by running
+
+```bash
+ollama run mixtral:8x7b
+```
+
+The first time you run it, it will also automatically download and install the model for you.
+
+### Switch the LLM in your code
+
+There are two changes you need to make to the code we already wrote in `1_agent` to get Mixtral 8x7b to work. First, you need to switch to that model. Replace the call to `Settings.llm` with this:
+
+```javascript
+Settings.llm = new Ollama({
+ model: "mixtral:8x7b",
+});
+```
+
+### Swap to a ReActAgent
+
+In our original code we used a specific OpenAIAgent, so we'll need to switch to a more generic agent pattern, the ReAct pattern. This is simple: change the `const agent` line in your code to read
+
+```javascript
+const agent = new ReActAgent({ tools });
+```
+
+(You will also need to bring in `Ollama` and `ReActAgent` in your imports, as shown below.)
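+
+For example, the import line from earlier would become something like this (assuming your version of `llamaindex` exports `Ollama` and `ReActAgent` directly):
+
+```typescript
+import { FunctionTool, Ollama, ReActAgent, Settings } from "llamaindex";
+```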
+
+### Run your totally local agent
+
+Because your embeddings were already local, your agent can now run entirely locally without making any API calls.
+
+```bash
+node agent.mjs
+```
+
+Note that your model will probably run a lot slower than OpenAI, so be prepared to wait a while!
+
+**_Output_**
+
+```javascript
+{
+ response: {
+ message: {
+ role: 'assistant',
+ content: ' Thought: I need to use a tool to add the numbers 101 and 303.\n' +
+ 'Action: sumNumbers\n' +
+ 'Action Input: {"a": 101, "b": 303}\n' +
+ '\n' +
+ 'Observation: 404\n' +
+ '\n' +
+ 'Thought: I can answer without using any more tools.\n' +
+ 'Answer: The sum of 101 and 303 is 404.'
+ },
+ raw: {
+ model: 'mixtral:8x7b',
+ created_at: '2024-05-09T00:24:30.339473Z',
+ message: [Object],
+ done: true,
+ total_duration: 64678371209,
+ load_duration: 57394551334,
+ prompt_eval_count: 475,
+ prompt_eval_duration: 4163981000,
+ eval_count: 94,
+ eval_duration: 3116692000
+ }
+ },
+ sources: [Getter]
+}
+```
+
+Tada! You can see all of this in the folder `1a_mixtral`.
+
+### Extending to other examples
+
+You can use a ReActAgent instead of an OpenAIAgent in any of the further examples below, but keep in mind that GPT-4 is a lot more capable than Mixtral 8x7b, so you may see more errors or failures in reasoning if you are using an entirely local setup.
+
+### Next steps
+
+Now you've got a local agent, you can [add Retrieval-Augmented Generation to your agent](agentic_rag).
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/4_agentic_rag.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/4_agentic_rag.mdx
new file mode 100644
index 0000000000..f5f4432afb
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/4_agentic_rag.mdx
@@ -0,0 +1,156 @@
+---
+title: Adding Retrieval-Augmented Generation (RAG)
+---
+
+While an agent that can perform math is nifty (LLMs are usually not very good at math), LLM-based applications are always more interesting when they work with large amounts of data. In this case, we're going to use a 200-page PDF of the proposed budget of the city of San Francisco for fiscal years 2023-2024 and 2024-2025. It's a great example because it's extremely wordy and full of tables of figures, which present a challenge for humans and LLMs alike.
+
+To learn more about RAG, we recommend this [introduction](https://docs.llamaindex.ai/en/stable/getting_started/concepts/) from our Python docs. We'll assume you know the basics:
+
+- Parse your source data into chunks of text.
+- Encode that text as numbers, called embeddings.
+- Search your embeddings for the most relevant chunks of text.
+- Use the relevant chunks along with a query to ask an LLM to generate an answer.
+
+We're going to start with the same agent we [built in step 1](https://github.com/run-llama/ts-agents/blob/main/1_agent/agent.ts), but make a few changes. You can find the finished version [in the repository](https://github.com/run-llama/ts-agents/blob/main/2_agentic_rag/agent.ts).
+
+### New dependencies
+
+We'll be bringing in `SimpleDirectoryReader`, `HuggingFaceEmbedding`, `VectorStoreIndex`, `QueryEngineTool`, and `OpenAIContextAwareAgent` from LlamaIndex.TS, as well as the dependencies we previously used.
+
+```javascript
+import {
+ OpenAI,
+ FunctionTool,
+ OpenAIAgent,
+ OpenAIContextAwareAgent,
+ Settings,
+ SimpleDirectoryReader,
+ HuggingFaceEmbedding,
+ VectorStoreIndex,
+ QueryEngineTool,
+} from "llamaindex";
+```
+
+### Add an embedding model
+
+To encode our text into embeddings, we'll need an embedding model. We could use OpenAI for this but to save on API calls we're going to use a local embedding model from HuggingFace.
+
+```javascript
+Settings.embedModel = new HuggingFaceEmbedding({
+ modelType: "BAAI/bge-small-en-v1.5",
+ quantized: false,
+});
+```
+
+### Load data using SimpleDirectoryReader
+
+`SimpleDirectoryReader` is a flexible tool that can read various file formats. We will point it at our data directory, which contains a single PDF file, and retrieve a set of documents.
+
+```javascript
+const reader = new SimpleDirectoryReader();
+const documents = await reader.loadData("../data");
+```
+
+### Index our data
+
+We will convert our text into embeddings using the `VectorStoreIndex` class through the `fromDocuments` method, which utilizes the embedding model defined earlier in `Settings`.
+
+```javascript
+const index = await VectorStoreIndex.fromDocuments(documents);
+```
+
+### Configure a retriever
+
+Before LlamaIndex can send a query to the LLM, it needs to find the most relevant chunks to send. That's the purpose of a `Retriever`. We're going to get `VectorStoreIndex` to act as a retriever for us:
+
+```javascript
+const retriever = await index.asRetriever();
+```
+
+### Configure how many documents to retrieve
+
+By default LlamaIndex will retrieve just the 2 most relevant chunks of text. This document is complex though, so we'll ask for more context.
+
+```javascript
+retriever.similarityTopK = 10;
+```
+
+### Approach 1: Create a Context-Aware Agent
+
+With the retriever ready, you can create a **context-aware agent**.
+
+```javascript
+const agent = new OpenAIContextAwareAgent({
+ contextRetriever: retriever,
+});
+
+// Example query to the context-aware agent
+let response = await agent.chat({
+ message: `What's the budget of San Francisco in 2023-2024?`,
+});
+
+console.log(response);
+```
+
+**Expected Output:**
+
+```md
+The total budget for the City and County of San Francisco for the fiscal year 2023-2024 is $14.6 billion. This represents a $611.8 million, or 4.4 percent, increase over the previous fiscal year's budget. The budget covers various expenditures across different departments and services, including significant allocations to public works, transportation, commerce, public protection, and health services.
+```
+
+### Approach 2: Using QueryEngineTool (Alternative Approach)
+
+If you prefer more flexibility and don't mind additional complexity, you can create a `QueryEngineTool`. This approach allows you to define the query logic, providing a more tailored way to interact with the data, but note that it introduces a delay due to the extra tool call.
+
+```javascript
+const queryEngine = await index.asQueryEngine({ retriever });
+const tools = [
+ new QueryEngineTool({
+ queryEngine: queryEngine,
+ metadata: {
+ name: "san_francisco_budget_tool",
+ description: `This tool can answer detailed questions about the individual components of the budget of San Francisco in 2023-2024.`,
+ },
+ }),
+];
+
+// Create an agent using the tools array
+const agent = new OpenAIAgent({ tools });
+
+let toolResponse = await agent.chat({
+ message: "What's the budget of San Francisco in 2023-2024?",
+});
+
+console.log(toolResponse);
+```
+
+**Expected Output:**
+
+```javascript
+{
+ toolCall: {
+ id: 'call_iNo6rTK4pOpOBbO8FanfWLI9',
+ name: 'san_francisco_budget_tool',
+ input: { query: 'total budget' }
+ },
+ toolResult: {
+ tool: QueryEngineTool {
+ queryEngine: [RetrieverQueryEngine],
+ metadata: [Object]
+ },
+ input: { query: 'total budget' },
+ output: 'The total budget for the City and County of San Francisco for Fiscal Year (FY) 2023-24 is $14.6 billion, which represents a $611.8 million, or 4.4 percent, increase over the FY 2022-23 budget. For FY 2024-25, the total budget is also projected to be $14.6 billion, reflecting a $40.5 million, or 0.3 percent, decrease from the FY 2023-24 proposed budget. This budget includes various expenditures across different departments and services, with significant allocations to public works, transportation, commerce, public protection, and health services.',
+ isError: false
+ }
+}
+```
+
+Once again we see a `toolResult`. You can see the query the LLM decided to send to the query engine ("total budget"), and the output the engine returned. In `response.message` you see that the LLM has returned the output from the tool almost verbatim, although it trimmed out the bit about 2024-2025 since we didn't ask about that year.
+
+### Comparison of Approaches
+
+The `OpenAIContextAwareAgent` approach simplifies the setup by allowing you to directly link the retriever to the agent, making it straightforward to access relevant context for your queries. This is ideal for situations where you want easy integration with existing data sources, like a context chat engine.
+
+On the other hand, using the `QueryEngineTool` offers more flexibility and power. This method allows you to customize how queries are constructed and executed, enabling you to query data from various stores and process it in different ways. However, this added flexibility comes with increased complexity and response time: there is a separate tool call, and the query engine uses the LLM to generate the tool output before it is passed to the agent.
+
+So now we have an agent that can index complicated documents and answer questions about them. Let's [combine our math agent and our RAG agent](rag_and_tools)!
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/5_rag_and_tools.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/5_rag_and_tools.mdx
new file mode 100644
index 0000000000..0f95857d2f
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/5_rag_and_tools.mdx
@@ -0,0 +1,130 @@
+---
+title: A RAG agent that does math
+---
+
+In [our third iteration of the agent](https://github.com/run-llama/ts-agents/blob/main/3_rag_and_tools/agent.ts) we've combined the two previous agents, so we've defined both `sumNumbers` and a `QueryEngineTool` and created an array of two tools:
+
+```javascript
+// define the query engine as a tool
+const tools = [
+ new QueryEngineTool({
+ queryEngine: queryEngine,
+ metadata: {
+ name: "san_francisco_budget_tool",
+ description: `This tool can answer detailed questions about the individual components of the budget of San Francisco in 2023-2024.`,
+ },
+ }),
+ FunctionTool.from(sumNumbers, {
+ name: "sumNumbers",
+ description: "Use this function to sum two numbers",
+ parameters: {
+ type: "object",
+ properties: {
+ a: {
+ type: "number",
+ description: "First number to sum",
+ },
+ b: {
+ type: "number",
+ description: "Second number to sum",
+ },
+ },
+ required: ["a", "b"],
+ },
+ }),
+];
+```
+
+These tool descriptions are identical to the ones we previously defined. Now let's ask it 3 questions in a row:
+
+```javascript
+let response = await agent.chat({
+ message:
+ "What's the budget of San Francisco for community health in 2023-24?",
+});
+console.log(response);
+
+let response2 = await agent.chat({
+ message:
+ "What's the budget of San Francisco for public protection in 2023-24?",
+});
+console.log(response2);
+
+let response3 = await agent.chat({
+ message:
+ "What's the combined budget of San Francisco for community health and public protection in 2023-24?",
+});
+console.log(response3);
+```
+
+We'll abbreviate the output, but here are the important things to spot:
+
+```javascript
+{
+ toolCall: {
+ id: 'call_ZA1LPx03gO4ABre1r6XowLWq',
+ name: 'san_francisco_budget_tool',
+ input: { query: 'community health budget 2023-2024' }
+ },
+ toolResult: {
+ tool: QueryEngineTool {
+ queryEngine: [RetrieverQueryEngine],
+ metadata: [Object]
+ },
+ input: { query: 'community health budget 2023-2024' },
+ output: 'The proposed Fiscal Year (FY) 2023-24 budget for the Department of Public Health is $3.2 billion
+ }
+}
+```
+
+This is the first tool call, where it used the query engine to get the public health budget.
+
+```javascript
+{
+ toolCall: {
+ id: 'call_oHu1KjEvA47ER6HYVfFIq9yp',
+ name: 'san_francisco_budget_tool',
+ input: { query: 'public protection budget 2023-2024' }
+ },
+ toolResult: {
+ tool: QueryEngineTool {
+ queryEngine: [RetrieverQueryEngine],
+ metadata: [Object]
+ },
+ input: { query: 'public protection budget 2023-2024' },
+ output: "The budget for Public Protection in San Francisco for Fiscal Year (FY) 2023-24 is $2,012.5 million."
+ }
+}
+```
+
+In the second tool call, it used the query engine again, this time to get the public protection budget.
+
+```javascript
+{
+ toolCall: {
+ id: 'call_SzG4yGUnLbv1T7IyaLAOqg3t',
+ name: 'sumNumbers',
+ input: { a: 3200, b: 2012.5 }
+ },
+ toolResult: {
+ tool: FunctionTool { _fn: [Function: sumNumbers], _metadata: [Object] },
+ input: { a: 3200, b: 2012.5 },
+ output: '5212.5',
+ isError: false
+ }
+}
+```
+
+In the final tool call, it used the `sumNumbers` function to add the two budgets together. Perfect! This leads to the final answer:
+
+```javascript
+{
+ message: {
+ content: 'The combined budget of San Francisco for community health and public protection in Fiscal Year (FY) 2023-24 is $5,212.5 million.',
+ role: 'assistant',
+ options: {}
+ }
+}
+```
+
+Great! Now let's improve accuracy by improving our parsing with [LlamaParse](llamaparse).
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/6_llamaparse.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/6_llamaparse.mdx
new file mode 100644
index 0000000000..dc0047addf
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/6_llamaparse.mdx
@@ -0,0 +1,20 @@
+---
+title: Adding LlamaParse
+---
+
+Complicated PDFs can be very tricky for LLMs to understand. To help with this, LlamaIndex provides LlamaParse, a hosted service that parses complex documents including PDFs. To use it, get a `LLAMA_CLOUD_API_KEY` by [signing up for LlamaCloud](https://cloud.llamaindex.ai/) (it's free for up to 1000 pages/day), then add it to your `.env` file just as you did for your OpenAI key:
+
+```bash
+LLAMA_CLOUD_API_KEY=llx-XXXXXXXXXXXXXXXX
+```
+
+Then replace `SimpleDirectoryReader` with `LlamaParseReader`:
+
+```javascript
+const reader = new LlamaParseReader({ resultType: "markdown" });
+const documents = await reader.loadData("../data/sf_budget_2023_2024.pdf");
+```
+
+Now you will be able to ask more complicated questions of the same PDF and get better results. You can find this code [in our repo](https://github.com/run-llama/ts-agents/blob/main/4_llamaparse/agent.ts).
+
+Next up, let's persist our embedded data so we don't have to re-parse every time by [using a vector store](qdrant).
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/7_qdrant.mdx b/apps/next/src/content/docs/llamaindex/guide/agents/7_qdrant.mdx
new file mode 100644
index 0000000000..d6154c580d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/7_qdrant.mdx
@@ -0,0 +1,77 @@
+---
+title: Adding persistent vector storage
+---
+
+In the previous examples, we've been loading our data into memory each time we run the agent. This is fine for small datasets, but for larger datasets you'll want to store your embeddings in a database. LlamaIndex.TS provides a `VectorStore` class that can store your embeddings in a variety of databases. We're going to use [Qdrant](https://qdrant.tech/), a popular vector store, for this example.
+
+We can get a local instance of Qdrant running very simply with Docker (make sure you [install Docker](https://www.docker.com/products/docker-desktop/) first):
+
+```bash
+docker pull qdrant/qdrant
+docker run -p 6333:6333 qdrant/qdrant
+```
+
+And in our code we initialize a `VectorStore` with the Qdrant URL:
+
+```javascript
+// initialize qdrant vector store
+const vectorStore = new QdrantVectorStore({
+ url: "http://localhost:6333",
+});
+```
+
+Now once we have loaded our documents, we can instantiate an index with the vector store:
+
+```javascript
+// create a query engine from our documents
+const index = await VectorStoreIndex.fromDocuments(documents, { vectorStore });
+```
+
+In [the final iteration](https://github.com/run-llama/ts-agents/blob/main/5_qdrant/agent.ts) you can see that we have also implemented a very naive caching mechanism to avoid re-parsing the PDF each time we run the agent:
+
+```javascript
+// load cache.json and parse it
+let cache = {};
+let cacheExists = false;
+try {
+ await fs.access(PARSING_CACHE, fs.constants.F_OK);
+ cacheExists = true;
+} catch (e) {
+ console.log("No cache found");
+}
+if (cacheExists) {
+ cache = JSON.parse(await fs.readFile(PARSING_CACHE, "utf-8"));
+}
+
+const filesToParse = ["../data/sf_budget_2023_2024.pdf"];
+
+// load our data, reading only files we haven't seen before
+let documents = [];
+const reader = new LlamaParseReader({ resultType: "markdown" });
+for (let file of filesToParse) {
+ if (!cache[file]) {
+ documents = documents.concat(await reader.loadData(file));
+ cache[file] = true;
+ }
+}
+
+// write the cache back to disk
+await fs.writeFile(PARSING_CACHE, JSON.stringify(cache));
+```
+
+Since parsing a PDF can be slow, especially a large one, using the pre-parsed chunks in Qdrant can significantly speed up your agent.
+
+## Next steps
+
+In this guide you've learned how to:
+
+- [Create an agent](create_agent)
+- Use remote LLMs like GPT-4
+- [Use local LLMs like Mixtral](local_model)
+- [Create a RAG query engine](agentic_rag)
+- [Turn functions and query engines into agent tools](rag_and_tools)
+- Combine those tools
+- [Enhance your parsing with LlamaParse](llamaparse)
+- Persist your data in a vector store
+
+The next steps are up to you! Try creating more complex functions and query engines, and set your agent loose on the world.
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/images/agent_flow.png b/apps/next/src/content/docs/llamaindex/guide/agents/images/agent_flow.png
new file mode 100644
index 0000000000..ad0456397b
Binary files /dev/null and b/apps/next/src/content/docs/llamaindex/guide/agents/images/agent_flow.png differ
diff --git a/apps/next/src/content/docs/llamaindex/guide/agents/meta.json b/apps/next/src/content/docs/llamaindex/guide/agents/meta.json
new file mode 100644
index 0000000000..0812922414
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/guide/agents/meta.json
@@ -0,0 +1,12 @@
+{
+ "title": "Agents",
+ "pages": [
+ "1_setup",
+ "2_create_agent",
+ "3_local_model",
+ "4_agentic_rag",
+ "5_rag_and_tools",
+ "6_llamaparse",
+ "7_qdrant"
+ ]
+}
diff --git a/apps/next/src/content/docs/llamaindex/guide/meta.json b/apps/next/src/content/docs/llamaindex/guide/meta.json
index 801ed8d57b..b95c3a6110 100644
--- a/apps/next/src/content/docs/llamaindex/guide/meta.json
+++ b/apps/next/src/content/docs/llamaindex/guide/meta.json
@@ -1,5 +1,5 @@
{
"title": "Guide",
"description": "See our guide",
- "pages": ["workflow", "chat"]
+ "pages": ["workflow", "chat", "agents"]
}
diff --git a/apps/next/src/content/docs/llamaindex/index.mdx b/apps/next/src/content/docs/llamaindex/index.mdx
index f77fa845a0..b673ccb392 100644
--- a/apps/next/src/content/docs/llamaindex/index.mdx
+++ b/apps/next/src/content/docs/llamaindex/index.mdx
@@ -1,35 +1,24 @@
---
-title: Getting Started with LlamaIndex.TS
-description: Install llamaindex by running a single command.
+title: What is LlamaIndex.TS
+description: LlamaIndex is the leading data framework for building LLM applications
---
-import { Tab, Tabs } from "fumadocs-ui/components/tabs";
+import {
+ SiNodedotjs,
+ SiDeno,
+ SiBun,
+ SiCloudflareworkers,
+} from "@icons-pack/react-simple-icons";
-
- ```shell tab="npm"
- npm install llamaindex
- ```
+LlamaIndex is a framework for building context-augmented generative AI applications with LLMs, including agents and workflows.
- ```shell tab="yarn"
- yarn add llamaindex
- ```
+The TypeScript implementation is designed for JavaScript server-side applications using Node.js, Deno, Bun, Cloudflare Workers, and more.
- ```shell tab="pnpm"
- pnpm add llamaindex
- ```
-
+LlamaIndex.TS provides tools for beginners, advanced users, and everyone in between.
-## What's next?
-
-
-
-
-
+
diff --git a/apps/next/src/content/docs/llamaindex/meta.json b/apps/next/src/content/docs/llamaindex/meta.json
index 9d07337d40..8ab569a136 100644
--- a/apps/next/src/content/docs/llamaindex/meta.json
+++ b/apps/next/src/content/docs/llamaindex/meta.json
@@ -6,10 +6,12 @@
"---Guide---",
"what-is-llamaindex",
"index",
- "setup",
- "starter",
+ "getting_started",
"loading",
"guide",
+ "examples",
+ "recipes",
+ "modules",
"integration"
]
}
diff --git a/apps/next/src/content/docs/llamaindex/modules/agent/index.mdx b/apps/next/src/content/docs/llamaindex/modules/agent/index.mdx
new file mode 100644
index 0000000000..de65109764
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/agent/index.mdx
@@ -0,0 +1,31 @@
+---
+title: Agents
+---
+
+An “agent” is an automated reasoning and decision engine. It takes in a user input/query and can make internal decisions for executing that query in order to return the correct result. The key agent components can include, but are not limited to:
+
+- Breaking down a complex question into smaller ones
+- Choosing an external Tool to use + coming up with parameters for calling the Tool
+- Planning out a set of tasks
+- Storing previously completed tasks in a memory module
+
+## Getting Started
+
+LlamaIndex.TS comes with a few built-in agents, but you can also create your own. The built-in agents include:
+
+- OpenAI Agent
+- Anthropic Agent, both via Anthropic and AWS Bedrock (in `@llamaindex/community`)
+- Gemini Agent
+- ReAct Agent
+- Meta Llama 3.1 405B via Bedrock (in `@llamaindex/community`)
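+
+To give a feel for how these pieces fit together, here is a minimal sketch using a tool and the OpenAI agent (it assumes `OPENAI_API_KEY` is set in the environment; the tool and the numbers are made up for illustration):
+
+```typescript
+import { FunctionTool, OpenAIAgent } from "llamaindex";
+
+// A toy tool the agent can decide to call; parameters follow JSON Schema.
+const sumNumbers = FunctionTool.from(
+  ({ a, b }: { a: number; b: number }) => `${a + b}`,
+  {
+    name: "sumNumbers",
+    description: "Use this function to sum two numbers",
+    parameters: {
+      type: "object",
+      properties: {
+        a: { type: "number", description: "First number" },
+        b: { type: "number", description: "Second number" },
+      },
+      required: ["a", "b"],
+    },
+  },
+);
+
+const agent = new OpenAIAgent({ tools: [sumNumbers] });
+const response = await agent.chat({ message: "What is 101 + 303?" });
+console.log(response);
+```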
+
+## Examples
+
+- [OpenAI Agent](/docs/llamaindex/examples/agent)
+- [Gemini Agent](/docs/llamaindex/examples/agent_gemini)
+
+## API Reference
+
+- [OpenAIAgent](/docs/api/classes/OpenAIAgent)
+- [AnthropicAgent](/docs/api/classes/AnthropicAgent)
+- [ReActAgent](/docs/api/classes/ReActAgent)
diff --git a/apps/next/src/content/docs/llamaindex/modules/chat_engine.mdx b/apps/next/src/content/docs/llamaindex/modules/chat_engine.mdx
new file mode 100644
index 0000000000..d794963376
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/chat_engine.mdx
@@ -0,0 +1,28 @@
+---
+title: ChatEngine
+---
+
+The chat engine is a quick and simple way to chat with the data in your index.
+
+```typescript
+import { ContextChatEngine } from "llamaindex";
+
+// `index` is an existing index (e.g. a VectorStoreIndex) and `query` is the user's question
+const retriever = index.asRetriever();
+const chatEngine = new ContextChatEngine({ retriever });
+
+// start chatting
+const response = await chatEngine.chat({ message: query });
+```
+
+The `chat` function also supports streaming, just add `stream: true` as an option:
+
+```typescript
+const stream = await chatEngine.chat({ message: query, stream: true });
+for await (const chunk of stream) {
+ process.stdout.write(chunk.response);
+}
+```
+
+## API Reference
+
+- [ContextChatEngine](/docs/api/classes/ContextChatEngine)
+- [CondenseQuestionChatEngine](/docs/api/classes/CondenseQuestionChatEngine)
+- [SimpleChatEngine](/docs/api/classes/SimpleChatEngine)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_index.mdx
new file mode 100644
index 0000000000..3c53d6f12d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_index.mdx
@@ -0,0 +1,23 @@
+---
+title: Index
+---
+
+An index is the basic container and organization for your data. LlamaIndex.TS supports three indexes:
+
+- `VectorStoreIndex` - will send the top-k `Node`s to the LLM when generating a response. The default top-k is 2.
+- `SummaryIndex` - will send every `Node` in the index to the LLM in order to generate a response
+- `KeywordTableIndex` extracts and provides keywords from `Node`s to the LLM
+
+```typescript
+import { Document, VectorStoreIndex } from "llamaindex";
+
+const document = new Document({ text: "test" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## API Reference
+
+- [SummaryIndex](/docs/api/classes/SummaryIndex)
+- [VectorStoreIndex](/docs/api/classes/VectorStoreIndex)
+- [KeywordTableIndex](/docs/api/classes/KeywordTableIndex)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/discord.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/discord.mdx
new file mode 100644
index 0000000000..52e9125f10
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/discord.mdx
@@ -0,0 +1,36 @@
+---
+title: DiscordReader
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/readers/src/discord";
+
+DiscordReader is a simple data loader that reads all messages in a given Discord channel and returns them as Document objects.
+It uses the [@discordjs/rest](https://github.com/discordjs/discord.js/tree/main/packages/rest) library to fetch the messages.
+
+## Usage
+
+The first step is to create a Discord application and generate a bot token [here](https://discord.com/developers/applications).
+In your Discord application, go to the `OAuth2` tab and generate an invite URL by selecting `bot` and checking `Read Messages/View Channels` as well as `Read Message History`.
+This will invite the bot with the necessary permissions to read messages.
+Open the URL in your browser and select the server you want your bot to join.
+
+
+
+### Params
+
+#### DiscordReader()
+
+- `discordToken?`: The Discord bot token.
+- `requestHandler?`: Optionally provide a custom request function for edge environments, e.g. `fetch`. See discord.js for more info.
+
+#### DiscordReader.loadData
+
+- `channelIDs`: The ID(s) of the Discord channels to read, as an array of strings.
+- `limit?`: Optionally limit the number of messages to read.
+- `additionalInfo?`: An optional flag to include embedded messages and attachment URLs in the document.
+- `oldestFirst?`: An optional flag to return the oldest messages first.
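+
+Putting these parameters together, a minimal usage sketch (the channel ID is a placeholder, and the bot token is assumed to be available, e.g. via the `DISCORD_TOKEN` environment variable):
+
+```ts
+import { DiscordReader } from "llamaindex";
+
+const reader = new DiscordReader();
+// Read up to 100 messages from the given channel, oldest first, without extra attachment info.
+const documents = await reader.loadData(["1234567890123456789"], 100, false, true);
+console.log(`Loaded ${documents.length} messages as documents`);
+```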
+
+## API Reference
+
+- [DiscordReader](/docs/api/classes/DiscordReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx
new file mode 100644
index 0000000000..d5a4fd3a6a
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Loader
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/readers/src/simple-directory-reader";
+import CodeSource2 from "!raw-loader!../../../../../../../../examples/readers/src/custom-simple-directory-reader";
+
+Before you can start indexing your documents, you need to load them into memory.
+
+All "basic" data loaders can be seen below, mapped to their respective filetypes in `SimpleDirectoryReader`. More loaders are shown in the sidebar on the left.
+Additionally, the following loaders exist without separate documentation:
+
+- `AssemblyAIReader` transcribes audio using [AssemblyAI](https://www.assemblyai.com/).
+ - [AudioTranscriptReader](/docs/api/classes/AudioTranscriptReader): loads entire transcript as a single document.
+ - [AudioTranscriptParagraphsReader](/docs/api/classes/AudioTranscriptParagraphsReader): creates a document per paragraph.
+ - [AudioTranscriptSentencesReader](/docs/api/classes/AudioTranscriptSentencesReader): creates a document per sentence.
+ - [AudioSubtitlesReader](/docs/api/classes/AudioTranscriptParagraphsReader): creates a document containing the subtitles of a transcript.
+- [NotionReader](/docs/api/classes/NotionReader) loads [Notion](https://www.notion.so/) pages.
+- [SimpleMongoReader](/docs/api/classes/SimpleMongoReader) loads data from a [MongoDB](https://www.mongodb.com/) database.
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## SimpleDirectoryReader
+
+[](https://stackblitz.com/github/run-llama/LlamaIndexTS/tree/main/examples/readers?file=src/simple-directory-reader.ts&title=Simple%20Directory%20Reader)
+
+LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class.
+
+It is a simple reader that reads all files from a directory and its subdirectories.
+
+
+
+Currently, the following readers are mapped to specific file types:
+
+- [TextFileReader](/docs/api/classes/TextFileReader): `.txt`
+- [PDFReader](/docs/api/classes/PDFReader): `.pdf`
+- [PapaCSVReader](/docs/api/classes/PapaCSVReader): `.csv`
+- [MarkdownReader](/docs/api/classes/MarkdownReader): `.md`
+- [DocxReader](/docs/api/classes/DocxReader): `.docx`
+- [HTMLReader](/docs/api/classes/HTMLReader): `.htm`, `.html`
+- [ImageReader](/docs/api/classes/ImageReader): `.jpg`, `.jpeg`, `.png`, `.gif`
+
+You can modify the reader in three different ways:
+
+- `overrideReader` overrides the reader for all file types, including unsupported ones.
+- `fileExtToReader` maps a reader to a specific file type. Can override reader for existing file types or add support for new file types.
+- `defaultReader` sets a fallback reader for files with unsupported extensions. By default it is `TextFileReader`.
+
+SimpleDirectoryReader supports up to 9 concurrent requests. Use the `numWorkers` option to set the number of concurrent requests; by default it runs in sequential mode, i.e. `numWorkers` is set to 1.
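+
+For illustration, a minimal sketch combining these options (the directory path is a placeholder; parameter names are taken from the list above):
+
+```ts
+import { SimpleDirectoryReader, TextFileReader } from "llamaindex";
+
+const reader = new SimpleDirectoryReader();
+const documents = await reader.loadData({
+  directoryPath: "./data", // placeholder path
+  numWorkers: 4, // process up to 4 files concurrently
+  defaultReader: new TextFileReader(), // fallback for unsupported extensions
+});
+console.log(`Loaded ${documents.length} documents`);
+```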
+
+### Example
+
+
+
+## API Reference
+
+- [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/json.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/json.mdx
new file mode 100644
index 0000000000..cf69bf73af
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/json.mdx
@@ -0,0 +1,149 @@
+---
+title: JSONReader
+---
+
+A simple JSON data loader with various options.
+It either parses the entire JSON string, cleaning it and treating each line as an embedding, or performs a recursive depth-first traversal yielding JSON paths.
+Supports streaming of large JSON data using [@discoveryjs/json-ext](https://github.com/discoveryjs/json-ext).
+
+## Usage
+
+```ts
+import { JSONReader } from "llamaindex";
+
+const file = "../../PATH/TO/FILE";
+const content = new TextEncoder().encode("JSON_CONTENT");
+
+const reader = new JSONReader({ levelsBack: 0, collapseLength: 100 });
+const docsFromFile = await reader.loadData(file);
+const docsFromContent = await reader.loadDataAsContent(content);
+```
+
+### Options
+
+Basic:
+
+- `streamingThreshold?`: The threshold for using streaming mode, in MB of JSON data. Estimates characters by calculating bytes: `(streamingThreshold * 1024 * 1024) / 2` and comparing against `.length` of the JSON string. Set to `undefined` to disable streaming or `0` to always use streaming. Default is `50` MB.
+
+- `ensureAscii?`: Whether to ensure only ASCII characters are present in the output by converting non-ASCII characters to their Unicode escape sequences. Default is `false`.
+
+- `isJsonLines?`: Whether the JSON is in JSON Lines format. If `true`, will split into lines, remove empty ones and parse each line as JSON. Note: Uses a custom streaming parser, most likely less robust than json-ext. Default is `false`.
+
+- `cleanJson?`: Whether to clean the JSON by filtering out structural characters (`{}, [], and ,`). If set to false, it will just parse the JSON, not removing structural characters. Default is `true`.
+
+- `logger?`: A placeholder for a custom logger function.
+
+Depth-First-Traversal:
+
+- `levelsBack?`: Specifies how many levels up the JSON structure to include in the output. `cleanJson` will be ignored. If set to `0`, all levels are included. If `undefined`, parses the entire JSON, treats each line as an embedding and creates a document per top-level array. Default is `undefined`.
+
+- `collapseLength?`: The maximum length of the JSON string representation to be collapsed into a single line. Only applicable when `levelsBack` is set. Default is `undefined`.
+
+#### Examples
+
+Input:
+
+```json
+{"a": {"1": {"key1": "value1"}, "2": {"key2": "value2"}}, "b": {"3": {"k3": "v3"}, "4": {"k4": "v4"}}}
+```
+
+Default options:
+
+`levelsBack` = `undefined` & `cleanJson` = `true`
+
+Output:
+
+```json
+"a": {
+"1": {
+"key1": "value1"
+"2": {
+"key2": "value2"
+"b": {
+"3": {
+"k3": "v3"
+"4": {
+"k4": "v4"
+```
+
+Depth-First Traversal all levels:
+
+`levelsBack` = `0`
+
+Output:
+
+```json
+a 1 key1 value1
+a 2 key2 value2
+b 3 k3 v3
+b 4 k4 v4
+```
+
+Depth-First Traversal and Collapse:
+
+`levelsBack` = `0` & `collapseLength` = `35`
+
+Output:
+
+```json
+a 1 {"key1":"value1"}
+a 2 {"key2":"value2"}
+b {"3":{"k3":"v3"},"4":{"k4":"v4"}}
+```
+
+Depth-First Traversal limited levels:
+
+`levelsBack` = `2`
+
+Output:
+
+```json
+1 key1 value1
+2 key2 value2
+3 k3 v3
+4 k4 v4
+```
+
+Uncleaned JSON:
+
+`levelsBack` = `undefined` & `cleanJson` = `false`
+
+Output:
+
+```json
+{"a":{"1":{"key1":"value1"},"2":{"key2":"value2"}},"b":{"3":{"k3":"v3"},"4":{"k4":"v4"}}}
+```
+
+ASCII-Conversion:
+
+Input:
+
+```json
+{ "message": "こんにちは世界" }
+```
+
+Output:
+
+```json
+"message": "\u3053\u3093\u306b\u3061\u306f\u4e16\u754c"
+```
+
+JSON Lines Format:
+
+Input:
+
+```json
+{"tweet": "Hello world"}\n{"tweet": "こんにちは世界"}
+```
+
+Output:
+
+```json
+"tweet": "Hello world"
+
+"tweet": "こんにちは世界"
+```
+
+## API Reference
+
+- [JSONReader](/docs/api/classes/JSONReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/images.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/images.mdx
new file mode 100644
index 0000000000..b8e099e180
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/images.mdx
@@ -0,0 +1,115 @@
+---
+title: Image Retrieval
+---
+
+LlamaParse `json` mode supports extracting any images found in a page object by using the `getImages` function. They are downloaded to a local folder and can then be sent to a multimodal LLM for further processing.
+
+## Usage
+
+We pass our array of JSON objects to the `getImages` method, which downloads the images to a specified folder and returns a list of image objects with their local paths.
+
+```ts
+const reader = new LlamaParseReader();
+const jsonObjs = await reader.loadJson("../data/uber_10q_march_2022.pdf");
+const imageDicts = await reader.getImages(jsonObjs, "images");
+```
+
+### Multimodal Indexing
+
+You can create an index across both text and image nodes by requesting alternative text for the image from a multimodal LLM.
+
+```ts
+import {
+ Document,
+ ImageNode,
+ LlamaParseReader,
+ OpenAI,
+ VectorStoreIndex,
+} from "llamaindex";
+import { createMessageContent } from "llamaindex/synthesizers/utils";
+
+const reader = new LlamaParseReader();
+async function main() {
+ // Load PDF using LlamaParse JSON mode and return an array of json objects
+ const jsonObjs = await reader.loadJson("../data/uber_10q_march_2022.pdf");
+ // Access the first "pages" (=a single parsed file) object in the array
+ const jsonList = jsonObjs[0]["pages"];
+
+ const textDocs = getTextDocs(jsonList);
+ const imageTextDocs = await getImageTextDocs(jsonObjs);
+ const documents = [...textDocs, ...imageTextDocs];
+ // Split text, create embeddings and query the index
+ const index = await VectorStoreIndex.fromDocuments(documents);
+ const queryEngine = index.asQueryEngine();
+ const response = await queryEngine.query({
+ query:
+ "What does the bar graph titled 'Monthly Active Platform Consumers' show?",
+ });
+
+ console.log(response.toString());
+}
+
+main().catch(console.error);
+```
+
+We use two helper functions to create documents from the text and image nodes provided.
+
+#### Text Documents
+
+To create documents from the text nodes of the JSON object, we just map the needed values to a new `Document` object. In this case we assign the page text as the document text and the page number as metadata.
+
+```ts
+function getTextDocs(jsonList: { text: string; page: number }[]): Document[] {
+ return jsonList.map(
+ (page) => new Document({ text: page.text, metadata: { page: page.page } }),
+ );
+}
+```
+
+#### Image Documents
+
+To create documents from the images, we need to use a multimodal LLM to generate alt text.
+
+For this we create `ImageNodes` and add them as part of our message.
+
+We can use the `createMessageContent` function to simplify this.
+
+```ts
+async function getImageTextDocs(
+  jsonObjs: Record<string, any>[],
+): Promise<Document[]> {
+ const llm = new OpenAI({
+ model: "gpt-4o",
+ temperature: 0.2,
+ maxTokens: 1000,
+ });
+ const imageDicts = await reader.getImages(jsonObjs, "images");
+ const imageDocs = [];
+
+ for (const imageDict of imageDicts) {
+ const imageDoc = new ImageNode({ image: imageDict.path });
+ const prompt = () => `Describe the image as alt text`;
+ const message = await createMessageContent(prompt, [imageDoc]);
+
+ const response = await llm.complete({
+ prompt: message,
+ });
+
+ const doc = new Document({
+ text: response.text,
+ metadata: { path: imageDict.path },
+ });
+ imageDocs.push(doc);
+ }
+
+ return imageDocs;
+}
+```
+
+The returned `imageDocs` have the alt text assigned as text and the image path as metadata.
+
+You can see the full example file [here](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/readers/src/llamaparse-json.ts).
+
+## API Reference
+
+- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/index.mdx
new file mode 100644
index 0000000000..23c0f193f0
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/index.mdx
@@ -0,0 +1,68 @@
+---
+title: LlamaParse
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../../examples/readers/src/llamaparse";
+import CodeSource2 from "!raw-loader!../../../../../../../../../examples/readers/src/simple-directory-reader-with-llamaparse.ts";
+
+LlamaParse is an API created by LlamaIndex to efficiently parse files; for example, it's great at converting PDF tables into Markdown.
+
+To use it, first log in and get an API key from https://cloud.llamaindex.ai. Make sure to pass the key as the `apiKey` parameter or store it in the environment variable `LLAMA_CLOUD_API_KEY`.
+
+Official documentation for LlamaParse can be found [here](https://docs.cloud.llamaindex.ai/).
+
+## Usage
+
+You can then use the `LlamaParseReader` class to load local files and convert them into a parsed document that can be used by LlamaIndex.
+See [reader.ts](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/cloud/src/reader.ts) for a list of supported file types:
+
+
+
+### Params
+
+All options can be set with the `LlamaParseReader` constructor.
+
+They can be divided into two groups.
+
+#### General params:
+
+- `apiKey` is required. Can also be set via the environment variable `LLAMA_CLOUD_API_KEY`.
+- `checkInterval` is the interval in seconds to check if the parsing is done. Default is `1`.
+- `maxTimeout` is the maximum timeout to wait for parsing to finish. Default is `2000`.
+- `verbose` shows progress of the parsing. Default is `true`.
+- `ignoreErrors` set to `false` to get errors while parsing. Default is `true`, which returns an empty array on error.
+
+#### Advanced params:
+
+- `resultType` can be set to `markdown`, `text` or `json`. Defaults to `text`. More information about `json` mode on the next pages.
+- `language` primarily helps with OCR recognition. Defaults to `en`. Click [here](/docs/api/type-aliases/Language) for a list of supported languages.
+- `parsingInstructions?` Optional. Can help with complicated document structures. See this [LlamaIndex Blog Post](https://www.llamaindex.ai/blog/launching-the-first-genai-native-document-parsing-platform) for an example.
+- `skipDiagonalText?` Optional. Set to true to ignore diagonal text. (Text that is not rotated 0, 90, 180 or 270 degrees)
+- `invalidateCache?` Optional. Set to true to ignore the LlamaCloud cache. All documents are kept in the cache for 48 hours after the job is completed to avoid processing the same document twice. Can be useful for testing when trying to re-parse the same document with, e.g., different `parsingInstructions`.
+- `doNotCache?` Optional. Set to true to not cache the document.
+- `fastMode?` Optional. Set to true to use fast mode. This mode will skip OCR of images, and table/heading reconstruction. Note: Not compatible with `gpt4oMode`.
+- `doNotUnrollColumns?` Optional. Set to true to keep the text according to the document layout. Reduces reconstruction accuracy, and LLM/embedding performance in most cases.
+- `pageSeparator?` Optional. A templated page separator to use to split the text. If the results contain `{page_number}` (e.g. JSON mode), it will be replaced by the next page number. If not set the default separator `\\n---\\n` will be used.
+- `pagePrefix?` Optional. A templated prefix to add to the beginning of each page. If the results contain `{page_number}`, it will be replaced by the page number.
+- `pageSuffix?` Optional. A templated suffix to add to the end of each page. If the results contain `{page_number}`, it will be replaced by the page number.
+- `gpt4oMode` Deprecated. Use vendorMultimodal params. Set to true to use GPT-4o to extract content. Default is `false`.
+- `gpt4oApiKey?` Deprecated. Use vendorMultimodal params. Optional. Set the GPT-4o API key. Lowers the cost of parsing by using your own API key. Your OpenAI account will be charged. Can also be set in the environment variable `LLAMA_CLOUD_GPT4O_API_KEY`.
+- `boundingBox?` Optional. Specify an area of the document to parse. Expects the bounding box margins as a string in clockwise order, e.g. `boundingBox = "0.1,0,0,0"` to not parse the top 10% of the document.
+- `targetPages?` Optional. Specify which pages to parse by specifying them as a comma-separated list. First page is `0`.
+- `splitByPage` Whether to split the results, creating one document per page. Uses the set `pageSeparator` or `\n---\n` as fallback. Default is `true`.
+- `useVendorMultimodalModel` set to true to use a multimodal model. Default is `false`.
+- `vendorMultimodalModel?` Optional. Specify which multimodal model to use. Default is GPT4o. See [here](https://docs.cloud.llamaindex.ai/llamaparse/features/multimodal) for a list of available models and cost.
+- `vendorMultimodalApiKey?` Optional. Set the multimodal model API key. Can also be set in the environment variable `LLAMA_CLOUD_VENDOR_MULTIMODAL_API_KEY`.
+- `numWorkers` as in the Python version, is set in `SimpleDirectoryReader`. Default is `1`.
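+
+For illustration, a minimal sketch using a few of these options (the file path is a placeholder and `LLAMA_CLOUD_API_KEY` is assumed to be set in the environment):
+
+```ts
+import { LlamaParseReader } from "llamaindex";
+
+const reader = new LlamaParseReader({
+  resultType: "markdown", // get markdown instead of plain text
+  language: "en",
+  verbose: false,
+});
+const documents = await reader.loadData("./data/example.pdf");
+```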
+
+### LlamaParse with SimpleDirectoryReader
+
+Below is a full example of `LlamaParse` integrated into `SimpleDirectoryReader` with additional options.
+
+
+
+## API Reference
+
+- [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
+- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/json_mode.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/json_mode.mdx
new file mode 100644
index 0000000000..537d5cba43
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/llama_parse/json_mode.mdx
@@ -0,0 +1,93 @@
+---
+title: JSON Mode
+---
+
+In JSON mode, LlamaParse will return a data structure representing the parsed object.
+
+## Usage
+
+For JSON mode, you need to use `loadJson`. The `resultType` is set automatically by this method.
+More information about indexing the results can be found on the next page.
+
+```ts
+const reader = new LlamaParseReader();
+async function main() {
+ // Load the file and return an array of json objects
+ const jsonObjs = await reader.loadJson("../data/uber_10q_march_2022.pdf");
+ // Access the first "pages" (=a single parsed file) object in the array
+ const jsonList = jsonObjs[0]["pages"];
+ // Further process the jsonList object as needed.
+}
+```
+
+### Output
+
+The result format of the response, written to `jsonObjs` in the example, follows this structure:
+
+```json
+{
+ "pages": [
+ ..page objects..
+ ],
+ "job_metadata": {
+ "credits_used": int,
+ "credits_max": int,
+ "job_credits_usage": int,
+ "job_pages": int,
+ "job_is_cache_hit": boolean
+ },
+ "job_id": string ,
+ "file_path": string,
+ }
+}
+```
+
+#### Page objects
+
+Within page objects, the following keys may be present depending on your document.
+
+- `page`: The page number of the document.
+- `text`: The text extracted from the page.
+- `md`: The markdown version of the extracted text.
+- `images`: Any images extracted from the page.
+- `items`: An array of heading, text and table objects in the order they appear on the page.
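+
+For example, a small sketch iterating over the page objects in the `jsonList` from the snippet above (which keys are present depends on the document):
+
+```ts
+for (const page of jsonList) {
+  console.log(`Page ${page.page}: ${page.items?.length ?? 0} items`);
+  // page.text, page.md and page.images are also available where applicable
+}
+```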
+
+### JSON Mode with SimpleDirectoryReader
+
+All readers share a `loadData` method with `SimpleDirectoryReader` that is expected to return a uniform `Document` with metadata. This makes JSON mode incompatible with `SimpleDirectoryReader`.
+
+However, a simple workaround is to create a new reader class that extends `LlamaParseReader` and adds a new method or overrides `loadData`, wrapping around JSON mode, extracting the required values, and returning `Document` objects.
+
+```ts
+import { LlamaParseReader, Document } from "llamaindex";
+
+class LlamaParseReaderWithJson extends LlamaParseReader {
+ // Override the loadData method
+  override async loadData(filePath: string): Promise<Document[]> {
+    // Call the loadJson method inherited from LlamaParseReader
+    const jsonObjs = await super.loadJson(filePath);
+    let documents: Document[] = [];
+
+    jsonObjs.forEach((jsonObj) => {
+      // Make sure pages is an array before iterating over it
+      if (Array.isArray(jsonObj.pages)) {
+        const docs = jsonObj.pages.map(
+          (page: { text: string; page: number }) =>
+            new Document({ text: page.text, metadata: { page: page.page } }),
+        );
+        documents = documents.concat(docs);
+      }
+    });
+ return documents;
+ }
+}
+```
+
+Now we have documents with page number as metadata. This new reader can be used like any other and be integrated with SimpleDirectoryReader. Since it extends `LlamaParseReader`, you can use the same params.
+
+You can assign any other values of the JSON response to the Document as needed.
+
+## API Reference
+
+- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
+- [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx
new file mode 100644
index 0000000000..7e15a4d788
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx
@@ -0,0 +1,16 @@
+---
+title: Chat Stores
+---
+
+Chat stores manage chat history by storing sequences of messages in a structured way, ensuring the order of messages is maintained for accurate conversation flow.
+
+## Available Chat Stores
+
+- [SimpleChatStore](/docs/api/classes/SimpleChatStore): A simple in-memory chat store with support for [persisting](/docs/llamaindex/modules/data_stores/#local-storage) data to disk.
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [BaseChatStore](/docs/api/interfaces/BaseChatStore)
+
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/doc_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/doc_stores/index.mdx
new file mode 100644
index 0000000000..9546efd3f7
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/doc_stores/index.mdx
@@ -0,0 +1,16 @@
+---
+title: Document Stores
+---
+
+Document stores contain ingested document chunks, i.e. [Node](/docs/llamaindex/modules/documents_and_nodes/index)s.
+
+## Available Document Stores
+
+- [SimpleDocumentStore](/docs/api/classes/SimpleDocumentStore): A simple in-memory document store with support for [persisting](/docs/llamaindex/modules/data_stores/#local-storage) data to disk.
+- [PostgresDocumentStore](/docs/api/classes/PostgresDocumentStore): A PostgreSQL document store, see [PostgreSQL Storage](/docs/llamaindex/modules/data_stores/#postgresql-storage).
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [BaseDocumentStore](/docs/api/classes/BaseDocumentStore)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/index.mdx
new file mode 100644
index 0000000000..4282f5cdb4
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Storage
+---
+
+Storage in LlamaIndex.TS works automatically once you've configured a
+`StorageContext` object.
+
+## Local Storage
+
+You can configure the `persistDir` and attach it to an index.
+
+```typescript
+import {
+ Document,
+ VectorStoreIndex,
+ storageContextFromDefaults,
+} from "llamaindex";
+
+const storageContext = await storageContextFromDefaults({
+ persistDir: "./storage",
+});
+
+const document = new Document({ text: "Test Text" });
+const index = await VectorStoreIndex.fromDocuments([document], {
+ storageContext,
+});
+```
+
+## PostgreSQL Storage
+
+You can configure the `schemaName`, `tableName`, `namespace`, and
+`connectionString`. If a `connectionString` is not
+provided, it will use the environment variables `PGHOST`, `PGUSER`,
+`PGPASSWORD`, `PGDATABASE` and `PGPORT`.
+
+```typescript
+import {
+ Document,
+ VectorStoreIndex,
+ PostgresDocumentStore,
+ PostgresIndexStore,
+ storageContextFromDefaults,
+} from "llamaindex";
+
+const storageContext = await storageContextFromDefaults({
+ docStore: new PostgresDocumentStore(),
+ indexStore: new PostgresIndexStore(),
+});
+
+const document = new Document({ text: "Test Text" });
+const index = await VectorStoreIndex.fromDocuments([document], {
+ storageContext,
+});
+```
+
+## API Reference
+
+- [StorageContext](/docs/api/interfaces/StorageContext)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/index_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/index_stores/index.mdx
new file mode 100644
index 0000000000..19b063c28a
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/index_stores/index.mdx
@@ -0,0 +1,16 @@
+---
+title: Index Stores
+---
+
+Index stores are underlying storage components that contain metadata (i.e. information created when indexing) about the [index](/docs/llamaindex/modules/data_index) itself.
+
+## Available Index Stores
+
+- [SimpleIndexStore](/docs/api/classes/SimpleIndexStore): A simple in-memory index store with support for [persisting](/docs/llamaindex/modules/data_stores/#local-storage) data to disk.
+- [PostgresIndexStore](/docs/api/classes/PostgresIndexStore): A PostgreSQL index store, see [PostgreSQL Storage](/docs/llamaindex/modules/data_stores/#postgresql-storage).
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [BaseIndexStore](/docs/api/classes/BaseIndexStore)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/kv_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/kv_stores/index.mdx
new file mode 100644
index 0000000000..8ba76fe481
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/kv_stores/index.mdx
@@ -0,0 +1,16 @@
+---
+title: Key-Value Stores
+---
+
+Key-Value Stores represent underlying storage components used in [Document Stores](/docs/llamaindex/modules/data_stores/doc_stores/index) and [Index Stores](/docs/llamaindex/modules/data_stores/index_stores/index).
+
+## Available Key-Value Stores
+
+- [SimpleKVStore](/docs/api/classes/SimpleKVStore): A simple Key-Value store with support for [persisting](/docs/llamaindex/modules/data_stores/#local-storage) data to disk.
+- [PostgresKVStore](/docs/api/classes/PostgresKVStore): A PostgreSQL Key-Value store, see [PostgreSQL Storage](/docs/llamaindex/modules/data_stores/#postgresql-storage).
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [BaseKVStore](/docs/api/classes/BaseKVStore)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/index.mdx
new file mode 100644
index 0000000000..af1453efef
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/index.mdx
@@ -0,0 +1,24 @@
+---
+title: Vector Stores
+---
+
+Vector stores save embedding vectors of your ingested document chunks.
+
+## Available Vector Stores
+
+Available Vector Stores are shown on the sidebar to the left. Additionally, the following integrations exist without separate documentation:
+
+- [SimpleVectorStore](/docs/api/classes/SimpleVectorStore): A simple in-memory vector store with optional [persistence](/docs/llamaindex/modules/data_stores/#local-storage) to disk.
+- [AstraDBVectorStore](/docs/api/classes/AstraDBVectorStore): A cloud-native, scalable Database-as-a-Service built on Apache Cassandra, see [datastax.com](https://www.datastax.com/products/datastax-astra)
+- [ChromaVectorStore](/docs/api/classes/ChromaVectorStore): An open-source vector database, focused on ease of use and performance, see [trychroma.com](https://www.trychroma.com/)
+- [MilvusVectorStore](/docs/api/classes/MilvusVectorStore): An open-source, high-performance, highly scalable vector database, see [milvus.io](https://milvus.io/)
+- [MongoDBAtlasVectorSearch](/docs/api/classes/MongoDBAtlasVectorSearch): A cloud-based vector search solution for MongoDB, see [mongodb.com](https://www.mongodb.com/products/platform/atlas-vector-search)
+- [PGVectorStore](/docs/api/classes/PGVectorStore): An open-source vector store built on PostgreSQL, see [pgvector Github](https://github.com/pgvector/pgvector)
+- [PineconeVectorStore](/docs/api/classes/PineconeVectorStore): A managed, cloud-native vector database, see [pinecone.io](https://www.pinecone.io/)
+- [WeaviateVectorStore](/docs/api/classes/WeaviateVectorStore): An open-source, AI-native vector database, see [weaviate.io](https://weaviate.io/)
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [VectorStoreBase](/docs/api/classes/VectorStoreBase)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/qdrant.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/qdrant.mdx
new file mode 100644
index 0000000000..c5a363340c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/vector_stores/qdrant.mdx
@@ -0,0 +1,94 @@
+---
+title: Qdrant Vector Store
+---
+
+[qdrant.tech](https://qdrant.tech/)
+
+To run this example, you need to have a Qdrant instance running. You can run it with Docker:
+
+```bash
+docker pull qdrant/qdrant
+docker run -p 6333:6333 qdrant/qdrant
+```
+
+## Importing the modules
+
+```ts
+import fs from "node:fs/promises";
+import { Document, VectorStoreIndex, QdrantVectorStore } from "llamaindex";
+```
+
+## Load the documents
+
+```ts
+const path = "node_modules/llamaindex/examples/abramov.txt";
+const essay = await fs.readFile(path, "utf-8");
+```
+
+## Setup Qdrant
+
+```ts
+const vectorStore = new QdrantVectorStore({
+ url: "http://localhost:6333",
+});
+```
+
+## Setup the index
+
+```ts
+const document = new Document({ text: essay, id_: path });
+
+const index = await VectorStoreIndex.fromDocuments([document], {
+ vectorStore,
+});
+```
+
+## Query the index
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const response = await queryEngine.query({
+ query: "What did the author do in college?",
+});
+
+// Output response
+console.log(response.toString());
+```
+
+## Full code
+
+```ts
+import fs from "node:fs/promises";
+import { Document, VectorStoreIndex, QdrantVectorStore } from "llamaindex";
+
+async function main() {
+ const path = "node_modules/llamaindex/examples/abramov.txt";
+ const essay = await fs.readFile(path, "utf-8");
+
+ const vectorStore = new QdrantVectorStore({
+ url: "http://localhost:6333",
+ });
+
+ const document = new Document({ text: essay, id_: path });
+
+ const index = await VectorStoreIndex.fromDocuments([document], {
+ vectorStore,
+ });
+
+ const queryEngine = index.asQueryEngine();
+
+ const response = await queryEngine.query({
+ query: "What did the author do in college?",
+ });
+
+ // Output response
+ console.log(response.toString());
+}
+
+main().catch(console.error);
+```
+
+## API Reference
+
+- [QdrantVectorStore](/docs/api/classes/QdrantVectorStore)
diff --git a/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/index.mdx b/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/index.mdx
new file mode 100644
index 0000000000..6b5385fe01
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/index.mdx
@@ -0,0 +1,16 @@
+---
+title: Documents and Nodes
+---
+
+`Document`s and `Node`s are the basic building blocks of any index. While the API for these objects is similar, `Document` objects represent entire files, while `Node`s are smaller pieces of the original document that are suitable for an LLM and Q&A.
+
+```typescript
+import { Document } from "llamaindex";
+
+const document = new Document({ text: "text", metadata: { key: "val" } });
+```
+
+## API Reference
+
+- [Document](/docs/api/classes/Document)
+- [TextNode](/docs/api/classes/TextNode)
diff --git a/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/metadata_extraction.mdx b/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/metadata_extraction.mdx
new file mode 100644
index 0000000000..fbadb14a0f
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/documents_and_nodes/metadata_extraction.mdx
@@ -0,0 +1,54 @@
+---
+title: Metadata Extraction Usage Pattern
+---
+
+You can use LLMs to automate metadata extraction with our `Metadata Extractor` modules.
+
+Our metadata extractor modules include the following "feature extractors":
+
+- `SummaryExtractor` - automatically extracts a summary over a set of Nodes
+- `QuestionsAnsweredExtractor` - extracts a set of questions that each Node can answer
+- `TitleExtractor` - extracts a title over the context of each Node by document and combines them
+- `KeywordExtractor` - extracts keywords over the context of each Node
+
+Then you can chain the `Metadata Extractors` with the `IngestionPipeline` to extract metadata from a set of documents.
+
+```ts
+import {
+ IngestionPipeline,
+ TitleExtractor,
+ QuestionsAnsweredExtractor,
+ Document,
+ OpenAI,
+} from "llamaindex";
+
+async function main() {
+ const pipeline = new IngestionPipeline({
+ transformations: [
+ new TitleExtractor(),
+ new QuestionsAnsweredExtractor({
+ questions: 5,
+ }),
+ ],
+ });
+
+ const nodes = await pipeline.run({
+ documents: [
+ new Document({ text: "I am 10 years old. John is 20 years old." }),
+ ],
+ });
+
+ for (const node of nodes) {
+ console.log(node.metadata);
+ }
+}
+
+main().then(() => console.log("done"));
+```
+
+## API Reference
+
+- [SummaryExtractor](/docs/api/classes/SummaryExtractor)
+- [QuestionsAnsweredExtractor](/docs/api/classes/QuestionsAnsweredExtractor)
+- [TitleExtractor](/docs/api/classes/TitleExtractor)
+- [KeywordExtractor](/docs/api/classes/KeywordExtractor)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/deepinfra.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/deepinfra.mdx
new file mode 100644
index 0000000000..3cfd6c369d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/deepinfra.mdx
@@ -0,0 +1,85 @@
+---
+title: DeepInfra
+---
+
+To use DeepInfra embeddings, you need to import `DeepInfraEmbedding` from llamaindex.
+Check out available embedding models [here](https://deepinfra.com/models/embeddings).
+
+```ts
+import {
+ DeepInfraEmbedding,
+ Settings,
+ Document,
+ VectorStoreIndex,
+} from "llamaindex";
+
+// Update Embed Model
+Settings.embedModel = new DeepInfraEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+By default, `DeepInfraEmbedding` uses the `sentence-transformers/clip-ViT-B-32` model. You can change the model by passing the `model` parameter to the constructor.
+For example:
+
+```ts
+import { DeepInfraEmbedding, Settings } from "llamaindex";
+
+const model = "intfloat/e5-large-v2";
+Settings.embedModel = new DeepInfraEmbedding({
+ model,
+});
+```
+
+You can also set the `maxRetries` and `timeout` parameters when initializing `DeepInfraEmbedding` for better control over the request behavior.
+
+For example:
+
+```ts
+import { DeepInfraEmbedding, Settings } from "llamaindex";
+
+const model = "intfloat/e5-large-v2";
+const maxRetries = 5;
+const timeout = 5000; // 5 seconds
+
+Settings.embedModel = new DeepInfraEmbedding({
+ model,
+ maxRetries,
+ timeout,
+});
+```
+
+Standalone usage:
+
+```ts
+import { DeepInfraEmbedding } from "llamaindex";
+import { config } from "dotenv";
+// For standalone usage, you need to configure DEEPINFRA_API_TOKEN in .env file
+config();
+
+const main = async () => {
+ const model = "intfloat/e5-large-v2";
+ const embeddings = new DeepInfraEmbedding({ model });
+ const text = "What is the meaning of life?";
+ const response = await embeddings.embed([text]);
+ console.log(response);
+};
+
+main();
+```
+
+For questions or feedback, please contact us at [feedback@deepinfra.com](mailto:feedback@deepinfra.com)
+
+## API Reference
+
+- [DeepInfraEmbedding](/docs/api/classes/DeepInfraEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/gemini.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/gemini.mdx
new file mode 100644
index 0000000000..074482cc46
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/gemini.mdx
@@ -0,0 +1,39 @@
+---
+title: Gemini
+---
+
+To use Gemini embeddings, you need to import `GeminiEmbedding` from `llamaindex`.
+
+```ts
+import { Document, GeminiEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+// Update Embed Model
+Settings.embedModel = new GeminiEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+By default, `GeminiEmbedding` uses the `gemini-pro` model. You can change the model by passing the `model` parameter to the constructor.
+For example:
+
+```ts
+import { GEMINI_MODEL, GeminiEmbedding } from "llamaindex";
+
+Settings.embedModel = new GeminiEmbedding({
+ model: GEMINI_MODEL.GEMINI_PRO_LATEST,
+});
+```
+
+## API Reference
+
+- [GeminiEmbedding](/docs/api/classes/GeminiEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/huggingface.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/huggingface.mdx
new file mode 100644
index 0000000000..7b37de3a68
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/huggingface.mdx
@@ -0,0 +1,40 @@
+---
+title: HuggingFace
+---
+
+To use HuggingFace embeddings, you need to import `HuggingFaceEmbedding` from `llamaindex`.
+
+```ts
+import { Document, HuggingFaceEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+// Update Embed Model
+Settings.embedModel = new HuggingFaceEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+By default, `HuggingFaceEmbedding` uses the `Xenova/all-MiniLM-L6-v2` model. You can change the model by passing the `modelType` parameter to the constructor.
+If you're not using a quantized model, set the `quantized` parameter to `false`.
+
+For example, to use the non-quantized `BAAI/bge-small-en-v1.5` model, you can use the following code:
+
+```ts
+Settings.embedModel = new HuggingFaceEmbedding({
+ modelType: "BAAI/bge-small-en-v1.5",
+ quantized: false,
+});
+```
+
+## API Reference
+
+- [HuggingFaceEmbedding](/docs/api/classes/HuggingFaceEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx
new file mode 100644
index 0000000000..6a308ec0d6
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/jinaai.mdx
@@ -0,0 +1,27 @@
+---
+title: Jina AI
+---
+
+To use Jina AI embeddings, you need to import `JinaAIEmbedding` from `llamaindex`.
+
+```ts
+import { Document, JinaAIEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new JinaAIEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## API Reference
+
+- [JinaAIEmbedding](/docs/api/classes/JinaAIEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mistral.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mistral.mdx
new file mode 100644
index 0000000000..b7722640e0
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mistral.mdx
@@ -0,0 +1,30 @@
+---
+title: MistralAI
+---
+
+To use MistralAI embeddings, you need to import `MistralAIEmbedding` from `llamaindex`.
+
+```ts
+import { Document, MistralAIEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+// Update Embed Model
+Settings.embedModel = new MistralAIEmbedding({
+ apiKey: "",
+});
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## API Reference
+
+- [MistralAIEmbedding](/docs/api/classes/MistralAIEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mixedbreadai.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mixedbreadai.mdx
new file mode 100644
index 0000000000..f8395b58fb
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/mixedbreadai.mdx
@@ -0,0 +1,106 @@
+---
+title: MixedbreadAI
+---
+
+Welcome to the mixedbread embeddings guide! This guide will help you use mixedbread ai's API to generate embeddings for your text documents, ensuring you get the most relevant information, just like picking the freshest bread from the bakery.
+
+To find out more about the latest features, updates, and available models, visit [mixedbread.ai](https://mixedbread-ai.com/).
+
+## Table of Contents
+
+1. [Setup](#setup)
+2. [Usage with LlamaIndex](#usage-with-llamaindex)
+3. [Embeddings with Custom Parameters](#embeddings-with-custom-parameters)
+
+## Setup
+
+First, you will need to install the `llamaindex` package.
+
+```bash
+pnpm install llamaindex
+```
+
+Next, sign up for an API key at [mixedbread.ai](https://mixedbread.ai/). Once you have your API key, you can import the necessary modules and create a new instance of the `MixedbreadAIEmbeddings` class.
+
+```ts
+import { MixedbreadAIEmbeddings, Document, Settings, VectorStoreIndex } from "llamaindex";
+```
+
+## Usage with LlamaIndex
+
+This section will guide you through integrating mixedbread embeddings with LlamaIndex for more advanced usage.
+
+### Step 1: Load and Index Documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index, like a variety of breads in a bakery.
+
+```ts
+Settings.embedModel = new MixedbreadAIEmbeddings({
+ apiKey: "",
+ model: "mixedbread-ai/mxbai-embed-large-v1",
+});
+
+const document = new Document({
+ text: "The true source of happiness.",
+ id_: "bread",
+});
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+### Step 2: Create a Query Engine
+
+Combine the retriever and the embed model to create a query engine. This setup ensures that your queries are processed to provide the best results, like arranging the bread in the order of freshness and quality.
+
+Models can require prompts to generate embeddings for queries; in the case of the `mixedbread-ai/mxbai-embed-large-v1` model, the prompt is `Represent this sentence for searching relevant passages:`.
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query =
+ "Represent this sentence for searching relevant passages: What is bread?";
+
+// Log the response
+const results = await queryEngine.query({ query });
+console.log(results); // Serving up the freshest, most relevant results.
+```
+
+## Embeddings with Custom Parameters
+
+This section will guide you through generating embeddings with custom parameters and usage with, e.g., Matryoshka and binary embeddings.
+
+### Step 1: Create an Instance of MixedbreadAIEmbeddings
+
+Create a new instance of the `MixedbreadAIEmbeddings` class with custom parameters. For example, to use the `mixedbread-ai/mxbai-embed-large-v1` model with a batch size of 64, normalized embeddings, and binary encoding format:
+
+```ts
+const embeddings = new MixedbreadAIEmbeddings({
+ apiKey: "",
+ model: "mixedbread-ai/mxbai-embed-large-v1",
+ batchSize: 64,
+ normalized: true,
+ dimensions: 512,
+ encodingFormat: MixedbreadAI.EncodingFormat.Binary,
+});
+```
+
+### Step 2: Define Texts
+
+Define the texts you want to generate embeddings for.
+
+```ts
+const texts = ["Bread is life", "Bread is love"];
+```
+
+### Step 3: Generate Embeddings
+
+Use the `embedDocuments` method to generate embeddings for the texts.
+
+```ts
+const result = await embeddings.embedDocuments(texts);
+console.log(result); // Perfectly customized embeddings, ready to serve.
+```
+
+## API Reference
+
+- [MixedbreadAIEmbeddings](/docs/api/classes/MixedbreadAIEmbeddings)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/ollama.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/ollama.mdx
new file mode 100644
index 0000000000..b7e7eeb916
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/ollama.mdx
@@ -0,0 +1,35 @@
+---
+title: Ollama
+---
+
+To use Ollama embeddings, you need to import `OllamaEmbedding` from `llamaindex`.
+
+Note that you need to pull the embedding model first before using it.
+
+In the example below, we're using the [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) model, so you have to call:
+
+```shell
+ollama pull nomic-embed-text
+```
+
+```ts
+import { Document, OllamaEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new OllamaEmbedding({ model: "nomic-embed-text" });
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## API Reference
+
+- [OllamaEmbedding](/docs/api/classes/OllamaEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/openai.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/openai.mdx
new file mode 100644
index 0000000000..cf1ef52bad
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/openai.mdx
@@ -0,0 +1,27 @@
+---
+title: OpenAI
+---
+
+To use OpenAI embeddings, you need to import `OpenAIEmbedding` from `llamaindex`.
+
+```ts
+import { Document, OpenAIEmbedding, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new OpenAIEmbedding();
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## API Reference
+
+- [OpenAIEmbedding](/docs/api/classes/OpenAIEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx
new file mode 100644
index 0000000000..7d5a6a832e
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/available_embeddings/together.mdx
@@ -0,0 +1,29 @@
+---
+title: Together
+---
+
+To use Together embeddings, you need to import `TogetherEmbedding` from `llamaindex`.
+
+```ts
+import { Document, Settings, TogetherEmbedding, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new TogetherEmbedding({
+ apiKey: "",
+});
+
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## API Reference
+
+- [TogetherEmbedding](/docs/api/classes/TogetherEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/embeddings/index.mdx b/apps/next/src/content/docs/llamaindex/modules/embeddings/index.mdx
new file mode 100644
index 0000000000..d1a9110071
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/embeddings/index.mdx
@@ -0,0 +1,33 @@
+---
+title: Embedding
+---
+
+The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI.
+
+This can be explicitly updated through `Settings`:
+
+```typescript
+import { OpenAIEmbedding, Settings } from "llamaindex";
+
+Settings.embedModel = new OpenAIEmbedding({
+ model: "text-embedding-ada-002",
+});
+```
+
+## Local Embedding
+
+For local embeddings, you can use the [HuggingFace](/docs/llamaindex/modules/embeddings/available_embeddings/huggingface) embedding model.
+
+## Available Embeddings
+
+Most available embeddings are listed in the sidebar on the left.
+Additionally, the following integrations exist without separate documentation:
+
+- [ClipEmbedding](/docs/api/classes/ClipEmbedding) using `@xenova/transformers`
+- [FireworksEmbedding](/docs/api/classes/FireworksEmbedding) see [fireworks.ai](https://fireworks.ai/)
+
+Check the [LlamaIndexTS GitHub](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [OpenAIEmbedding](/docs/api/classes/OpenAIEmbedding)
diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx
new file mode 100644
index 0000000000..e6352809d8
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx
@@ -0,0 +1,34 @@
+---
+title: Evaluating
+---
+
+## Concept
+
+Evaluation and benchmarking are crucial concepts in LLM development. To improve the performance of an LLM app (RAG, agents) you must have a way to measure it.
+
+LlamaIndex offers key modules to measure the quality of generated results. We also offer key modules to measure retrieval quality.
+
+- **Response Evaluation**: Does the response match the retrieved context? Does it also match the query? Does it match the reference answer or guidelines?
+- **Retrieval Evaluation**: Are the retrieved sources relevant to the query?
+
+## Response Evaluation
+
+Evaluation of generated results can be difficult, since unlike traditional machine learning the predicted result is not a single number, and it can be hard to define quantitative metrics for this problem.
+
+LlamaIndex offers LLM-based evaluation modules to measure the quality of results. This uses a “gold” LLM (e.g. GPT-4) to decide whether the predicted answer is correct in a variety of ways.
+
+Note that many of these current evaluation modules do not require ground-truth labels. Evaluation can be done with some combination of the query, context, and response, combined with LLM calls.
+
+These evaluation modules are in the following forms:
+
+- **Correctness**: Whether the generated answer matches that of the reference answer given the query (requires labels).
+
+- **Faithfulness**: Evaluates if the answer is faithful to the retrieved contexts (in other words, whether there is any hallucination).
+
+- **Relevancy**: Evaluates if the response from a query engine matches any source nodes.
+
+## Usage
+
+- [Correctness Evaluator](/docs/llamaindex/modules/evaluation/correctness)
+- [Faithfulness Evaluator](/docs/llamaindex/modules/evaluation/faithfulness)
+- [Relevancy Evaluator](/docs/llamaindex/modules/evaluation/relevancy)
diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/correctness.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/correctness.mdx
new file mode 100644
index 0000000000..2e83594094
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/correctness.mdx
@@ -0,0 +1,64 @@
+---
+title: Correctness Evaluator
+---
+
+Correctness evaluates the relevance and correctness of a generated answer against a reference answer.
+
+This is useful for measuring if the response was correct. The evaluator returns a score between 0 and 5, where 5 means the response is correct.
+
+## Usage
+
+Firstly, you need to install the package:
+
+```bash
+pnpm i llamaindex
+```
+
+Set the OpenAI API key:
+
+```bash
+export OPENAI_API_KEY=your-api-key
+```
+
+Import the required modules:
+
+```ts
+import { CorrectnessEvaluator, OpenAI, Settings, Response } from "llamaindex";
+```
+
+Let's setup gpt-4 for better results:
+
+```ts
+Settings.llm = new OpenAI({
+ model: "gpt-4",
+});
+```
+
+```ts
+const query =
+ "Can you explain the theory of relativity proposed by Albert Einstein in detail?";
+
+const response = ` Certainly! Albert Einstein's theory of relativity consists of two main components: special relativity and general relativity. Special relativity, published in 1905, introduced the concept that the laws of physics are the same for all non-accelerating observers and that the speed of light in a vacuum is a constant, regardless of the motion of the source or observer. It also gave rise to the famous equation E=mc², which relates energy (E) and mass (m).
+
+However, general relativity, published in 1915, extended these ideas to include the effects of magnetism. According to general relativity, gravity is not a force between masses but rather the result of the warping of space and time by magnetic fields generated by massive objects. Massive objects, such as planets and stars, create magnetic fields that cause a curvature in spacetime, and smaller objects follow curved paths in response to this magnetic curvature. This concept is often illustrated using the analogy of a heavy ball placed on a rubber sheet with magnets underneath, causing it to create a depression that other objects (representing smaller masses) naturally move towards due to magnetic attraction.
+`;
+
+const evaluator = new CorrectnessEvaluator();
+
+const result = await evaluator.evaluateResponse({
+ query,
+ response: new Response(response),
+});
+
+console.log(
+ `the response is ${result.passing ? "correct" : "not correct"} with a score of ${result.score}`,
+);
+```
+
+```bash
+the response is not correct with a score of 2.5
+```
+
+## API Reference
+
+- [CorrectnessEvaluator](/docs/api/classes/CorrectnessEvaluator)
diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/faithfulness.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/faithfulness.mdx
new file mode 100644
index 0000000000..bf87bad80e
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/faithfulness.mdx
@@ -0,0 +1,84 @@
+---
+title: Faithfulness Evaluator
+---
+
+Faithfulness is a measure of whether the generated answer is faithful to the retrieved contexts. In other words, it measures whether there is any hallucination in the generated answer.
+
+This uses the FaithfulnessEvaluator module to measure if the response from a query engine matches any source nodes.
+
+This is useful for measuring if the response was hallucinated. The evaluator returns a score between 0 and 1, where 1 means the response is faithful to the retrieved contexts.
+
+## Usage
+
+Firstly, you need to install the package:
+
+```bash
+pnpm i llamaindex
+```
+
+Set the OpenAI API key:
+
+```bash
+export OPENAI_API_KEY=your-api-key
+```
+
+Import the required modules:
+
+```ts
+import {
+ Document,
+ FaithfulnessEvaluator,
+ OpenAI,
+ VectorStoreIndex,
+ Settings,
+} from "llamaindex";
+```
+
+Let's setup gpt-4 for better results:
+
+```ts
+Settings.llm = new OpenAI({
+ model: "gpt-4",
+});
+```
+
+Now, let's create a vector index and a query engine from our documents. Then we can evaluate the response from the query engine against the query:
+
+```ts
+const documents = [
+ new Document({
+ text: `The city came under British control in 1664 and was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. The city was regained by the Dutch in July 1673 and was renamed New Orange for one year and three months; the city has been continuously named New York since November 1674. New York City was the capital of the United States from 1785 until 1790, and has been the largest U.S. city since 1790. The Statue of Liberty greeted millions of immigrants as they came to the U.S. by ship in the late 19th and early 20th centuries, and is a symbol of the U.S. and its ideals of liberty and peace. In the 21st century, New York City has emerged as a global node of creativity, entrepreneurship, and as a symbol of freedom and cultural diversity. The New York Times has won the most Pulitzer Prizes for journalism and remains the U.S. media's "newspaper of record". In 2019, New York City was voted the greatest city in the world in a survey of over 30,000 p... Pass`,
+ }),
+];
+
+const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+
+const queryEngine = vectorIndex.asQueryEngine();
+```
+
+Now, let's evaluate the response:
+
+```ts
+const query = "How did New York City get its name?";
+
+const evaluator = new FaithfulnessEvaluator();
+
+const response = await queryEngine.query({
+ query,
+});
+
+const result = await evaluator.evaluateResponse({
+ query,
+ response,
+});
+
+console.log(`the response is ${result.passing ? "faithful" : "not faithful"}`);
+```
+
+```bash
+the response is faithful
+```
+
+## API Reference
+
+- [FaithfulnessEvaluator](/docs/api/classes/FaithfulnessEvaluator)
diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/relevancy.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/relevancy.mdx
new file mode 100644
index 0000000000..211998fb26
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/modules/relevancy.mdx
@@ -0,0 +1,78 @@
+---
+title: Relevancy Evaluator
+---
+
+Relevancy measures whether the response from a query engine matches any source nodes.
+
+It is useful for measuring if the response was relevant to the query. The evaluator returns a score between 0 and 1, where 1 means the response is relevant to the query.
+
+## Usage
+
+Firstly, you need to install the package:
+
+```bash
+pnpm i llamaindex
+```
+
+Set the OpenAI API key:
+
+```bash
+export OPENAI_API_KEY=your-api-key
+```
+
+Import the required modules:
+
+```ts
+import {
+ RelevancyEvaluator,
+ OpenAI,
+ Settings,
+ Document,
+ VectorStoreIndex,
+} from "llamaindex";
+```
+
+Let's setup gpt-4 for better results:
+
+```ts
+Settings.llm = new OpenAI({
+ model: "gpt-4",
+});
+```
+
+Now, let's create a vector index and a query engine from our documents. Then we can evaluate the response from the query engine against the query:
+
+```ts
+const documents = [
+ new Document({
+ text: `The city came under British control in 1664 and was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. The city was regained by the Dutch in July 1673 and was renamed New Orange for one year and three months; the city has been continuously named New York since November 1674. New York City was the capital of the United States from 1785 until 1790, and has been the largest U.S. city since 1790. The Statue of Liberty greeted millions of immigrants as they came to the U.S. by ship in the late 19th and early 20th centuries, and is a symbol of the U.S. and its ideals of liberty and peace. In the 21st century, New York City has emerged as a global node of creativity, entrepreneurship, and as a symbol of freedom and cultural diversity. The New York Times has won the most Pulitzer Prizes for journalism and remains the U.S. media's "newspaper of record". In 2019, New York City was voted the greatest city in the world in a survey of over 30,000 p... Pass`,
+ }),
+];
+
+const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+
+const queryEngine = vectorIndex.asQueryEngine();
+
+const query = "How did New York City get its name?";
+
+const response = await queryEngine.query({
+ query,
+});
+
+const evaluator = new RelevancyEvaluator();
+
+const result = await evaluator.evaluateResponse({
+ query,
+ response: response,
+});
+
+console.log(`the response is ${result.passing ? "relevant" : "not relevant"}`);
+```
+
+```bash
+the response is relevant
+```
+
+## API Reference
+
+- [RelevancyEvaluator](/docs/api/classes/RelevancyEvaluator)
diff --git a/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/index.mdx b/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/index.mdx
new file mode 100644
index 0000000000..045e7c5cf0
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/index.mdx
@@ -0,0 +1,105 @@
+---
+title: Ingestion Pipeline
+---
+
+An `IngestionPipeline` applies a sequence of `Transformations` to your input data.
+The resulting nodes are either returned or, if a vector store is given, inserted into it.
+
+## Usage Pattern
+
+The simplest usage is to instantiate an IngestionPipeline like so:
+
+```ts
+import fs from "node:fs/promises";
+
+import {
+ Document,
+ IngestionPipeline,
+ MetadataMode,
+ OpenAIEmbedding,
+ TitleExtractor,
+ SentenceSplitter,
+} from "llamaindex";
+
+async function main() {
+ // Load essay from abramov.txt in Node
+ const path = "node_modules/llamaindex/examples/abramov.txt";
+
+ const essay = await fs.readFile(path, "utf-8");
+
+ // Create Document object with essay
+ const document = new Document({ text: essay, id_: path });
+ const pipeline = new IngestionPipeline({
+ transformations: [
+ new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
+ new TitleExtractor(),
+ new OpenAIEmbedding(),
+ ],
+ });
+
+ // run the pipeline
+ const nodes = await pipeline.run({ documents: [document] });
+
+ // print out the result of the pipeline run
+ for (const node of nodes) {
+ console.log(node.getContent(MetadataMode.NONE));
+ }
+}
+
+main().catch(console.error);
+```
+
+## Connecting to Vector Databases
+
+When running an ingestion pipeline, you can also choose to automatically insert the resulting nodes into a remote vector store.
+
+Then, you can construct an index from that vector store later on.
+
+```ts
+import fs from "node:fs/promises";
+
+import {
+ Document,
+ IngestionPipeline,
+ MetadataMode,
+ OpenAIEmbedding,
+ TitleExtractor,
+ SentenceSplitter,
+ QdrantVectorStore,
+ VectorStoreIndex,
+} from "llamaindex";
+
+async function main() {
+ // Load essay from abramov.txt in Node
+ const path = "node_modules/llamaindex/examples/abramov.txt";
+
+ const essay = await fs.readFile(path, "utf-8");
+
+ const vectorStore = new QdrantVectorStore({
+ host: "http://localhost:6333",
+ });
+
+ // Create Document object with essay
+ const document = new Document({ text: essay, id_: path });
+ const pipeline = new IngestionPipeline({
+ transformations: [
+ new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
+ new TitleExtractor(),
+ new OpenAIEmbedding(),
+ ],
+ vectorStore,
+ });
+
+ // run the pipeline
+ const nodes = await pipeline.run({ documents: [document] });
+
+ // create an index
+  const index = await VectorStoreIndex.fromVectorStore(vectorStore);
+}
+
+main().catch(console.error);
+```
+
+## API Reference
+
+- [IngestionPipeline](/docs/api/classes/IngestionPipeline)
diff --git a/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/transformations.mdx b/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/transformations.mdx
new file mode 100644
index 0000000000..70bd297c8c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/ingestion_pipeline/transformations.mdx
@@ -0,0 +1,83 @@
+---
+title: Transformations
+---
+
+A transformation is something that takes a list of nodes as an input, and returns a list of nodes. Each component that implements the `TransformComponent` class has a `transform` method responsible for transforming the nodes.
+
+Currently, the following components are Transformation objects:
+
+- [SentenceSplitter](/docs/api/classes/SentenceSplitter)
+- [MetadataExtractor](/docs/llamaindex/modules/documents_and_nodes/metadata_extraction)
+- [Embeddings](/docs/llamaindex/modules/embeddings/index)
+
+## Usage Pattern
+
+While transformations are best used with an IngestionPipeline, they can also be used directly.
+
+```ts
+import { Document, MetadataMode, SentenceSplitter, TitleExtractor } from "llamaindex";
+
+async function main() {
+ let nodes = new SentenceSplitter().getNodesFromDocuments([
+ new Document({ text: "I am 10 years old. John is 20 years old." }),
+ ]);
+
+ const titleExtractor = new TitleExtractor();
+
+ nodes = await titleExtractor.transform(nodes);
+
+ for (const node of nodes) {
+ console.log(node.getContent(MetadataMode.NONE));
+ }
+}
+
+main().catch(console.error);
+```
+
+## Custom Transformations
+
+You can implement any transformation yourself by implementing the `TransformComponent`.
+
+The following custom transformation will remove any special characters or punctuation in text.
+
+```ts
+import { TransformComponent, TextNode } from "llamaindex";
+
+export class RemoveSpecialCharacters extends TransformComponent {
+  async transform(nodes: TextNode[]): Promise<TextNode[]> {
+ for (const node of nodes) {
+ node.text = node.text.replace(/[^\w\s]/gi, "");
+ }
+
+ return nodes;
+ }
+}
+```
+
+These can then be used directly or in any IngestionPipeline.
+
+```ts
+import { Document, IngestionPipeline, MetadataMode } from "llamaindex";
+
+async function main() {
+ const pipeline = new IngestionPipeline({
+ transformations: [new RemoveSpecialCharacters()],
+ });
+
+ const nodes = await pipeline.run({
+ documents: [
+ new Document({ text: "I am 10 years old. John is 20 years old." }),
+ ],
+ });
+
+ for (const node of nodes) {
+ console.log(node.getContent(MetadataMode.NONE));
+ }
+}
+
+main().catch(console.error);
+```
+
+## API Reference
+
+- [TransformComponent](/docs/api/classes/TransformComponent)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llamacloud.mdx b/apps/next/src/content/docs/llamaindex/modules/llamacloud.mdx
new file mode 100644
index 0000000000..898275ccc7
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llamacloud.mdx
@@ -0,0 +1,34 @@
+---
+title: LlamaCloud
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/cloud/chat.ts";
+
+LlamaCloud is a new generation of managed parsing, ingestion, and retrieval services, designed to bring production-grade context-augmentation to your LLM and RAG applications.
+
+Currently, LlamaCloud supports
+
+- Managed Ingestion API, handling parsing and document management
+- Managed Retrieval API, configuring optimal retrieval for your RAG system
+
+## Access
+
+We are opening up a private beta to a limited set of enterprise partners for the managed ingestion and retrieval API. If you’re interested in centralizing your data pipelines and spending more time working on your actual RAG use cases, come [talk to us.](https://www.llamaindex.ai/contact)
+
+If you have access to LlamaCloud, you can visit [LlamaCloud](https://cloud.llamaindex.ai) to sign in and get an API key.
+
+## Create a Managed Index
+
+Currently, you can't create a managed index on LlamaCloud using LlamaIndexTS, but you can use an existing managed index for retrieval that was created by the Python version of LlamaIndex. See [the LlamaCloudIndex documentation](https://docs.llamaindex.ai/en/stable/module_guides/indexing/llama_cloud_index.html#usage) for more information on how to create a managed index.
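+
+For retrieval against an existing managed index, a minimal sketch might look like the following (the index name, project name, and API key are placeholders; adjust them to your LlamaCloud project):
+
+```ts
+import { LlamaCloudIndex } from "llamaindex";
+
+// Connect to an index that was already created in LlamaCloud
+const index = new LlamaCloudIndex({
+  name: "my-index", // placeholder index name
+  projectName: "Default", // placeholder project name
+  apiKey: process.env.LLAMA_CLOUD_API_KEY,
+});
+
+const queryEngine = index.asQueryEngine();
+
+const response = await queryEngine.query({
+  query: "What is LlamaCloud?",
+});
+```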
+
+## Use a Managed Index
+
+Here's an example of how to use a managed index together with a chat engine:
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+## API Reference
+
+- [LlamaCloudIndex](/docs/api/classes/LlamaCloudIndex)
+- [LlamaCloudRetriever](/docs/api/classes/LlamaCloudRetriever)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/anthropic.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/anthropic.mdx
new file mode 100644
index 0000000000..7b508fa476
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/anthropic.mdx
@@ -0,0 +1,71 @@
+---
+title: Anthropic
+---
+
+## Usage
+
+```ts
+import { Anthropic, Settings } from "llamaindex";
+
+Settings.llm = new Anthropic({
+ apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { Anthropic, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+Settings.llm = new Anthropic({
+ apiKey: "",
+});
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // get retriever
+  const retriever = index.asRetriever();
+
+  // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [Anthropic](/docs/api/classes/Anthropic)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/azure.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/azure.mdx
new file mode 100644
index 0000000000..e43cf39f1c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/azure.mdx
@@ -0,0 +1,82 @@
+---
+title: Azure OpenAI
+---
+
+To use Azure OpenAI, you only need to set a few environment variables together with the `OpenAI` class.
+
+For example:
+
+## Environment Variables
+
+```
+export AZURE_OPENAI_KEY=""
+export AZURE_OPENAI_ENDPOINT=""
+export AZURE_OPENAI_DEPLOYMENT="gpt-4" # or some other deployment name
+```
+
+## Usage
+
+```ts
+import { OpenAI, Settings } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { OpenAI, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [OpenAI](/docs/api/classes/OpenAI)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/bedrock.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/bedrock.mdx
new file mode 100644
index 0000000000..ba9f52c14d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/bedrock.mdx
@@ -0,0 +1,174 @@
+---
+title: Bedrock
+---
+
+## Usage
+
+```ts
+import { BEDROCK_MODELS, Bedrock } from "@llamaindex/community";
+
+Settings.llm = new Bedrock({
+ model: BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
+ region: "us-east-1", // can be provided via env AWS_REGION
+ credentials: {
+ accessKeyId: "...", // optional and can be provided via env AWS_ACCESS_KEY_ID
+ secretAccessKey: "...", // optional and can be provided via env AWS_SECRET_ACCESS_KEY
+ },
+});
+```
+
+Currently, the following Anthropic, Meta, and Amazon Nova models are supported:
+
+```ts
+ANTHROPIC_CLAUDE_INSTANT_1 = "anthropic.claude-instant-v1";
+ANTHROPIC_CLAUDE_2 = "anthropic.claude-v2";
+ANTHROPIC_CLAUDE_2_1 = "anthropic.claude-v2:1";
+ANTHROPIC_CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0";
+ANTHROPIC_CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0";
+ANTHROPIC_CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0"; // available on us-west-2
+ANTHROPIC_CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0";
+ANTHROPIC_CLAUDE_3_5_HAIKU = "anthropic.claude-3-5-haiku-20241022-v1:0";
+META_LLAMA2_13B_CHAT = "meta.llama2-13b-chat-v1";
+META_LLAMA2_70B_CHAT = "meta.llama2-70b-chat-v1";
+META_LLAMA3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0";
+META_LLAMA3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0";
+META_LLAMA3_1_8B_INSTRUCT = "meta.llama3-1-8b-instruct-v1:0"; // available on us-west-2
+META_LLAMA3_1_70B_INSTRUCT = "meta.llama3-1-70b-instruct-v1:0"; // available on us-west-2
+META_LLAMA3_1_405B_INSTRUCT = "meta.llama3-1-405b-instruct-v1:0"; // available on us-west-2, tool calling supported
+META_LLAMA3_2_1B_INSTRUCT = "meta.llama3-2-1b-instruct-v1:0"; // only available via inference endpoints (see below)
+META_LLAMA3_2_3B_INSTRUCT = "meta.llama3-2-3b-instruct-v1:0"; // only available via inference endpoints (see below)
+META_LLAMA3_2_11B_INSTRUCT = "meta.llama3-2-11b-instruct-v1:0"; // only available via inference endpoints (see below), multimodal and function call supported
+META_LLAMA3_2_90B_INSTRUCT = "meta.llama3-2-90b-instruct-v1:0"; // only available via inference endpoints (see below), multimodal and function call supported
+AMAZON_NOVA_PRO_1 = "amazon.nova-pro-v1:0";
+AMAZON_NOVA_LITE_1 = "amazon.nova-lite-v1:0";
+AMAZON_NOVA_MICRO_1 = "amazon.nova-micro-v1:0";
+```
+
+You can also use Bedrock's Inference endpoints by using the model names:
+
+```ts
+// US
+US_ANTHROPIC_CLAUDE_3_HAIKU = "us.anthropic.claude-3-haiku-20240307-v1:0";
+US_ANTHROPIC_CLAUDE_3_OPUS = "us.anthropic.claude-3-opus-20240229-v1:0";
+US_ANTHROPIC_CLAUDE_3_SONNET = "us.anthropic.claude-3-sonnet-20240229-v1:0";
+US_ANTHROPIC_CLAUDE_3_5_SONNET = "us.anthropic.claude-3-5-sonnet-20240620-v1:0";
+US_ANTHROPIC_CLAUDE_3_5_SONNET_V2 =
+ "us.anthropic.claude-3-5-sonnet-20241022-v2:0";
+US_META_LLAMA_3_2_1B_INSTRUCT = "us.meta.llama3-2-1b-instruct-v1:0";
+US_META_LLAMA_3_2_3B_INSTRUCT = "us.meta.llama3-2-3b-instruct-v1:0";
+US_META_LLAMA_3_2_11B_INSTRUCT = "us.meta.llama3-2-11b-instruct-v1:0";
+US_META_LLAMA_3_2_90B_INSTRUCT = "us.meta.llama3-2-90b-instruct-v1:0";
+US_AMAZON_NOVA_PRO_1 = "us.amazon.nova-pro-v1:0";
+US_AMAZON_NOVA_LITE_1 = "us.amazon.nova-lite-v1:0";
+US_AMAZON_NOVA_MICRO_1 = "us.amazon.nova-micro-v1:0";
+
+// EU
+EU_ANTHROPIC_CLAUDE_3_HAIKU = "eu.anthropic.claude-3-haiku-20240307-v1:0";
+EU_ANTHROPIC_CLAUDE_3_SONNET = "eu.anthropic.claude-3-sonnet-20240229-v1:0";
+EU_ANTHROPIC_CLAUDE_3_5_SONNET = "eu.anthropic.claude-3-5-sonnet-20240620-v1:0";
+EU_META_LLAMA_3_2_1B_INSTRUCT = "eu.meta.llama3-2-1b-instruct-v1:0";
+EU_META_LLAMA_3_2_3B_INSTRUCT = "eu.meta.llama3-2-3b-instruct-v1:0";
+```
+
+Sonnet, Haiku, and Opus are multimodal. `image_url` only supports the base64 data URL format, e.g. `data:image/jpeg;base64,SGVsbG8sIFdvcmxkIQ==`.
+
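+For example, here is a rough sketch of sending an image to one of the multimodal Claude models (the message content shape below is an assumption based on the generic chat message types; adjust it to the version you are using):
+
+```ts
+import { BEDROCK_MODELS, Bedrock } from "@llamaindex/community";
+
+const llm = new Bedrock({ model: BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU });
+
+async function main() {
+  const response = await llm.chat({
+    messages: [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: "Describe this image." },
+          {
+            type: "image_url",
+            // image_url must be a base64 data URL (shortened placeholder)
+            image_url: { url: "data:image/jpeg;base64,SGVsbG8sIFdvcmxkIQ==" },
+          },
+        ],
+      },
+    ],
+  });
+
+  console.log(response.message.content);
+}
+
+main().catch(console.error);
+```
+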
+## Full Example
+
+```ts
+import { BEDROCK_MODELS, Bedrock } from "@llamaindex/community";
+import { Document, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.llm = new Bedrock({
+ model: BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
+});
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // get retriever
+  const retriever = index.asRetriever();
+
+  // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## Agent Example
+
+```ts
+import { BEDROCK_MODELS, Bedrock } from "@llamaindex/community";
+import { FunctionTool, LLMAgent } from "llamaindex";
+
+const sumNumbers = FunctionTool.from(
+ ({ a, b }: { a: number; b: number }) => `${a + b}`,
+ {
+ name: "sumNumbers",
+ description: "Use this function to sum two numbers",
+ parameters: {
+ type: "object",
+ properties: {
+ a: {
+ type: "number",
+ description: "The first number",
+ },
+ b: {
+ type: "number",
+ description: "The second number",
+ },
+ },
+ required: ["a", "b"],
+ },
+ },
+);
+
+const divideNumbers = FunctionTool.from(
+ ({ a, b }: { a: number; b: number }) => `${a / b}`,
+ {
+ name: "divideNumbers",
+ description: "Use this function to divide two numbers",
+ parameters: {
+ type: "object",
+ properties: {
+ a: {
+ type: "number",
+ description: "The dividend a to divide",
+ },
+ b: {
+ type: "number",
+ description: "The divisor b to divide by",
+ },
+ },
+ required: ["a", "b"],
+ },
+ },
+);
+
+const bedrock = new Bedrock({
+ model: BEDROCK_MODELS.META_LLAMA3_1_405B_INSTRUCT,
+ ...
+});
+
+async function main() {
+ const agent = new LLMAgent({
+ llm: bedrock,
+ tools: [sumNumbers, divideNumbers],
+ });
+
+ const response = await agent.chat({
+ message: "How much is 5 + 5? then divide by 2",
+ });
+
+ console.log(response.message);
+}
+```
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepinfra.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepinfra.mdx
new file mode 100644
index 0000000000..7a82ffeeec
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepinfra.mdx
@@ -0,0 +1,89 @@
+---
+title: DeepInfra
+---
+
+Check out available LLMs [here](https://deepinfra.com/models/text-generation).
+
+```ts
+import { DeepInfra, Settings } from "llamaindex";
+
+// Get the API key from `DEEPINFRA_API_TOKEN` environment variable
+import { config } from "dotenv";
+config();
+Settings.llm = new DeepInfra();
+
+// Or set the API key explicitly
+const apiKey = "YOUR_API_KEY";
+Settings.llm = new DeepInfra({ apiKey });
+```
+
+Alternatively, you can set the API key via an environment variable:
+
+```bash
+export DEEPINFRA_API_TOKEN=""
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { DeepInfra, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+// Use custom LLM
+const model = "meta-llama/Meta-Llama-3-8B-Instruct";
+Settings.llm = new DeepInfra({ model, temperature: 0 });
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## Feedback
+
+If you have any feedback, please reach out to us at [feedback@deepinfra.com](mailto:feedback@deepinfra.com)
+
+## API Reference
+
+- [DeepInfra](/docs/api/classes/DeepInfra)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx
new file mode 100644
index 0000000000..928164ec87
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/deepseek.mdx
@@ -0,0 +1,54 @@
+---
+title: DeepSeek LLM
+---
+
+[DeepSeek Platform](https://platform.deepseek.com/)
+
+## Usage
+
+```ts
+import { DeepSeekLLM, Settings } from "llamaindex";
+
+Settings.llm = new DeepSeekLLM({
+ apiKey: "",
+ model: "deepseek-coder", // or "deepseek-chat"
+});
+```
+
+## Example
+
+```ts
+import { DeepSeekLLM, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+const deepseekLlm = new DeepSeekLLM({
+ apiKey: "",
+ model: "deepseek-coder", // or "deepseek-chat"
+});
+
+async function main() {
+  const response = await deepseekLlm.chat({
+ messages: [
+ {
+ role: "system",
+ content: "You are an AI assistant",
+ },
+ {
+ role: "user",
+ content: "Tell me about San Francisco",
+ },
+ ],
+ stream: false,
+ });
+ console.log(response);
+}
+```
+
+## Limitations
+
+DeepSeek currently does not support function calling.
+
+[It also does not yet support a JSON output parameter, although it is still very good at generating JSON.](https://platform.deepseek.com/api-docs/faq#does-your-api-support-json-output)
+
+## API Reference
+
+- [DeepSeekLLM](/docs/api/classes/DeepSeekLLM)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx
new file mode 100644
index 0000000000..1680de683d
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/fireworks.mdx
@@ -0,0 +1,67 @@
+---
+title: Fireworks LLM
+---
+
+[Fireworks.ai](https://fireworks.ai/) focuses on production use cases for open-source LLMs, offering speed and quality.
+
+## Usage
+
+```ts
+import { FireworksLLM, Settings } from "llamaindex";
+
+Settings.llm = new FireworksLLM({
+ apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will load the Berkshire Hathaway 2022 annual report pdf
+
+```ts
+const reader = new PDFReader();
+const documents = await reader.loadData("../data/brk-2022.pdf");
+
+// Split text and create embeddings. Store them in a VectorStoreIndex
+const index = await VectorStoreIndex.fromDocuments(documents);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+const response = await queryEngine.query({
+ query: "What mistakes did Warren E. Buffett make?",
+});
+```
+
+## Full Example
+
+```ts
+import { FireworksLLM, Settings, VectorStoreIndex } from "llamaindex";
+import { PDFReader } from "llamaindex/readers/PDFReader";
+
+// Use the Fireworks LLM
+Settings.llm = new FireworksLLM({
+  apiKey: "",
+});
+
+async function main() {
+ // Load PDF
+ const reader = new PDFReader();
+ const documents = await reader.loadData("../data/brk-2022.pdf");
+
+ // Split text and create embeddings. Store them in a VectorStoreIndex
+ const index = await VectorStoreIndex.fromDocuments(documents);
+
+ // Query the index
+ const queryEngine = index.asQueryEngine();
+ const response = await queryEngine.query({
+ query: "What mistakes did Warren E. Buffett make?",
+ });
+
+ // Output response
+ console.log(response.toString());
+}
+
+main().catch(console.error);
+```
+
+## API Reference
+
+- [FireworksLLM](/docs/api/classes/FireworksLLM)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/gemini.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/gemini.mdx
new file mode 100644
index 0000000000..0f981b6786
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/gemini.mdx
@@ -0,0 +1,107 @@
+---
+title: Gemini
+---
+
+## Usage
+
+```ts
+import { Gemini, Settings, GEMINI_MODEL } from "llamaindex";
+
+Settings.llm = new Gemini({
+ model: GEMINI_MODEL.GEMINI_PRO,
+});
+```
+
+### Usage with Vertex AI
+
+To use Gemini via Vertex AI you can use `GeminiVertexSession`.
+
+`GeminiVertexSession` accepts the environment variables `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`:
+
+```ts
+import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "llamaindex";
+
+const gemini = new Gemini({
+ model: GEMINI_MODEL.GEMINI_PRO,
+ session: new GeminiVertexSession({
+ location: "us-central1", // optional if provided by GOOGLE_VERTEX_LOCATION env variable
+ project: "project1", // optional if provided by GOOGLE_VERTEX_PROJECT env variable
+ googleAuthOptions: {...}, // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
+ }),
+});
+```
+
+[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
+
+To authenticate for local development:
+
+```bash
+npm install @google-cloud/vertexai
+gcloud auth application-default login
+```
+
+To authenticate in production, you'll have to use a [service account](https://cloud.google.com/docs/authentication/). The `credentials` field of `googleAuthOptions` might be useful for this.
+
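+As a sketch, a production setup might pass service-account credentials directly (the `client_email` and `private_key` values below are placeholders; in practice, load them from a secret manager or a key file rather than hard-coding them):
+
+```ts
+import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "llamaindex";
+
+const gemini = new Gemini({
+  model: GEMINI_MODEL.GEMINI_PRO,
+  session: new GeminiVertexSession({
+    googleAuthOptions: {
+      // standard google-auth-library credentials object
+      credentials: {
+        client_email: "my-service-account@my-project.iam.gserviceaccount.com",
+        private_key: "-----BEGIN PRIVATE KEY-----\n...",
+      },
+    },
+  }),
+});
+```
+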
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import {
+ Gemini,
+ Document,
+ VectorStoreIndex,
+ Settings,
+ GEMINI_MODEL,
+} from "llamaindex";
+
+Settings.llm = new Gemini({
+ model: GEMINI_MODEL.GEMINI_PRO,
+});
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // get retriever
+  const retriever = index.asRetriever();
+
+  // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [Gemini](/docs/api/classes/Gemini)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/groq.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/groq.mdx
new file mode 100644
index 0000000000..b2bcfceddf
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/groq.mdx
@@ -0,0 +1,56 @@
+---
+title: Groq
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../../examples/groq.ts";
+
+## Usage
+
+First, create an API key at the [Groq Console](https://console.groq.com/keys). Then save it in your environment:
+
+```bash
+export GROQ_API_KEY=
+```
+
+Then initialize the Groq module:
+
+```ts
+import { Groq, Settings } from "llamaindex";
+
+Settings.llm = new Groq({
+ // If you do not wish to set your API key in the environment, you may
+ // configure your API key when you initialize the Groq class.
+ // apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+## API Reference
+
+- [Groq](/docs/api/classes/Groq)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/llama2.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/llama2.mdx
new file mode 100644
index 0000000000..47202a66d5
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/llama2.mdx
@@ -0,0 +1,98 @@
+---
+title: Llama2
+---
+
+## Usage
+
+```ts
+import { LlamaDeuce, Settings, DeuceChatStrategy } from "llamaindex";
+
+Settings.llm = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
+```
+
+## Usage with Replicate
+
+```ts
+import {
+  LlamaDeuce,
+ ReplicateSession,
+ Settings,
+ DeuceChatStrategy,
+} from "llamaindex";
+
+const replicateKey = ""; // your Replicate API key
+
+const replicateSession = new ReplicateSession({
+ replicateKey,
+});
+
+Settings.llm = new LlamaDeuce({
+ chatStrategy: DeuceChatStrategy.META,
+ replicateSession,
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import {
+ LlamaDeuce,
+ Document,
+ VectorStoreIndex,
+ Settings,
+ DeuceChatStrategy,
+} from "llamaindex";
+
+// Use the LlamaDeuce LLM
+Settings.llm = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [LlamaDeuce](/docs/api/variables/LlamaDeuce)
+- [DeuceChatStrategy](/docs/api/variables/DeuceChatStrategy)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/mistral.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/mistral.mdx
new file mode 100644
index 0000000000..ef25de1e7b
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/mistral.mdx
@@ -0,0 +1,74 @@
+---
+title: Mistral
+---
+
+## Usage
+
+```ts
+import { MistralAI, Settings } from "llamaindex";
+
+Settings.llm = new MistralAI({
+ model: "mistral-tiny",
+ apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { MistralAI, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+// Use the MistralAI LLM
+Settings.llm = new MistralAI({ model: "mistral-tiny" });
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [MistralAI](/docs/api/classes/MistralAI)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/ollama.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/ollama.mdx
new file mode 100644
index 0000000000..81e1eddbec
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/ollama.mdx
@@ -0,0 +1,79 @@
+---
+title: Ollama
+---
+
+## Usage
+
+```ts
+import { Ollama, Settings } from "llamaindex";
+
+const ollamaLLM = new Ollama({ model: "llama2", temperature: 0.75 });
+
+Settings.llm = ollamaLLM;
+Settings.embedModel = ollamaLLM;
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { Ollama, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+import fs from "fs/promises";
+
+const ollama = new Ollama({ model: "llama2", temperature: 0.75 });
+
+// Use Ollama LLM and Embed Model
+Settings.llm = ollama;
+Settings.embedModel = ollama;
+
+async function main() {
+ const essay = await fs.readFile("./paul_graham_essay.txt", "utf-8");
+
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [Ollama](/docs/api/classes/Ollama)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/openai.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/openai.mdx
new file mode 100644
index 0000000000..7211bd6c64
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/openai.mdx
@@ -0,0 +1,75 @@
+---
+title: OpenAI
+---
+
+```ts
+import { OpenAI, Settings } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0, apiKey: "" });
+```
+
+Alternatively, you can set the API key via an environment variable:
+
+```bash
+export OPENAI_API_KEY=""
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { OpenAI, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+// Use the OpenAI LLM
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [OpenAI](/docs/api/classes/OpenAI)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/portkey.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/portkey.mdx
new file mode 100644
index 0000000000..b199afcbfa
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/portkey.mdx
@@ -0,0 +1,76 @@
+---
+title: Portkey LLM
+---
+
+## Usage
+
+```ts
+import { Portkey, Settings } from "llamaindex";
+
+Settings.llm = new Portkey({
+ apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { Portkey, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+// Use the Portkey LLM
+Settings.llm = new Portkey({
+ apiKey: "",
+});
+
+async function main() {
+ // Create a document
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [Portkey](/docs/api/classes/Portkey)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx
new file mode 100644
index 0000000000..6e395bc3a2
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/available_llms/together.mdx
@@ -0,0 +1,74 @@
+---
+title: Together LLM
+---
+
+## Usage
+
+```ts
+import { TogetherLLM, Settings } from "llamaindex";
+
+Settings.llm = new TogetherLLM({
+ apiKey: "",
+});
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+ query,
+});
+```
+
+## Full Example
+
+```ts
+import { TogetherLLM, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+Settings.llm = new TogetherLLM({
+ apiKey: "",
+});
+
+async function main() {
+ const document = new Document({ text: essay, id_: "essay" });
+
+ // Load and index documents
+ const index = await VectorStoreIndex.fromDocuments([document]);
+
+ // get retriever
+ const retriever = index.asRetriever();
+
+ // Create a query engine
+ const queryEngine = index.asQueryEngine({
+ retriever,
+ });
+
+ const query = "What is the meaning of life?";
+
+ // Query
+ const response = await queryEngine.query({
+ query,
+ });
+
+ // Log the response
+ console.log(response.response);
+}
+```
+
+## API Reference
+
+- [TogetherLLM](/docs/api/classes/TogetherLLM)
diff --git a/apps/next/src/content/docs/llamaindex/modules/llms/index.mdx b/apps/next/src/content/docs/llamaindex/modules/llms/index.mdx
new file mode 100644
index 0000000000..7fecb26f9e
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/llms/index.mdx
@@ -0,0 +1,42 @@
+---
+title: Large Language Models (LLMs)
+---
+
+The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`.
+
+The LLM can be explicitly updated through `Settings`.
+
+```typescript
+import { OpenAI, Settings } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
+```
+
+## Azure OpenAI
+
+To use Azure OpenAI, you only need to set a few environment variables.
+
+For example:
+
+```
+export AZURE_OPENAI_KEY=""
+export AZURE_OPENAI_ENDPOINT=""
+export AZURE_OPENAI_DEPLOYMENT="gpt-4" # or some other deployment name
+```
+
+## Local LLM
+
+For local LLMs, currently we recommend the use of [Ollama](/docs/llamaindex/modules/llms/available_llms/ollama) LLM.
+
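+A minimal local setup mirrors the Ollama example (this sketch assumes a local Ollama server is running and the model has been pulled, e.g. `ollama pull llama2`):
+
+```typescript
+import { Ollama, Settings } from "llamaindex";
+
+const ollama = new Ollama({ model: "llama2" });
+
+// Use Ollama for both the LLM and the embedding model
+Settings.llm = ollama;
+Settings.embedModel = ollama;
+```
+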
+## Available LLMs
+
+Most available LLMs are listed in the sidebar on the left. Additionally, the following integrations exist without separate documentation:
+
+- [HuggingFaceLLM](/docs/api/classes/HuggingFaceLLM) and [HuggingFaceInferenceAPI](/docs/api/classes/HuggingFaceInferenceAPI).
+- [ReplicateLLM](/docs/api/classes/ReplicateLLM) see [replicate.com](https://replicate.com/)
+
+Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for the most up-to-date overview of integrations.
+
+## API Reference
+
+- [OpenAI](/docs/api/classes/OpenAI)
diff --git a/apps/next/src/content/docs/llamaindex/modules/node_parser.mdx b/apps/next/src/content/docs/llamaindex/modules/node_parser.mdx
new file mode 100644
index 0000000000..5f9af92c7e
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/node_parser.mdx
@@ -0,0 +1,95 @@
+---
+title: NodeParser
+---
+
+The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `Settings` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.
+
+```typescript
+import { SentenceSplitter, Settings } from "llamaindex";
+
+const nodeParser = new SentenceSplitter();
+
+Settings.nodeParser = nodeParser;
+```
+
+## TextSplitter
+
+The underlying text splitter will split text by sentences. It can also be used as a standalone module for splitting raw text.
+
+```typescript
+import { SentenceSplitter } from "llamaindex";
+
+const splitter = new SentenceSplitter({ chunkSize: 1 });
+
+const textSplits = splitter.splitText("Hello World");
+```
+
+## MarkdownNodeParser
+
+The `MarkdownNodeParser` is a more advanced `NodeParser` that can handle markdown documents. It splits a markdown `Document` into nodes based on its headers and stores the header hierarchy in each node's metadata.
+
+```typescript
+import { MarkdownNodeParser } from "llamaindex";
+
+const nodeParser = new MarkdownNodeParser();
+
+const nodes = nodeParser.getNodesFromDocuments([
+ new Document({
+ text: `# Main Header
+Main content
+
+# Header 2
+Header 2 content
+
+## Sub-header
+Sub-header content
+
+ `,
+ }),
+]);
+```
+
+The output metadata will be something like:
+
+```bash
+[
+ TextNode {
+ id_: '008e41a8-b097-487c-bee8-bd88b9455844',
+ metadata: { 'Header 1': 'Main Header' },
+ excludedEmbedMetadataKeys: [],
+ excludedLlmMetadataKeys: [],
+ relationships: { PARENT: [Array] },
+ hash: 'KJ5e/um/RkHaNR6bonj9ormtZY7I8i4XBPVYHXv1A5M=',
+ text: 'Main Header\nMain content',
+ textTemplate: '',
+ metadataSeparator: '\n'
+ },
+ TextNode {
+ id_: '0f5679b3-ba63-4aff-aedc-830c4208d0b5',
+ metadata: { 'Header 1': 'Header 2' },
+ excludedEmbedMetadataKeys: [],
+ excludedLlmMetadataKeys: [],
+ relationships: { PARENT: [Array] },
+ hash: 'IP/g/dIld3DcbK+uHzDpyeZ9IdOXY4brxhOIe7wc488=',
+ text: 'Header 2\nHeader 2 content',
+ textTemplate: '',
+ metadataSeparator: '\n'
+ },
+ TextNode {
+ id_: 'e81e9bd0-121c-4ead-8ca7-1639d65fdf90',
+ metadata: { 'Header 1': 'Header 2', 'Header 2': 'Sub-header' },
+ excludedEmbedMetadataKeys: [],
+ excludedLlmMetadataKeys: [],
+ relationships: { PARENT: [Array] },
+ hash: 'B3kYNnxaYi9ghtAgwza0ZEVKF4MozobkNUlcekDL7JQ=',
+ text: 'Sub-header\nSub-header content',
+ textTemplate: '',
+ metadataSeparator: '\n'
+ }
+]
+```
+
+## API Reference
+
+- [SentenceSplitter](/docs/api/classes/SentenceSplitter)
+- [MarkdownNodeParser](/docs/api/classes/MarkdownNodeParser)
diff --git a/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/cohere_reranker.mdx b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/cohere_reranker.mdx
new file mode 100644
index 0000000000..a64cea55ca
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/cohere_reranker.mdx
@@ -0,0 +1,74 @@
+---
+title: Cohere Reranker
+---
+
+The Cohere Reranker is a postprocessor that uses the Cohere API to rerank the results of a search query.
+
+## Setup
+
+Firstly, you will need to install the `llamaindex` package.
+
+```bash
+pnpm install llamaindex
+```
+
+Now, you will need to sign up for an API key at [Cohere](https://cohere.ai/). Once you have your API key you can import the necessary modules and create a new instance of the `CohereRerank` class.
+
+```ts
+import {
+ CohereRerank,
+ Document,
+ OpenAI,
+ VectorStoreIndex,
+ Settings,
+} from "llamaindex";
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Increase similarity topK to retrieve more results
+
+The default value for `similarityTopK` is 2. This means that only the most similar document will be returned. To retrieve more results, you can increase the value of `similarityTopK`.
+
+```ts
+const retriever = index.asRetriever({
+ similarityTopK: 5,
+});
+```
+
+## Create a new instance of the CohereRerank class
+
+Then you can create a new instance of the `CohereRerank` class and pass in your API key and the number of results you want to return.
+
+```ts
+const nodePostprocessor = new CohereRerank({
+ apiKey: "",
+ topN: 4,
+});
+```
+
+## Create a query engine with the retriever and node postprocessor
+
+```ts
+const queryEngine = index.asQueryEngine({
+ retriever,
+ nodePostprocessors: [nodePostprocessor],
+});
+
+// log the response
+const response = await queryEngine.query({
+  query: "Where did the author grow up?",
+});
+```
+
+## API Reference
+
+- [CohereRerank](/docs/api/classes/CohereRerank)
diff --git a/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/index.mdx b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/index.mdx
new file mode 100644
index 0000000000..8a6cd35f68
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/index.mdx
@@ -0,0 +1,112 @@
+---
+title: Node Postprocessors
+---
+
+## Concept
+
+Node postprocessors are a set of modules that take a set of nodes, and apply some kind of transformation or filtering before returning them.
+
+In LlamaIndex, node postprocessors are most commonly applied within a query engine, after the node retrieval step and before the response synthesis step.
+
+LlamaIndex offers several node postprocessors for immediate use, while also providing a simple API for adding your own custom postprocessors.
+
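+As a rough sketch of what a custom postprocessor can look like (duck-typed for illustration; the exact interface and the type of the optional query argument may differ between releases), you mainly need to provide a `postprocessNodes` method:
+
+```ts
+import { NodeWithScore } from "llamaindex";
+
+// a hypothetical custom postprocessor that drops nodes without a score
+class DropUnscoredNodes {
+  async postprocessNodes(
+    nodes: NodeWithScore[],
+    query?: string,
+  ): Promise<NodeWithScore[]> {
+    return nodes.filter((node) => node.score !== undefined);
+  }
+}
+```
+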
+## Usage Pattern
+
+An example of using a node postprocessor is below:
+
+```ts
+import {
+  NodeWithScore,
+  SimilarityPostprocessor,
+  CohereRerank,
+  TextNode,
+} from "llamaindex";
+
+const nodes: NodeWithScore[] = [
+ {
+ node: new TextNode({ text: "hello world" }),
+ score: 0.8,
+ },
+ {
+ node: new TextNode({ text: "LlamaIndex is the best" }),
+ score: 0.6,
+ },
+];
+
+// similarity postprocessor: filter nodes below 0.75 similarity score
+const processor = new SimilarityPostprocessor({
+ similarityCutoff: 0.7,
+});
+
+const filteredNodes = await processor.postprocessNodes(nodes);
+
+// cohere rerank: rerank nodes given query using trained model
+const reranker = new CohereRerank({
+ apiKey: "",
+ topN: 2,
+});
+
+const rerankedNodes = await reranker.postprocessNodes(nodes, "");
+
+console.log(filteredNodes, rerankedNodes);
+```
+
+Now you can use the `filteredNodes` and `rerankedNodes` in your application.
+
+## Using Node Postprocessors in LlamaIndex
+
+Most commonly, node-postprocessors will be used in a query engine, where they are applied to the nodes returned from a retriever, and before the response synthesis step.
+
+### Using Node Postprocessors in a Query Engine
+
+```ts
+import {
+  CohereRerank,
+  Document,
+  OpenAI,
+  Settings,
+  SimilarityPostprocessor,
+  VectorStoreIndex,
+} from "llamaindex";
+
+// Use OpenAI LLM
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
+// similarity postprocessor: filter nodes below 0.7 similarity score
+const processor = new SimilarityPostprocessor({
+  similarityCutoff: 0.7,
+});
+
+// cohere rerank: rerank nodes given query using trained model
+const reranker = new CohereRerank({
+  apiKey: "",
+  topN: 2,
+});
+
+// build an index over a document
+const document = new Document({ text: "essay", id_: "essay" });
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+const queryEngine = index.asQueryEngine({
+  nodePostprocessors: [processor, reranker],
+});
+
+// all node post-processors will be applied during each query
+const response = await queryEngine.query("");
+```
+
+### Using with retrieved nodes
+
+```ts
+import { SimilarityPostprocessor } from "llamaindex";
+
+const nodes = await index.asRetriever().retrieve({ query: "test query str" });
+
+const processor = new SimilarityPostprocessor({
+ similarityCutoff: 0.7,
+});
+
+const filteredNodes = await processor.postprocessNodes(nodes);
+```
+
+## API Reference
+
+- [SimilarityPostprocessor](/docs/api/classes/SimilarityPostprocessor)
+- [MetadataReplacementPostProcessor](/docs/api/classes/MetadataReplacementPostProcessor)
diff --git a/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/jinaai_reranker.mdx b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/jinaai_reranker.mdx
new file mode 100644
index 0000000000..a43145668c
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/jinaai_reranker.mdx
@@ -0,0 +1,78 @@
+---
+title: Jina AI Reranker
+---
+
+The Jina AI Reranker is a postprocessor that uses the Jina AI Reranker API to rerank the results of a search query.
+
+## Setup
+
+Firstly, you will need to install the `llamaindex` package.
+
+```bash
+pnpm install llamaindex
+```
+
+Now, you will need to sign up for an API key at [Jina AI](https://jina.ai/reranker). Once you have your API key, you can import the necessary modules and create a new instance of the `JinaAIReranker` class.
+
+```ts
+import {
+ JinaAIReranker,
+ Document,
+ OpenAI,
+ VectorStoreIndex,
+ Settings,
+} from "llamaindex";
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Increase similarity topK to retrieve more results
+
+The default value for `similarityTopK` is 2, which means that only the two most similar nodes will be returned. To retrieve more results, you can increase the value of `similarityTopK`.
+
+```ts
+const retriever = index.asRetriever({
+ similarityTopK: 5,
+});
+```
+
+## Create a new instance of the JinaAIReranker class
+
+Then you can create a new instance of the `JinaAIReranker` class and pass in the number of results you want to return.
+The Jina AI Reranker API key is set in the `JINAAI_API_KEY` environment variable.
+
+```bash
+export JINAAI_API_KEY=
+```
+
+```ts
+const nodePostprocessor = new JinaAIReranker({
+ topN: 5,
+});
+```
+
+## Create a query engine with the retriever and node postprocessor
+
+```ts
+const queryEngine = index.asQueryEngine({
+ retriever,
+ nodePostprocessors: [nodePostprocessor],
+});
+
+const response = await queryEngine.query("Where did the author grow up?");
+
+// log the response
+console.log(response.toString());
+```
+
+## API Reference
+
+- [JinaAIReranker](/docs/api/classes/JinaAIReranker)
diff --git a/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/mixedbreadiai_reranker.mdx b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/mixedbreadiai_reranker.mdx
new file mode 100644
index 0000000000..1a13a7ccbb
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/node_postprocessors/mixedbreadiai_reranker.mdx
@@ -0,0 +1,171 @@
+---
+title: MixedbreadAI
+---
+
+Welcome to the mixedbread ai reranker guide! This guide will help you use mixedbread ai's API to rerank search query results, ensuring you get the most relevant information, just like picking the freshest bread from the bakery.
+
+To find out more about the latest features and updates, visit [mixedbread.ai](https://mixedbread.ai/).
+
+## Table of Contents
+
+1. [Setup](#setup)
+2. [Usage with LlamaIndex](#usage-with-llamaindex)
+3. [Simple Reranking Guide](#simple-reranking-guide)
+4. [Reranking with Objects](#reranking-with-objects)
+
+## Setup
+
+First, you will need to install the `llamaindex` package.
+
+```bash
+pnpm install llamaindex
+```
+
+Next, sign up for an API key at [mixedbread.ai](https://mixedbread.ai/). Once you have your API key, you can import the necessary modules and create a new instance of the `MixedbreadAIReranker` class.
+
+```ts
+import {
+  MixedbreadAIReranker,
+  Document,
+  OpenAI,
+  TextNode,
+  VectorStoreIndex,
+  Settings,
+} from "llamaindex";
+```
+
+## Usage with LlamaIndex
+
+This section will guide you through integrating mixedbread's reranker with LlamaIndex.
+
+### Step 1: Load and Index Documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index, like a variety of breads in a bakery.
+
+```ts
+const document = new Document({
+ text: "This is a sample document.",
+ id_: "sampleDoc",
+});
+
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+### Step 2: Increase Similarity TopK
+
+The default value for `similarityTopK` is 2, which means only the two most similar nodes will be returned. To get more results, like picking a variety of fresh breads, you can increase the value of `similarityTopK`.
+
+```ts
+const retriever = index.asRetriever({
+ similarityTopK: 5,
+});
+```
+
+### Step 3: Create a MixedbreadAIReranker Instance
+
+Create a new instance of the `MixedbreadAIReranker` class.
+
+```ts
+const nodePostprocessor = new MixedbreadAIReranker({
+ apiKey: "",
+ topN: 4,
+});
+```
+
+### Step 4: Create a Query Engine
+
+Combine the retriever and node postprocessor to create a query engine. This setup ensures that your queries are processed and reranked to provide the best results, like arranging the bread in the order of freshness and quality.
+
+```ts
+const queryEngine = index.asQueryEngine({
+ retriever,
+ nodePostprocessors: [nodePostprocessor],
+});
+
+// Log the response
+const response = await queryEngine.query("Where did the author grow up?");
+console.log(response);
+```
+
+With mixedbread's Reranker, you're all set to serve up the most relevant and well-ordered results, just like a skilled baker arranging their best breads for eager customers. Enjoy the perfect blend of technology and culinary delight!
+
+## Simple Reranking Guide
+
+This section will guide you through a simple reranking process using mixedbread ai.
+
+### Step 1: Create an Instance of MixedbreadAIReranker
+
+Create a new instance of the `MixedbreadAIReranker` class, passing in your API key and the number of results you want to return. It's like setting up your bakery to offer a specific number of freshly baked items.
+
+```ts
+const reranker = new MixedbreadAIReranker({
+ apiKey: "",
+ topN: 4,
+});
+```
+
+### Step 2: Define Nodes and Query
+
+Define the nodes (documents) you want to rerank and the query.
+
+```ts
+const nodes = [
+  { node: new TextNode({ text: "To bake bread you need flour" }) },
+  { node: new TextNode({ text: "To bake bread you need yeast" }) },
+];
+const query = "What do you need to bake bread?";
+```
+
+### Step 3: Perform Reranking
+
+Use the `postprocessNodes` method to rerank the nodes based on the query.
+
+```ts
+const result = await reranker.postprocessNodes(nodes, query);
+console.log(result); // Like pulling freshly baked nodes out of the oven.
+```
+
+## Reranking with Objects
+
+This section will guide you through reranking when working with objects.
+
+### Step 1: Create an Instance of MixedbreadAIReranker
+
+Create a new instance of the `MixedbreadAIReranker` class, just like before.
+
+```ts
+const reranker = new MixedbreadAIReranker({
+ apiKey: "",
+ model: "mixedbread-ai/mxbai-rerank-large-v1",
+ topK: 5,
+ rankFields: ["title", "content"],
+ returnInput: true,
+ maxRetries: 5,
+});
+```
+
+### Step 2: Define Documents and Query
+
+Define the documents (objects) you want to rerank and the query.
+
+```ts
+const documents = [
+ { title: "Bread Recipe", content: "To bake bread you need flour" },
+ { title: "Bread Recipe", content: "To bake bread you need yeast" },
+];
+const query = "What do you need to bake bread?";
+```
+
+### Step 3: Perform Reranking
+
+Use the `rerank` method to reorder the documents based on the query.
+
+```ts
+const result = await reranker.rerank(documents, query);
+console.log(result); // Perfectly customized results, ready to serve.
+```
+
+## API Reference
+
+- [MixedbreadAIReranker](/docs/api/classes/MixedbreadAIReranker)
diff --git a/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx
new file mode 100644
index 0000000000..f26d0dd3cc
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx
@@ -0,0 +1,79 @@
+---
+title: Prompts
+---
+
+Prompting is the fundamental input that gives LLMs their expressive power. LlamaIndex uses prompts to build the index, do insertion, perform traversal during querying, and to synthesize the final answer.
+
+Users may also provide their own prompt templates to further customize the behavior of the framework. The best method for customizing is to copy the default prompt and use it as the base for any modifications.
+
+## Usage Pattern
+
+Currently, there are two ways to customize prompts in LlamaIndex; both are described below.
+
+For either method, you will need to create a function that overrides the default prompt.
+
+```ts
+// Define a custom prompt
+const newTextQaPrompt: TextQaPrompt = ({ context, query }) => {
+ return `Context information is below.
+---------------------
+${context}
+---------------------
+Given the context information and not prior knowledge, answer the query.
+Answer the query in the style of a Sherlock Holmes detective novel.
+Query: ${query}
+Answer:`;
+};
+```
+
+### 1. Customizing the default prompt on initialization
+
+The first method is to create a new instance of `ResponseSynthesizer` (or of the module whose prompt you would like to update) and pass the custom prompt to the `responseBuilder` parameter. Then, pass the instance to the `asQueryEngine` method of the index.
+
+```ts
+// Create an instance of response synthesizer
+const responseSynthesizer = new ResponseSynthesizer({
+ responseBuilder: new CompactAndRefine(undefined, newTextQaPrompt),
+});
+
+// Create index
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+// Query the index
+const queryEngine = index.asQueryEngine({ responseSynthesizer });
+
+const response = await queryEngine.query({
+ query: "What did the author do in college?",
+});
+```
+
+### 2. Customizing submodules prompt
+
+The second method relies on the fact that most modules in LlamaIndex expose a `getPrompts` and an `updatePrompt` method, which allow you to override the default prompt. This is useful when you want to change the prompt on the fly, or in submodules at a more granular level.
+
+```ts
+// Create index
+const index = await VectorStoreIndex.fromDocuments([document]);
+
+// Query the index
+const queryEngine = index.asQueryEngine();
+
+// Get a list of prompts for the query engine
+const prompts = queryEngine.getPrompts();
+
+// output: { "responseSynthesizer:textQATemplate": defaultTextQaPrompt, "responseSynthesizer:refineTemplate": defaultRefineTemplatePrompt }
+
+// Now, we can override the default prompt
+queryEngine.updatePrompt({
+ "responseSynthesizer:textQATemplate": newTextQaPrompt,
+});
+
+const response = await queryEngine.query({
+ query: "What did the author do in college?",
+});
+```
+
+## API Reference
+
+- [ResponseSynthesizer](/docs/api/classes/ResponseSynthesizer)
+- [CompactAndRefine](/docs/api/classes/CompactAndRefine)
diff --git a/apps/next/src/content/docs/llamaindex/modules/query_engines/index.mdx b/apps/next/src/content/docs/llamaindex/modules/query_engines/index.mdx
new file mode 100644
index 0000000000..1b5f7ef7f2
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/query_engines/index.mdx
@@ -0,0 +1,43 @@
+---
+title: QueryEngine
+---
+
+A query engine wraps a `Retriever` and a `ResponseSynthesizer` into a pipeline that uses the query string to fetch nodes and then sends them to the LLM to generate a response.
+
+```typescript
+const queryEngine = index.asQueryEngine();
+const response = await queryEngine.query({ query: "query string" });
+```
+
+The `query` function also supports streaming, just add `stream: true` as an option:
+
+```typescript
+const stream = await queryEngine.query({ query: "query string", stream: true });
+for await (const chunk of stream) {
+ process.stdout.write(chunk.response);
+}
+```
+
+## Sub Question Query Engine
+
+The basic concept of the Sub Question Query Engine is that it splits a single query into multiple queries, gets an answer for each of those queries, and then combines those different answers into a single coherent response for the user. You can think of it as the "think this through step by step" prompt technique but iterating over your data sources!
+
+### Getting Started
+
+The easiest way to start trying the Sub Question Query Engine is to run the `subquestion.ts` file in [examples](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/subquestion.ts).
+
+```bash
+npx ts-node subquestion.ts
+```
+
+### Tools
+
+SubQuestionQueryEngine is implemented with Tools. The basic idea of Tools is that they are executable options for the large language model. In this case, our SubQuestionQueryEngine relies on QueryEngineTool, which, as you might have guessed, is a tool to run queries on a QueryEngine. This allows us, for example, to give the model the option to query different documents for different questions. You could also imagine that the SubQuestionQueryEngine could use a Tool that searches for something on the web or gets an answer using Wolfram Alpha.
+
+You can learn more about Tools by taking a look at the [LlamaIndex Python documentation](https://gpt-index.readthedocs.io/en/latest/core_modules/agent_modules/tools/root.html).
+
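+Putting it together, a minimal sketch of wiring up a `SubQuestionQueryEngine` with a `QueryEngineTool` might look like the following (the document text, tool name, and description are illustrative placeholders; see `subquestion.ts` for the canonical example):
+
+```typescript
+import {
+  Document,
+  QueryEngineTool,
+  SubQuestionQueryEngine,
+  VectorStoreIndex,
+} from "llamaindex";
+
+// build a query engine over some documents
+const index = await VectorStoreIndex.fromDocuments([
+  new Document({ text: "Paul Graham's essay text goes here." }),
+]);
+const vectorQueryEngine = index.asQueryEngine();
+
+// wrap it in a tool so the sub question engine can decide when to call it
+const queryEngine = SubQuestionQueryEngine.fromDefaults({
+  queryEngineTools: [
+    new QueryEngineTool({
+      queryEngine: vectorQueryEngine,
+      metadata: {
+        name: "pg_essay",
+        description: "Paul Graham essay on What I Worked On",
+      },
+    }),
+  ],
+});
+
+const response = await queryEngine.query({
+  query: "How was Paul Graham's life different before and after YC?",
+});
+console.log(response.toString());
+```
+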
+## API Reference
+
+- [RetrieverQueryEngine](/docs/api/classes/RetrieverQueryEngine)
+- [SubQuestionQueryEngine](/docs/api/classes/SubQuestionQueryEngine)
+- [QueryEngineTool](/docs/api/classes/QueryEngineTool)
diff --git a/apps/next/src/content/docs/llamaindex/modules/query_engines/metadata_filtering.mdx b/apps/next/src/content/docs/llamaindex/modules/query_engines/metadata_filtering.mdx
new file mode 100644
index 0000000000..480edbf1ec
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/query_engines/metadata_filtering.mdx
@@ -0,0 +1,163 @@
+---
+title: Metadata Filtering
+---
+
+Metadata filtering is a way to filter the documents that are returned by a query based on the metadata associated with the documents. This is useful when you want to filter the documents based on some metadata that is not part of the document text.
+
+You can also check our [multi-tenancy blog post](https://blog.llamaindex.ai/building-multi-tenancy-rag-system-with-llamaindex-0d6ab4e0c44b) to see how metadata filtering can be used in a multi-tenant environment (the article uses the Python version of LlamaIndex, but the concepts are the same).
+
+## Setup
+
+Firstly, if you haven't already, you need to install the `llamaindex` package:
+
+```bash
+pnpm i llamaindex
+```
+
+Then you can import the necessary modules from `llamaindex`:
+
+```ts
+import {
+ ChromaVectorStore,
+ Document,
+ VectorStoreIndex,
+ storageContextFromDefaults,
+} from "llamaindex";
+
+const collectionName = "dog_colors";
+```
+
+## Creating documents with metadata
+
+You can create documents with metadata using the `Document` class:
+
+```ts
+const docs = [
+ new Document({
+ text: "The dog is brown",
+ metadata: {
+ color: "brown",
+ dogId: "1",
+ },
+ }),
+ new Document({
+ text: "The dog is red",
+ metadata: {
+ color: "red",
+ dogId: "2",
+ },
+ }),
+];
+```
+
+## Creating a ChromaDB vector store
+
+You can create a `ChromaVectorStore` to store the documents:
+
+```ts
+const chromaVS = new ChromaVectorStore({ collectionName });
+
+const storageContext = await storageContextFromDefaults({
+ vectorStore: chromaVS,
+});
+
+const index = await VectorStoreIndex.fromDocuments(docs, {
+ storageContext: storageContext,
+});
+```
+
+## Querying the index with metadata filtering
+
+Now you can query the index with metadata filtering using the `preFilters` option:
+
+```ts
+const queryEngine = index.asQueryEngine({
+ preFilters: {
+ filters: [
+ {
+ key: "dogId",
+ value: "2",
+ operator: "==",
+ },
+ ],
+ },
+});
+
+const response = await queryEngine.query({
+ query: "What is the color of the dog?",
+});
+
+console.log(response.toString());
+```
+
+Besides using the equal operator (`==`), you can also use a whole set of different [operators](/docs/api/interfaces/MetadataFilter#operator) to filter your documents.
+
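+For example, a filter using the `in` operator could look like this (a sketch; which operators are available depends on the vector store you are using):
+
+```ts
+const queryEngine = index.asQueryEngine({
+  preFilters: {
+    filters: [
+      {
+        key: "dogId",
+        value: ["1", "2"],
+        operator: "in",
+      },
+    ],
+  },
+});
+```
+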
+## Full Code
+
+```ts
+import {
+ ChromaVectorStore,
+ Document,
+ VectorStoreIndex,
+ storageContextFromDefaults,
+} from "llamaindex";
+
+const collectionName = "dog_colors";
+
+async function main() {
+ try {
+ const docs = [
+ new Document({
+ text: "The dog is brown",
+ metadata: {
+ color: "brown",
+ dogId: "1",
+ },
+ }),
+ new Document({
+ text: "The dog is red",
+ metadata: {
+ color: "red",
+ dogId: "2",
+ },
+ }),
+ ];
+
+ console.log("Creating ChromaDB vector store");
+ const chromaVS = new ChromaVectorStore({ collectionName });
+ const ctx = await storageContextFromDefaults({ vectorStore: chromaVS });
+
+ console.log("Embedding documents and adding to index");
+ const index = await VectorStoreIndex.fromDocuments(docs, {
+ storageContext: ctx,
+ });
+
+ console.log("Querying index");
+ const queryEngine = index.asQueryEngine({
+ preFilters: {
+ filters: [
+ {
+ key: "dogId",
+ value: "2",
+ operator: "==",
+ },
+ ],
+ },
+ });
+ const response = await queryEngine.query({
+ query: "What is the color of the dog?",
+ });
+ console.log(response.toString());
+ } catch (e) {
+ console.error(e);
+ }
+}
+
+main();
+```
+
+## API Reference
+
+- [VectorStoreIndex](/docs/api/classes/VectorStoreIndex)
+- [ChromaVectorStore](/docs/api/classes/ChromaVectorStore)
+- [MetadataFilter](/docs/api/interfaces/MetadataFilter)
diff --git a/apps/next/src/content/docs/llamaindex/modules/query_engines/router_query_engine.mdx b/apps/next/src/content/docs/llamaindex/modules/query_engines/router_query_engine.mdx
new file mode 100644
index 0000000000..0cd4dc0001
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/query_engines/router_query_engine.mdx
@@ -0,0 +1,173 @@
+---
+title: Router Query Engine
+---
+
+In this tutorial, we define a custom router query engine that selects one out of several candidate query engines to execute a query.
+
+## Setup
+
+First, we need to install the `llamaindex` package and import the necessary modules from it:
+
+```bash
+pnpm i llamaindex
+```
+
+```ts
+import {
+ OpenAI,
+ RouterQueryEngine,
+ SimpleDirectoryReader,
+ SentenceSplitter,
+ SummaryIndex,
+ VectorStoreIndex,
+ Settings,
+} from "llamaindex";
+```
+
+## Loading Data
+
+Next, we need to load some data. We will use the `SimpleDirectoryReader` to load documents from a directory:
+
+```ts
+const documents = await new SimpleDirectoryReader().loadData({
+ directoryPath: "node_modules/llamaindex/examples",
+});
+```
+
+## Settings
+
+Next, we need to define some basic rules and parse the documents into nodes. We will use the `SentenceSplitter` to parse the documents into nodes and `Settings` to define the rules (e.g. the LLM, chunk size, etc.):
+
+```ts
+Settings.llm = new OpenAI();
+Settings.nodeParser = new SentenceSplitter({
+ chunkSize: 1024,
+});
+```
+
+## Creating Indices
+
+Next, we need to create some indices. We will create a `VectorStoreIndex` and a `SummaryIndex`:
+
+```ts
+const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+const summaryIndex = await SummaryIndex.fromDocuments(documents);
+```
+
+## Creating Query Engines
+
+Next, we need to create some query engines, one from each index:
+
+```ts
+const vectorQueryEngine = vectorIndex.asQueryEngine();
+const summaryQueryEngine = summaryIndex.asQueryEngine();
+```
+
+## Creating a Router Query Engine
+
+Next, we need to create a router query engine. We will use `RouterQueryEngine.fromDefaults` to combine the two query engines:
+
+We're defining two query engines, one for summarization and one for retrieving specific context. The router query engine will select the most appropriate query engine based on the query.
+
+```ts
+const queryEngine = RouterQueryEngine.fromDefaults({
+ queryEngineTools: [
+ {
+ queryEngine: vectorQueryEngine,
+ description: "Useful for summarization questions related to Abramov",
+ },
+ {
+ queryEngine: summaryQueryEngine,
+ description: "Useful for retrieving specific context from Abramov",
+ },
+ ],
+});
+```
+
+## Querying the Router Query Engine
+
+Finally, we can query the router query engine:
+
+```ts
+const summaryResponse = await queryEngine.query({
+ query: "Give me a summary about his past experiences?",
+});
+
+console.log({
+ answer: summaryResponse.response,
+ metadata: summaryResponse?.metadata?.selectorResult,
+});
+```
+
+## Full code
+
+```ts
+import {
+ OpenAI,
+ RouterQueryEngine,
+ SimpleDirectoryReader,
+ SentenceSplitter,
+ SummaryIndex,
+ VectorStoreIndex,
+ Settings,
+} from "llamaindex";
+
+Settings.llm = new OpenAI();
+Settings.nodeParser = new SentenceSplitter({
+ chunkSize: 1024,
+});
+
+async function main() {
+ // Load documents from a directory
+ const documents = await new SimpleDirectoryReader().loadData({
+ directoryPath: "node_modules/llamaindex/examples",
+ });
+
+ // Create indices
+ const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+ const summaryIndex = await SummaryIndex.fromDocuments(documents);
+
+ // Create query engines
+ const vectorQueryEngine = vectorIndex.asQueryEngine();
+ const summaryQueryEngine = summaryIndex.asQueryEngine();
+
+ // Create a router query engine
+ const queryEngine = RouterQueryEngine.fromDefaults({
+ queryEngineTools: [
+ {
+ queryEngine: vectorQueryEngine,
+ description: "Useful for summarization questions related to Abramov",
+ },
+ {
+ queryEngine: summaryQueryEngine,
+ description: "Useful for retrieving specific context from Abramov",
+ },
+ ],
+ });
+
+ // Query the router query engine
+ const summaryResponse = await queryEngine.query({
+ query: "Give me a summary about his past experiences?",
+ });
+
+ console.log({
+ answer: summaryResponse.response,
+ metadata: summaryResponse?.metadata?.selectorResult,
+ });
+
+ const specificResponse = await queryEngine.query({
+ query: "Tell me about abramov first job?",
+ });
+
+ console.log({
+ answer: specificResponse.response,
+ metadata: specificResponse.metadata.selectorResult,
+ });
+}
+
+main().then(() => console.log("Done"));
+```
+
+## API Reference
+
+- [RouterQueryEngine](/docs/api/classes/RouterQueryEngine)
diff --git a/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx
new file mode 100644
index 0000000000..88e3509307
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx
@@ -0,0 +1,62 @@
+---
+title: ResponseSynthesizer
+---
+
+The ResponseSynthesizer is responsible for sending the query, nodes, and prompt templates to the LLM to generate a response. There are a few key modes for generating a response:
+
+- `Refine`: "create and refine" an answer by sequentially going through each retrieved text chunk.
+ This makes a separate LLM call per Node. Good for more detailed answers.
+- `CompactAndRefine` (default): "compact" the prompt during each LLM call by stuffing as
+ many text chunks that can fit within the maximum prompt size. If there are
+ too many chunks to stuff in one prompt, "create and refine" an answer by going through
+  multiple compact prompts. The same as `Refine`, but should result in fewer LLM calls.
+- `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree
+ and return the root node as the response. Good for summarization purposes.
+- `SimpleResponseBuilder`: Given a set of text chunks and the query, apply the query to each text
+ chunk while accumulating the responses into an array. Returns a concatenated string of all
+ responses. Good for when you need to run the same query separately against each text
+ chunk.
+
+```typescript
+import { NodeWithScore, ResponseSynthesizer, TextNode } from "llamaindex";
+
+const responseSynthesizer = new ResponseSynthesizer();
+
+const nodesWithScore: NodeWithScore[] = [
+ {
+ node: new TextNode({ text: "I am 10 years old." }),
+ score: 1,
+ },
+ {
+ node: new TextNode({ text: "John is 20 years old." }),
+ score: 0.5,
+ },
+];
+
+const response = await responseSynthesizer.synthesize({
+ query: "What age am I?",
+ nodesWithScore,
+});
+console.log(response.response);
+```
+
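+To use a different mode, pass the corresponding response builder explicitly (a sketch, assuming the builder can be constructed with default arguments):
+
+```typescript
+import { ResponseSynthesizer, TreeSummarize } from "llamaindex";
+
+// use tree summarization instead of the default CompactAndRefine
+const responseSynthesizer = new ResponseSynthesizer({
+  responseBuilder: new TreeSummarize(),
+});
+```
+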
+The `synthesize` function also supports streaming, just add `stream: true` as an option:
+
+```typescript
+const stream = await responseSynthesizer.synthesize({
+ query: "What age am I?",
+ nodesWithScore,
+ stream: true,
+});
+for await (const chunk of stream) {
+ process.stdout.write(chunk.response);
+}
+```
+
+## API Reference
+
+- [ResponseSynthesizer](/docs/api/classes/ResponseSynthesizer)
+- [Refine](/docs/api/classes/Refine)
+- [CompactAndRefine](/docs/api/classes/CompactAndRefine)
+- [TreeSummarize](/docs/api/classes/TreeSummarize)
+- [SimpleResponseBuilder](/docs/api/classes/SimpleResponseBuilder)
diff --git a/apps/next/src/content/docs/llamaindex/modules/retriever.mdx b/apps/next/src/content/docs/llamaindex/modules/retriever.mdx
new file mode 100644
index 0000000000..72c7362586
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/retriever.mdx
@@ -0,0 +1,21 @@
+---
+title: Retriever
+---
+
+A retriever in LlamaIndex is what fetches `Node`s from an index using a query string.
+
+- [VectorIndexRetriever](/docs/api/classes/VectorIndexRetriever) will fetch the top-k most similar nodes. Ideal for dense retrieval to find most relevant nodes.
+- [SummaryIndexRetriever](/docs/api/classes/SummaryIndexRetriever) will fetch all nodes no matter the query. Ideal when complete context is necessary, e.g. analyzing large datasets.
+- [SummaryIndexLLMRetriever](/docs/api/classes/SummaryIndexLLMRetriever) utilizes an LLM to score and filter nodes based on relevancy to the query.
+- [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
+- [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
+- [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
+
+```typescript
+const retriever = vectorIndex.asRetriever({
+ similarityTopK: 3,
+});
+
+// Fetch nodes!
+const nodesWithScore = await retriever.retrieve({ query: "query string" });
+```
diff --git a/apps/next/src/content/docs/llamaindex/modules/workflows.mdx b/apps/next/src/content/docs/llamaindex/modules/workflows.mdx
new file mode 100644
index 0000000000..63f1765557
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/modules/workflows.mdx
@@ -0,0 +1,170 @@
+---
+title: Workflows
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/workflow/joke.ts";
+
+A `Workflow` in LlamaIndexTS is an event-driven abstraction used to chain together several events. Workflows are made up of `steps`, with each step responsible for handling certain event types and emitting new events.
+
+Workflows in LlamaIndexTS work by defining step functions that handle specific event types and emit new events.
+
+When a step function is added to a workflow, you need to specify the input and optionally the output event types (used for validation). The specification of the input events ensures each step only runs when an accepted event is ready.
+
+You can create a `Workflow` to do anything! Build an agent, a RAG flow, an extraction flow, or anything else you want.
+
+## Getting Started
+
+As an illustrative example, let's consider a naive workflow where a joke is generated and then critiqued.
+
+
+
+There are a few moving pieces here, so let's go through it piece by piece.
+
+### Defining Workflow Events
+
+```typescript
+export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
+```
+
+Events are user-defined classes that extend `WorkflowEvent` and contain arbitrary data provided as the template argument. In this case, our workflow relies on a single user-defined event, the `JokeEvent` with a `joke` attribute of type `string`.
+
+### Setting up the Workflow Class
+
+```typescript
+const llm = new OpenAI();
+...
+const jokeFlow = new Workflow({ verbose: true });
+```
+
+Our workflow is implemented by instantiating the `Workflow` class. For simplicity, we also created an `OpenAI` LLM instance.
+
+### Workflow Entry Points
+
+```typescript
+const generateJoke = async (_context: Context, ev: StartEvent) => {
+ const prompt = `Write your best joke about ${ev.data.input}.`;
+ const response = await llm.complete({ prompt });
+ return new JokeEvent({ joke: response.text });
+};
+```
+
+Here, we come to the entry-point of our workflow. While events are user-defined, there are two special-case events, the `StartEvent` and the `StopEvent`. Here, the `StartEvent` signifies where to send the initial workflow input.
+
+The `StartEvent` is a bit of a special object since it can hold arbitrary attributes. Here, we accessed the topic with `ev.data.input`.
+
+At this point, you may have noticed that we haven't explicitly told the workflow what events are handled by which steps.
+
+To do so, we use the `addStep` method which adds a step to the workflow. The first argument is the event type that the step will handle, and the second argument is the previously defined step function:
+
+```typescript
+jokeFlow.addStep(StartEvent, generateJoke);
+```
+
+### Workflow Exit Points
+
+```typescript
+const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+ const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
+ const response = await llm.complete({ prompt });
+ return new StopEvent({ result: response.text });
+};
+```
+
+Here, we have our second, and last, step in the workflow. We know it's the last step because the special `StopEvent` is returned. When the workflow encounters a returned `StopEvent`, it immediately stops and returns the result.
+
+In this case, the result is a string, but it could be a map, array, or any other object.
+
+Don't forget to add the step to the workflow:
+
+```typescript
+jokeFlow.addStep(JokeEvent, critiqueJoke);
+```
+
+### Running the Workflow
+
+```typescript
+const result = await jokeFlow.run("pirates");
+console.log(result.data.result);
+```
+
+Lastly, we run the workflow. The `.run()` method is async, so we use await here to wait for the result.
+
+### Validating Workflows
+
+To tell the workflow what events are produced by each step, you can optionally provide a third argument to `addStep` to specify the output event type:
+
+```typescript
+jokeFlow.addStep(StartEvent, generateJoke, { outputs: JokeEvent });
+jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
+```
+
+To validate a workflow, you need to call the `validate` method:
+
+```typescript
+jokeFlow.validate();
+```
+
+To automatically validate a workflow when you run it, you can set the `validate` flag to `true` at initialization:
+
+```typescript
+const jokeFlow = new Workflow({ verbose: true, validate: true });
+```
+
+## Working with Global Context/State
+
+Optionally, you can choose to use global context between steps. For example, maybe multiple steps access the original `query` input from the user. You can store this in global context so that every step has access.
+
+```typescript
+import { Context } from "@llamaindex/core/workflow";
+
+const query = async (context: Context, ev: MyEvent) => {
+ // get the query from the context
+ const query = context.get("query");
+ // do something with context and event
+ const val = ...
+ const result = ...
+ // store in context
+ context.set("key", val);
+
+ return new StopEvent({ result });
+};
+```
+
+## Waiting for Multiple Events
+
+The context does more than just hold data; it also provides utilities to buffer and wait for multiple events.
+
+For example, you might have a step that waits for a query and retrieved nodes before synthesizing a response:
+
+```typescript
+const synthesize = async (context: Context, ev: QueryEvent | RetrieveEvent) => {
+  const events = context.collectEvents(ev, [QueryEvent, RetrieveEvent]);
+ if (!events) {
+ return;
+ }
+ const prompt = events
+ .map((event) => {
+ if (event instanceof QueryEvent) {
+ return `Answer this query using the context provided: ${event.data.query}`;
+ } else if (event instanceof RetrieveEvent) {
+ return `Context: ${event.data.context}`;
+ }
+ return "";
+ })
+ .join("\n");
+
+ const response = await llm.complete({ prompt });
+ return new StopEvent({ result: response.text });
+};
+```
+
+Using `context.collectEvents()` we can buffer and wait for ALL expected events to arrive. This function will only return events (in the requested order) once all of them have arrived.
+
+## Manually Triggering Events
+
+Normally, events are triggered by returning another event during a step. However, events can also be manually dispatched using the `context.sendEvent(event)` method within a workflow.
+
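+A minimal sketch of manual dispatch (assuming `StartEvent` and `WorkflowEvent` are exported from the same module as `Context`; the event classes mirror the hypothetical ones used above):
+
+```typescript
+import { Context, StartEvent, WorkflowEvent } from "@llamaindex/core/workflow";
+
+class QueryEvent extends WorkflowEvent<{ query: string }> {}
+class RetrieveEvent extends WorkflowEvent<{ context: string }> {}
+
+const planStep = async (context: Context, ev: StartEvent) => {
+  // dispatch extra events manually instead of returning them from the step
+  context.sendEvent(new QueryEvent({ query: ev.data.input }));
+  context.sendEvent(new RetrieveEvent({ context: "retrieved context goes here" }));
+};
+```
+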
+## Examples
+
+You can find many useful examples of using workflows in the [examples folder](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/workflow).
diff --git a/apps/next/src/content/docs/llamaindex/recipes/cost-analysis.mdx b/apps/next/src/content/docs/llamaindex/recipes/cost-analysis.mdx
new file mode 100644
index 0000000000..bab0a75236
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/recipes/cost-analysis.mdx
@@ -0,0 +1,16 @@
+---
+title: Cost Analysis
+---
+
+This page shows how to track LLM cost using APIs.
+
+## Callback Manager
+
+The callback manager is a class that manages the callback functions.
+
+You can register `llm-start`, `llm-end`, and `llm-stream` callbacks to the callback manager for tracking the cost.
+
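+As a minimal sketch of the idea (the exact shape of the event payload is an assumption; see the full example below), you register a handler on the global callback manager:
+
+```ts
+import { Settings } from "llamaindex";
+
+// inspect every completed LLM call; token usage (and therefore cost)
+// can be derived from the payload, depending on the provider
+Settings.callbackManager.on("llm-end", (event) => {
+  console.log("llm-end", event.detail);
+});
+```
+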
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../examples/recipes/cost-analysis";
+
+
diff --git a/apps/next/src/content/docs/llamaindex/recipes/meta.json b/apps/next/src/content/docs/llamaindex/recipes/meta.json
new file mode 100644
index 0000000000..48b2bba593
--- /dev/null
+++ b/apps/next/src/content/docs/llamaindex/recipes/meta.json
@@ -0,0 +1,4 @@
+{
+ "title": "Recipes",
+ "pages": ["cost-analysis"]
+}
diff --git a/apps/next/src/content/docs/llamaindex/what-is-llamaindex.mdx b/apps/next/src/content/docs/llamaindex/what-is-llamaindex.mdx
deleted file mode 100644
index b673ccb392..0000000000
--- a/apps/next/src/content/docs/llamaindex/what-is-llamaindex.mdx
+++ /dev/null
@@ -1,24 +0,0 @@
----
-title: What is LlamaIndex.TS
-description: LlamaIndex is the leading data framework for building LLM applications
----
-
-import {
- SiNodedotjs,
- SiDeno,
- SiBun,
- SiCloudflareworkers,
-} from "@icons-pack/react-simple-icons";
-
-LlamaIndex is a framework for building context-augmented generative AI applications with LLMs including agents and workflows.
-
-The TypeScript implementation is designed for JavaScript server side applications using Node.js, Deno, Bun, Cloudflare Workers, and more.
-
-LlamaIndex.TS provides tools for beginners, advanced users, and everyone in between.
-
-