run-llama · himself65 · Dec 18, 2024 · Dec 16, 2024 · Dec 16, 2024 · Dec 16, 2024
diff --git a/apps/next/scripts/migrate-docs.mjs b/apps/next/scripts/migrate-docs.mjs
@@ -0,0 +1,48 @@
+/**
+ * This script is used to migrate the docs from the old format to the new format.
+ * It will rename all .md files to .mdx and add frontmatter to the top of the file.
+ * It will also update all links to the correct format.
+ * It's only run locally, to save the time of manually copying the docs.
+ * After running the script, you also need to manually fix some contents (e.g: update CodeBlock, CodeSource, etc.)
+ */
+
+import { generateFiles } from "fumadocs-typescript";
+import * as path from "node:path";
+
+const out = "./src/content/docs/llamaindex";
+
+void generateFiles({
+  input: ["../docs/docs/modules/**/*.md", "../docs/docs/modules/**/*.mdx"],
+  output: (file) =>
+    path.resolve(
+      path.join(out, path.dirname(file).replace("../docs/docs/", "")),
+      `${path.basename(file).split(".")[0]}.mdx`, // rename .md to .mdx
+    ),
+  transformOutput,
+});
+
+// Replace h1 title with frontmatter title, update all links
+// Example: # LLM -> --- title: LLM ---
+function transformOutput(filePath, content) {
+  const lines = content.split("\n");
+  const h1Index = lines.findIndex((line) => /^# /.test(line));
+  const title = lines[h1Index].replace("# ", "").trim();
+  const mdxLines = [
+    `---`,
+    `title: ${title}`,
+    `---`,
+    ...lines.slice(h1Index + 1),
+  ];
+  const mdxContent = mdxLines.join("\n");
+
+  // update all links, remove .md and replace ../../api (or ../api, ./api, ...) with /docs/api
+  // eg: [SentenceSplitter](../api/classes/SentenceSplitter.md) -> [SentenceSplitter](/docs/api/classes/SentenceSplitter)
+  const result = mdxContent.replace(
+    /\]\((\.{0,2}\/)*api\/([^)]+)\.md([^)]*)\)/g,
+    (match, prefix, path, anchor) => {
+      return `](/docs/api/${path}${anchor})`;
+    },
+  );
+
+  return result;
+}
diff --git a/apps/next/src/content/docs/llamaindex/meta.json b/apps/next/src/content/docs/llamaindex/meta.json
@@ -10,6 +10,7 @@
     "starter",
     "loading",
     "guide",
-    "integration"
+    "integration",
+    "modules"
   ]
 }
diff --git a/apps/next/src/content/docs/llamaindex/modules/agent/index.mdx b/apps/next/src/content/docs/llamaindex/modules/agent/index.mdx
@@ -0,0 +1,31 @@
+---
+title: Agents
+---
+
+An “agent” is an automated reasoning and decision engine. It takes in a user input/query and can make internal decisions for executing that query in order to return the correct result. The key agent components can include, but are not limited to:
+
+- Breaking down a complex question into smaller ones
+- Choosing an external Tool to use + coming up with parameters for calling the Tool
+- Planning out a set of tasks
+- Storing previously completed tasks in a memory module
+
+## Getting Started
+
+LlamaIndex.TS comes with a few built-in agents, but you can also create your own. The built-in agents include:
+
+- OpenAI Agent
+- Anthropic Agent, via both Anthropic and Bedrock (in `@llamaindex/community`)
+- Gemini Agent
+- ReACT Agent
+- Meta Llama 3.1 405B via Bedrock (in `@llamaindex/community`)
+
+## Examples
+
+- [OpenAI Agent](../../examples/agent.mdx)
+- [Gemini Agent](../../examples/agent_gemini.mdx)
+
+## API Reference
+
+- [OpenAIAgent](/docs/api/classes/OpenAIAgent)
+- [AnthropicAgent](/docs/api/classes/AnthropicAgent)
+- [ReActAgent](/docs/api/classes/ReActAgent)
diff --git a/apps/next/src/content/docs/llamaindex/modules/chat_engine.mdx b/apps/next/src/content/docs/llamaindex/modules/chat_engine.mdx
@@ -0,0 +1,28 @@
+---
+title: ChatEngine
+---
+
+The chat engine is a quick and simple way to chat with the data in your index.
+
+```typescript
+const retriever = index.asRetriever();
+const chatEngine = new ContextChatEngine({ retriever });
+
+// start chatting
+const response = await chatEngine.chat({ message: query });
+```
+
+The `chat` function also supports streaming, just add `stream: true` as an option:
+
+```typescript
+const stream = await chatEngine.chat({ message: query, stream: true });
+for await (const chunk of stream) {
+  process.stdout.write(chunk.response);
+}
+```
+
+## API Reference
+
+- [ContextChatEngine](/docs/api/classes/ContextChatEngine)
+- [CondenseQuestionChatEngine](/docs/api/classes/CondenseQuestionChatEngine)
+- [SimpleChatEngine](/docs/api/classes/SimpleChatEngine)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_index.mdx
@@ -0,0 +1,23 @@
+---
+title: Index
+---
+
+An index is the basic container and organization for your data. LlamaIndex.TS supports three indexes:
+
+- `VectorStoreIndex` - will send the top-k `Node`s to the LLM when generating a response. The default top-k is 2.
+- `SummaryIndex` - will send every `Node` in the index to the LLM in order to generate a response
+- `KeywordTableIndex` - extracts keywords from `Node`s and provides them to the LLM
+
+```typescript
+import { Document, VectorStoreIndex } from "llamaindex";
+
+const document = new Document({ text: "test" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## API Reference
+
+- [SummaryIndex](/docs/api/classes/SummaryIndex)
+- [VectorStoreIndex](/docs/api/classes/VectorStoreIndex)
+- [KeywordTableIndex](/docs/api/classes/KeywordTableIndex)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/discord.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/discord.mdx
@@ -0,0 +1,36 @@
+---
+title: DiscordReader
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/readers/src/discord";
+
+DiscordReader is a simple data loader that reads all messages in a given Discord channel and returns them as Document objects.
+It uses the [@discordjs/rest](https://github.com/discordjs/discord.js/tree/main/packages/rest) library to fetch the messages.
+
+## Usage
+
+The first step is to create a Discord Application and generate a bot token [here](https://discord.com/developers/applications).
+In your Discord Application, go to the `OAuth2` tab and generate an invite URL by selecting `bot` and then checking `Read Messages/View Channels` as well as `Read Message History`.
+This will invite the bot with the necessary permissions to read messages.
+Copy the URL in your browser and select the server you want your bot to join.
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+### Params
+
+#### DiscordReader()
+
+- `discordToken?`: The Discord bot token.
+- `requestHandler?`: Optionally provide a custom request function for edge environments, e.g. `fetch`. See discord.js for more info.
+
+#### DiscordReader.loadData
+
+- `channelIDs`: The ID(s) of discord channels as an array of strings.
+- `limit?`: Optionally limit the number of messages to read
+- `additionalInfo?`: An optional flag to include embedded messages and attachment urls in the document.
+- `oldestFirst?`: An optional flag to return the oldest messages first.
+
+## API Reference
+
+- [DiscordReader](/docs/api/classes/DiscordReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx
@@ -0,0 +1,58 @@
+---
+title: Loader
+---
+
+import { DynamicCodeBlock } from 'fumadocs-ui/components/dynamic-codeblock';
+import CodeSource from "!raw-loader!../../../../../../../../examples/readers/src/simple-directory-reader";
+import CodeSource2 from "!raw-loader!../../../../../../../../examples/readers/src/custom-simple-directory-reader";
+
+Before you can start indexing your documents, you need to load them into memory.
+
+All "basic" data loaders can be seen below, mapped to their respective filetypes in `SimpleDirectoryReader`. More loaders are shown in the sidebar on the left.
+Additionally the following loaders exist without separate documentation:
+
+- `AssemblyAIReader` transcribes audio using [AssemblyAI](https://www.assemblyai.com/).
+  - [AudioTranscriptReader](/docs/api/classes/AudioTranscriptReader): loads entire transcript as a single document.
+  - [AudioTranscriptParagraphsReader](/docs/api/classes/AudioTranscriptParagraphsReader): creates a document per paragraph.
+  - [AudioTranscriptSentencesReader](/docs/api/classes/AudioTranscriptSentencesReader): creates a document per sentence.
+  - [AudioSubtitlesReader](/docs/api/classes/AudioSubtitlesReader): creates a document containing the subtitles of a transcript.
+- [NotionReader](/docs/api/classes/NotionReader) loads [Notion](https://www.notion.so/) pages.
+- [SimpleMongoReader](/docs/api/classes/SimpleMongoReader) loads data from [MongoDB](https://www.mongodb.com/).
+
+Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for the most up to date overview of integrations.
+
+## SimpleDirectoryReader
+
+[![Open in StackBlitz](https://developer.stackblitz.com/img/open_in_stackblitz.svg)](https://stackblitz.com/github/run-llama/LlamaIndexTS/tree/main/examples/readers?file=src/simple-directory-reader.ts&title=Simple%20Directory%20Reader)
+
+LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class.
+
+It is a simple reader that reads all files from a directory and its subdirectories.
+
+<DynamicCodeBlock lang="ts" code={CodeSource} />
+
+Currently, the following readers are mapped to specific file types:
+
+- [TextFileReader](/docs/api/classes/TextFileReader): `.txt`
+- [PDFReader](/docs/api/classes/PDFReader): `.pdf`
+- [PapaCSVReader](/docs/api/classes/PapaCSVReader): `.csv`
+- [MarkdownReader](/docs/api/classes/MarkdownReader): `.md`
+- [DocxReader](/docs/api/classes/DocxReader): `.docx`
+- [HTMLReader](/docs/api/classes/HTMLReader): `.htm`, `.html`
+- [ImageReader](/docs/api/classes/ImageReader): `.jpg`, `.jpeg`, `.png`, `.gif`
+
+You can modify the reader three different ways:
+
+- `overrideReader` overrides the reader for all file types, including unsupported ones.
+- `fileExtToReader` maps a reader to a specific file type. Can override reader for existing file types or add support for new file types.
+- `defaultReader` sets a fallback reader for files with unsupported extensions. By default it is `TextFileReader`.
+
+SimpleDirectoryReader supports up to 9 concurrent requests. Use the `numWorkers` option to set the number of concurrent requests. By default it runs in sequential mode, i.e. set to 1.
+
+### Example
+
+<DynamicCodeBlock lang="ts" code={CodeSource2} />
+
+## API Reference
+
+- [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/json.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/json.mdx
@@ -0,0 +1,149 @@
+---
+title: JSONReader
+---
+
+A simple JSON data loader with various options.
+It either parses the entire string, cleaning it and treating each line as an embedding, or performs a recursive depth-first traversal that yields JSON paths.
+Supports streaming of large JSON data using [@discoveryjs/json-ext](https://github.com/discoveryjs/json-ext).
+
+## Usage
+
+```ts
+import { JSONReader } from "llamaindex";
+
+const file = "../../PATH/TO/FILE";
+const content = new TextEncoder().encode("JSON_CONTENT");
+
+const reader = new JSONReader({ levelsBack: 0, collapseLength: 100 });
+const docsFromFile = reader.loadData(file);
+const docsFromContent = reader.loadDataAsContent(content);
+```
+
+### Options
+
+Basic:
+
+- `streamingThreshold?`: The threshold, in MB of JSON data, for using streaming mode. Estimates characters by calculating bytes: `(streamingThreshold * 1024 * 1024) / 2` and comparing against `.length` of the JSON string. Set `undefined` to disable streaming or `0` to always use streaming. Default is `50` MB.
+
+- `ensureAscii?`: Whether to ensure that only ASCII characters are present in the output by converting non-ASCII characters to their unicode escape sequence. Default is `false`.
+
+- `isJsonLines?`: Whether the JSON is in JSON Lines format. If true, will split into lines, remove empty ones and parse each line as JSON. Note: Uses a custom streaming parser, most likely less robust than json-ext. Default is `false`.
+
+- `cleanJson?`: Whether to clean the JSON by filtering out structural characters (`{}, [], and ,`). If set to false, it will just parse the JSON, not removing structural characters. Default is `true`.
+
+- `logger?`: A placeholder for a custom logger function.
+
+Depth-First-Traversal:
+
+- `levelsBack?`: Specifies how many levels up the JSON structure to include in the output. `cleanJson` will be ignored. If set to 0, all levels are included. If undefined, parses the entire JSON, treats each line as an embedding and creates a document per top-level array. Default is `undefined`.
+
+- `collapseLength?`: The maximum length of JSON string representation to be collapsed into a single line. Only applicable when `levelsBack` is set. Default is `undefined`.
+
+#### Examples
+
+Input:
+
+```json
+{"a": {"1": {"key1": "value1"}, "2": {"key2": "value2"}}, "b": {"3": {"k3": "v3"}, "4": {"k4": "v4"}}}
+```
+
+Default options:
+
+`levelsBack` = `undefined` & `cleanJson` = `true`
+
+Output:
+
+```json
+"a": {
+"1": {
+"key1": "value1"
+"2": {
+"key2": "value2"
+"b": {
+"3": {
+"k3": "v3"
+"4": {
+"k4": "v4"
+```
+
+Depth-First Traversal all levels:
+
+`levelsBack` = `0`
+
+Output:
+
+```json
+a 1 key1 value1
+a 2 key2 value2
+b 3 k3 v3
+b 4 k4 v4
+```
+
+Depth-First Traversal and Collapse:
+
+`levelsBack` = `0` & `collapseLength` = `35`
+
+Output:
+
+```json
+a 1 {"key1":"value1"}
+a 2 {"key2":"value2"}
+b {"3":{"k3":"v3"},"4":{"k4":"v4"}}
+```
+
+Depth-First Traversal limited levels:
+
+`levelsBack` = `2`
+
+Output:
+
+```json
+1 key1 value1
+2 key2 value2
+3 k3 v3
+4 k4 v4
+```
+
+Uncleaned JSON:
+
+`levelsBack` = `undefined` & `cleanJson` = `false`
+
+Output:
+
+```json
+{"a":{"1":{"key1":"value1"},"2":{"key2":"value2"}},"b":{"3":{"k3":"v3"},"4":{"k4":"v4"}}}
+```
+
+ASCII-Conversion:
+
+Input:
+
+```json
+{ "message": "こんにちは世界" }
+```
+
+Output:
+
+```json
+"message": "\u3053\u3093\u306b\u3061\u306f\u4e16\u754c"
+```
+
+JSON Lines Format:
+
+Input:
+
+```json
+{"tweet": "Hello world"}\n{"tweet": "こんにちは世界"}
+```
+
+Output:
+
+```json
+"tweet": "Hello world"
+
+"tweet": "こんにちは世界"
+```
+
+## API Reference
+
+- [JSONReader](/docs/api/classes/JSONReader)