/**
 * Example: load every Markdown note from an Obsidian vault and print it.
 *
 * Usage (see the `start:obsidian` script):
 *   node --import tsx ./src/obsidian.ts [vaultPath]
 *
 * The vault directory is resolved in this order:
 *   1. the first command-line argument,
 *   2. the `OBSIDIAN_VAULT_PATH` environment variable,
 *   3. the path that was originally hard-coded in this example.
 */
const DEFAULT_VAULT_PATH = "/home/jingyi/Documents/jingyi-vault";

const vaultPath =
  process.argv[2] ?? process.env.OBSIDIAN_VAULT_PATH ?? DEFAULT_VAULT_PATH;

const obsidianReader = new ObsidianReader(vaultPath);

obsidianReader
  .loadData()
  .then((documents) => {
    documents.forEach((doc) => {
      console.log(`document (${doc.id_}):`, doc.getText());
    });
  })
  .catch((error: unknown) => {
    // Surface read failures (e.g. a missing vault directory) instead of
    // leaving the promise rejection unhandled.
    console.error(`Failed to read Obsidian vault at "${vaultPath}":`, error);
    process.exitCode = 1;
  });
/**
 * Reader that loads every Markdown file (`*.md`) found under a directory,
 * such as an Obsidian vault.
 *
 * The directory tree is walked recursively. Sub-directories whose name starts
 * with "." are not descended into. Each Markdown file is parsed with
 * `MarkdownReader` and the resulting documents are concatenated in traversal
 * order.
 */
export class ObsidianReader implements BaseReader {
  /** Root directory that `loadData` walks. */
  protected inputDir: string;
  /** Documents produced by the most recent `loadData` call. */
  protected docs: Document[] = [];

  /**
   * @param inputDir Path of the directory to read Markdown files from.
   */
  constructor(inputDir: string) {
    this.inputDir = inputDir;
  }

  /**
   * Recursively walks `dir`, appending the documents of every Markdown file
   * to `collected`.
   *
   * @param dir Directory to scan.
   * @param collected Per-call accumulator that receives the parsed documents.
   */
  private traverse = async (
    dir: string,
    collected: Document[],
  ): Promise<void> => {
    const entries = await fs.promises.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const filepath = path.join(dir, entry.name);
      if (entry.isDirectory() && !entry.name.startsWith(".")) {
        await this.traverse(filepath, collected);
      } else if (entry.isFile() && entry.name.endsWith(".md")) {
        const content = await new MarkdownReader().loadData(filepath);
        collected.push(...content);
      }
    }
  };

  /**
   * Reads all Markdown files below `inputDir`.
   *
   * Every call starts from an empty result, so calling this method more than
   * once never returns duplicated documents and never mutates an array that
   * was returned by an earlier call.
   *
   * @returns The documents parsed from every Markdown file that was found.
   * @throws Propagates any error raised while listing a directory or while
   *   `MarkdownReader` loads a file.
   */
  async loadData(): Promise<Document[]> {
    const collected: Document[] = [];
    await this.traverse(this.inputDir, collected);
    // Publish the fresh result for subclasses that read `docs`.
    this.docs = collected;
    return collected;
  }
}