From dced862f3741f29ea200cd32971be303f87fd98c Mon Sep 17 00:00:00 2001
From: jingyi
Date: Thu, 28 Nov 2024 13:38:06 -0500
Subject: [PATCH] chore: refactor the code in obsidian reader to make if else condition top level

---
Review notes (ignored by `git am`):
- loadData() return type restored to Promise<Document[]> and docs is reset on
  every call so repeated calls do not return duplicates.
- The example takes the vault path from argv/env instead of a hard-coded
  personal directory and handles a rejected load.
- Blob ids on the `index` lines below predate these review changes.

 examples/readers/package.json    |  3 +-
 examples/readers/src/obsidian.ts | 29 +++++++++++++++++
 packages/readers/package.json    | 19 +++++++++++
 packages/readers/src/obsidian.ts | 67 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 examples/readers/src/obsidian.ts
 create mode 100644 packages/readers/src/obsidian.ts

diff --git a/examples/readers/package.json b/examples/readers/package.json
index 4cc21d9429..092449c8eb 100644
--- a/examples/readers/package.json
+++ b/examples/readers/package.json
@@ -15,7 +15,8 @@
     "start:llamaparse-dir": "node --import tsx ./src/simple-directory-reader-with-llamaparse.ts",
     "start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
     "start:discord": "node --import tsx ./src/discord.ts",
-    "start:json": "node --import tsx ./src/json.ts"
+    "start:json": "node --import tsx ./src/json.ts",
+    "start:obsidian": "node --import tsx ./src/obsidian.ts"
   },
   "dependencies": {
     "@llamaindex/readers": "*",
diff --git a/examples/readers/src/obsidian.ts b/examples/readers/src/obsidian.ts
new file mode 100644
index 0000000000..7fc233d32c
--- /dev/null
+++ b/examples/readers/src/obsidian.ts
@@ -0,0 +1,29 @@
+import { ObsidianReader } from "@llamaindex/readers/obsidian";
+
+// The vault location comes from the first CLI argument or, failing that, the
+// OBSIDIAN_VAULT_PATH environment variable, so the example runs on any machine.
+const vaultPath = process.argv[2] ?? process.env.OBSIDIAN_VAULT_PATH;
+
+if (!vaultPath) {
+  console.error(
+    "Usage: node --import tsx ./src/obsidian.ts <path-to-obsidian-vault>",
+  );
+  process.exit(1);
+}
+
+const obsidianReader = new ObsidianReader(vaultPath);
+
+obsidianReader
+  .loadData()
+  .then((documents) => {
+    console.log("documents:", documents.length);
+    documents.forEach((doc) => {
+      console.log(`document (${doc.id_}):`, doc.getText());
+    });
+  })
+  .catch((error: unknown) => {
+    // Surface read failures (missing vault, permissions) instead of leaving an
+    // unhandled rejection.
+    console.error("Failed to load Obsidian vault:", error);
+    process.exitCode = 1;
+  });
diff --git a/packages/readers/package.json b/packages/readers/package.json
index 4fe1ff4f9f..fb85db5978 100644
--- a/packages/readers/package.json
+++ b/packages/readers/package.json
@@ -222,6 +222,24 @@
         "default": "./notion/dist/index.js"
       }
     },
+    "./obsidian": {
+      "edge-light": {
+        "types": "./obsidian/dist/index.edge-light.d.ts",
+        "default": "./obsidian/dist/index.edge-light.js"
+      },
+      "workerd": {
+        "types": "./obsidian/dist/index.workerd.d.ts",
+        "default": "./obsidian/dist/index.workerd.js"
+      },
+      "require": {
+        "types": "./obsidian/dist/index.d.cts",
+        "default": "./obsidian/dist/index.cjs"
+      },
+      "import": {
+        "types": "./obsidian/dist/index.d.ts",
+        "default": "./obsidian/dist/index.js"
+      }
+    },
     "./pdf": {
       "edge-light": {
         "types": "./pdf/dist/index.edge-light.d.ts",
@@ -272,6 +290,7 @@
     "markdown",
     "mongo",
     "notion",
+    "obsidian",
     "pdf",
     "text",
     "node"
diff --git a/packages/readers/src/obsidian.ts b/packages/readers/src/obsidian.ts
new file mode 100644
index 0000000000..9913fc75c4
--- /dev/null
+++ b/packages/readers/src/obsidian.ts
@@ -0,0 +1,67 @@
+import { type BaseReader, Document } from "@llamaindex/core/schema";
+import * as fs from "node:fs";
+import path from "node:path";
+import { MarkdownReader } from "./markdown";
+
+/**
+ * Reads an Obsidian vault: walks `inputDir` recursively and turns every `.md`
+ * file into documents via `MarkdownReader`.
+ *
+ * Directories whose name starts with "." are not descended into, and any entry
+ * that is neither a visited directory nor a `.md` file is skipped with a log
+ * line.
+ */
+export class ObsidianReader implements BaseReader {
+  protected inputDir: string;
+  protected docs: Document[] = [];
+
+  /**
+   * @param inputDir Root directory of the vault to read.
+   */
+  constructor(inputDir: string) {
+    this.inputDir = inputDir;
+  }
+
+  /**
+   * Dispatches one directory entry: recurse into visible directories, convert
+   * Markdown files, and log everything else as skipped.
+   */
+  private async processPath(file: fs.Dirent, filepath: string): Promise<void> {
+    if (file.isDirectory() && !file.name.startsWith(".")) {
+      await this.readFromPath(filepath);
+    } else if (file.isFile() && file.name.endsWith(".md")) {
+      await this.convertToDocuments(filepath);
+    } else {
+      console.log(`Skipping ${filepath}`);
+    }
+  }
+
+  /** Processes every entry of `dir` in sequence, in `readdir` order. */
+  private async readFromPath(dir: string): Promise<void> {
+    const files = await fs.promises.readdir(dir, { withFileTypes: true });
+    for (const file of files) {
+      const filepath = path.join(dir, file.name);
+      await this.processPath(file, filepath);
+    }
+  }
+
+  /** Parses one Markdown file and appends its documents to `docs`. */
+  private async convertToDocuments(filepath: string): Promise<void> {
+    const content = await new MarkdownReader().loadData(filepath);
+    this.docs.push(...content);
+  }
+
+  /**
+   * Walks the vault and returns the documents of all Markdown files found.
+   *
+   * @returns Documents collected during this call only.
+   * @throws Propagates any rejection from `fs.promises.readdir` or
+   *   `MarkdownReader.loadData`.
+   */
+  async loadData(): Promise<Document[]> {
+    // Start from an empty list so repeated calls do not accumulate duplicates
+    // from earlier runs.
+    this.docs = [];
+    await this.readFromPath(this.inputDir);
+    return this.docs;
+  }
+}