From 45b11ea71b7daf9bb6efaaae71e3518d359beba0 Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Mon, 29 Jul 2024 11:26:09 -0400 Subject: [PATCH 1/7] Add Hello World Sample --- Llamaindex.sln | 7 +++ .../ParseDocuments/ParseDocuments.csproj | 18 ++++++++ .../GettingStarted/ParseDocuments/Program.cs | 45 +++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 samples/GettingStarted/ParseDocuments/ParseDocuments.csproj create mode 100644 samples/GettingStarted/ParseDocuments/Program.cs diff --git a/Llamaindex.sln b/Llamaindex.sln index 07f3b43..aead454 100644 --- a/Llamaindex.sln +++ b/Llamaindex.sln @@ -23,6 +23,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Llamaindex.ServiceDefaults" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LlamaParseAspire", "samples\Aspire\LlamaParseAspire\LlamaParseAspire.csproj", "{F721E5DD-3C0E-41A3-B030-913E4AE187F5}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParseDocuments", "samples\GettingStarted\ParseDocuments\ParseDocuments.csproj", "{B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -57,6 +59,10 @@ Global {F721E5DD-3C0E-41A3-B030-913E4AE187F5}.Debug|Any CPU.Build.0 = Debug|Any CPU {F721E5DD-3C0E-41A3-B030-913E4AE187F5}.Release|Any CPU.ActiveCfg = Release|Any CPU {F721E5DD-3C0E-41A3-B030-913E4AE187F5}.Release|Any CPU.Build.0 = Release|Any CPU + {B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -69,6 +75,7 @@ Global {E6CAE94F-626F-4348-A062-EADA9CABDB7A} = {D41C4A39-8A5E-488C-A2AE-5B164F79B07C} 
{D31D3C9F-5055-4914-90C6-566E0EA46877} = {D41C4A39-8A5E-488C-A2AE-5B164F79B07C} {F721E5DD-3C0E-41A3-B030-913E4AE187F5} = {D41C4A39-8A5E-488C-A2AE-5B164F79B07C} + {B0CD869E-5DAD-4EA7-AB5D-68A3516DF8E1} = {D41C4A39-8A5E-488C-A2AE-5B164F79B07C} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A523EAFD-F1D2-429A-97E2-F86406625D67} diff --git a/samples/GettingStarted/ParseDocuments/ParseDocuments.csproj b/samples/GettingStarted/ParseDocuments/ParseDocuments.csproj new file mode 100644 index 0000000..b328e36 --- /dev/null +++ b/samples/GettingStarted/ParseDocuments/ParseDocuments.csproj @@ -0,0 +1,18 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + diff --git a/samples/GettingStarted/ParseDocuments/Program.cs b/samples/GettingStarted/ParseDocuments/Program.cs new file mode 100644 index 0000000..e0fb6f3 --- /dev/null +++ b/samples/GettingStarted/ParseDocuments/Program.cs @@ -0,0 +1,45 @@ +using LlamaParse; +using System.Diagnostics.Contracts; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; + +// Configure LlamaParse client +var apiKey = Environment.GetEnvironmentVariable("LLAMACLOUD_API_KEY"); + +var parseConfig = new Configuration() +{ + ApiKey = apiKey?? 
string.Empty +}; + +var llamaParseClient = new LlamaParseClient(new HttpClient(), parseConfig); + +// Get document +var client = new HttpClient(); +var documentData = await client.GetByteArrayAsync("https://arxiv.org/pdf/1706.03762"); + +// Parse documents +var document = new InMemoryFile(documentData, "attention-is-all-you-need.pdf"); +var parsedDocs = llamaParseClient.LoadDataRawAsync(document, ResultType.Json); + +// Output parse results +await foreach (var parsedDoc in parsedDocs) +{ + var serializerOptions = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }; + + var result = JsonSerializer.Deserialize(parsedDoc.Result, serializerOptions); + + foreach(var page in result.Pages) + { + Console.WriteLine($"Page {page.Page}"); + Console.WriteLine("-------------------"); + Console.WriteLine(page.Text); + Console.WriteLine("-------------------"); + } +} + +public record ParseResult(PageContent[] Pages); +public record PageContent(int Page, string Text); \ No newline at end of file From 741dea02eb621ec27ee0c2f0471833cb9fdbe868 Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Mon, 29 Jul 2024 11:52:04 -0400 Subject: [PATCH 2/7] Add samples README --- samples/Aspire/README.md | 65 +++++++++++++++++++ .../GettingStarted/ParseDocuments/README.md | 56 ++++++++++++++++ samples/README.md | 9 +++ 3 files changed, 130 insertions(+) create mode 100644 samples/GettingStarted/ParseDocuments/README.md create mode 100644 samples/README.md diff --git a/samples/Aspire/README.md b/samples/Aspire/README.md index 09b2945..12d3b65 100644 --- a/samples/Aspire/README.md +++ b/samples/Aspire/README.md @@ -18,3 +18,68 @@ This sample shows how to add a [LlamaParse](https://docs.llamaindex.ai/en/stable "ApiKey": "ADD-YOUR-KEY-HERE" } ``` + +## Guide + +1. In your Web API project `LlamaParseAspire`, add the following code to register the `LlamaParseClient`. + +```csharp +builder.AddLlamaParseClient(builder.Configuration.GetSection("LlamaParse").Get()!); +``` + +1. 
Use the `LlamaParseClient` just like you would in any other application. In this case, the `/parse` endpoint handler takes a file as input, uses LlamaParse to extract the data, and returns the parsed results back to the user for further downstream processing. + +```csharp +var fileUploadHandler = async (LlamaParseClient client, IFormFile file) => +{ + var fileName = file.FileName; + + // Read the file into a byte array + using var ms = new MemoryStream(); + file.CopyTo(ms); + + var inMemoryFile = new InMemoryFile(ms.ToArray(), fileName); + + var sb = new StringBuilder(); + await foreach (var doc in client.LoadDataAsync(inMemoryFile)) + { + if(doc is ImageDocument) + { + continue; + } + else + { + sb.AppendLine(doc.Text); + } + } + return Results.Ok(sb.ToString()); +}; +``` + +## Enable telemetry + +The LlamaParse .NET client SDK contains OpenTelemetry instrumentation to log traces and metrics related to LlamaParse jobs. + +To enable it: + +1. Add the following code to the `ConfigureOpenTelemetry` method in the `*.ServiceDefaults` project. + +```csharp +builder.Services.AddOpenTelemetry() + .WithMetrics(metrics => + { + //other metrics code... + + // Add a meter for the LlamaParse namespace + metrics.AddMeter("LlamaParse"); + }) + .WithTracing(tracing => + { + //other tracing code... + + // Add a source for the LlamaParse namespace + tracing.AddSource("LlamaParse"); + }); +``` + +Now that this is configured, traces and metrics will begin to display in the Aspire dashboard. For more details on [Aspire telemetry](https://learn.microsoft.com/dotnet/aspire/fundamentals/telemetry) and the [dashboard](https://learn.microsoft.com/dotnet/aspire/fundamentals/dashboard/overview?tabs=bash), see the documentation. 
\ No newline at end of file diff --git a/samples/GettingStarted/ParseDocuments/README.md b/samples/GettingStarted/ParseDocuments/README.md new file mode 100644 index 0000000..8983c34 --- /dev/null +++ b/samples/GettingStarted/ParseDocuments/README.md @@ -0,0 +1,56 @@ +# ParseDocuments Sample + +This sample shows off the basics you need to get started parsing documents in .NET using the LlamaParse .NET client SDK inside of a console application. + +## Prerequisites + +- [.NET 8 SDK](https://dotnet.microsoft.com/download/dotnet/8.0) +- [LlamaCloud API Key](https://docs.cloud.llamaindex.ai/llamacloud/getting_started/api_key) +- [Visual Studio](https://visualstudio.microsoft.com/downloads/) or [Visual Studio Code](https://code.visualstudio.com/Download) + +## Guide + +1. Configure your client + + ```csharp + var apiKey = Environment.GetEnvironmentVariable("LLAMACLOUD_API_KEY"); + + var parseConfig = new Configuration() + { + ApiKey = apiKey?? string.Empty + }; + + var llamaParseClient = new LlamaParseClient(new HttpClient(), parseConfig); + ``` + +1. Use the client to parse your documents. In this case, we're using an `InMemoryFile`, which contains the document data `byte[]` from the paper [Attention is all you need](https://arxiv.org/pdf/1706.03762). For simplicity and further processing, we've opted to get the results in JSON format. + + ```csharp + var document = new InMemoryFile(documentData, "attention-is-all-you-need.pdf"); + var parsedDocs = llamaParseClient.LoadDataRawAsync(document, ResultType.Json); + ``` + +1. Extract parsed results and post-process. In this case, the code just takes the paginated results and prints them out to the console. 
+ + ```csharp + await foreach (var parsedDoc in parsedDocs) + { + var serializerOptions = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }; + + var result = JsonSerializer.Deserialize(parsedDoc.Result, serializerOptions); + + foreach(var page in result.Pages) + { + Console.WriteLine($"Page {page.Page}"); + Console.WriteLine("-------------------"); + Console.WriteLine(page.Text); + Console.WriteLine("-------------------"); + } + } + + public record ParseResult(PageContent[] Pages); + public record PageContent(int Page, string Text); + ``` \ No newline at end of file diff --git a/samples/README.md b/samples/README.md new file mode 100644 index 0000000..d0d8dee --- /dev/null +++ b/samples/README.md @@ -0,0 +1,9 @@ +# LlamaIndex .NET Samples + +This directory contains getting started for LlamaIndex + +| Sample | Type | Description | +| --- | --- | --- | +| [ParseDocuments](../samples/GettingStarted/ParseDocuments/README.md) | Console | Console application that shows the basics of getting started with LlamaParse .NET client SDK | +| [LlamaParseAspire](../samples/Aspire/README.md) | Web (Aspire) | ASP.NET Core Web API with Aspire Orchestration enabled. Uses LlamaParse to parse documents and return response back to users. | + From da33585a38bf4be2d27e71810552f3c5e4c5580f Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Tue, 30 Jul 2024 11:01:14 -0400 Subject: [PATCH 3/7] Added contributing and main README --- CONTRIBUTING.md | 25 +++++++++++++++++++ README.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..45d2129 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,25 @@ +# Contributing to LlamaIndex .NET + +## Issues + +Found bugs or have feature requests? File an issue. + +## Documentation & Samples + +We encourage community submitted samples. 
All of our samples are in the [samples](./samples/README.md) directory. + +To create new samples, submit a pull request. + +## Development + +### Project Structure + +- `LlamaIndex.Core`: Core types and abstractions for LlamaIndex. +- `LlamaIndex.Core.Tests`: Unit tests for `LlamaIndex.Core`. +- `LlamaParse`: LlamaParse .NET client SDK +- `LlamaParse.Test`: Unit tests for LlamaParse .NET client SDK + +### Configuration + +1. Install [.NET 8 SDK](https://dotnet.microsoft.com/download/dotnet/8.0) +1. Install [Visual Studio](https://visualstudio.microsoft.com/downloads/) or [Visual Studio Code](https://code.visualstudio.com/Download) \ No newline at end of file diff --git a/README.md b/README.md index 5977167..3d8ce7f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,63 @@ -# llamaindex.net -llamaindex interfaces for .net +# LlamaIndex.NET + +LlamaIndex.NET contains core types for working with LlamaIndex and client SDKs. + +At this time, the following are supported: + +- LlamaParse client SDK for .NET + +## What is LlamaIndex? + +[LlamaIndex](https://llamaindex.ai/) is a data framework for LLM applications. + +[LlamaCloud](https://docs.llamaindex.ai/en/stable/llama_cloud/) is a managed platform for data parsing and ingestion. It consists of the following components: + +- [**LlamaParse**](https://docs.llamaindex.ai/en/stable/llama_cloud/llama_parse/): self-serve document parsing API +- **Ingestion and Retrieval API**: Connect to 10+ data sources and sinks. Easily set up a data pipeline that can handle large volumes of data and incremental updates. +- **Evaluations and observability**: Run and track evaluations on your data and model + +## Important Links + +- Documentation: [https://docs.llamaindex.ai/en/stable/](https://docs.llamaindex.ai/en/stable/) +- Twitter: [https://twitter.com/llama_index](https://twitter.com/llama_index) +- Discord: [https://discord.gg/dGcwcsnxhU](https://discord.gg/dGcwcsnxhU) + +## Contributing + +Interested in contributing? 
See our [Contribution Guide](./CONTRIBUTING.md) for more details. + +## Example Usage + +Install the LlamaParse .NET SDK. + +You can find samples in the [samples directory](./samples/README.md). + +### Parse documents using the LlamaParse .NET SDK + +```csharp +using LlamaParse; + +// Initialize LlamaParse client +var parseConfig = new Configuration +{ + ApiKey = "YOUR-API-KEY"; +}; + +var client = new LlamaParseClient(new HttpClient(), parseConfig); + +// Get file info +var fileInfo = new FileInfo("attention-is-all-you-need.pdf"); + +// Parse document and format result as JSON +var documents = new List(); +await foreach(var document in client.LoadDataRawAsync(fileInfo, ResultType.Json) + { + documents.Add(document); + } + +// Output to console +foreach(var document in documents) +{ + Console.WriteLine(document); +} +``` \ No newline at end of file From b3145697544164fb78f5afe4c92d8de104ff57d0 Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Tue, 30 Jul 2024 11:04:22 -0400 Subject: [PATCH 4/7] Update license --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index b471743..9709184 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Diego Colombo +Copyright (c) 2024 LlamaIndex Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 122152b4d80e3e82492f523fb59aba1a2d5beb31 Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Tue, 30 Jul 2024 11:32:07 -0400 Subject: [PATCH 5/7] Add devcontainer config --- .devcontainer/base/configure.sh | 42 ++++++++++++++++++++++++++++ .devcontainer/base/devcontainer.json | 22 +++++++++++++++ .gitignore | 3 ++ 3 files changed, 67 insertions(+) create mode 100644 .devcontainer/base/configure.sh create mode 100644 .devcontainer/base/devcontainer.json diff --git a/.devcontainer/base/configure.sh b/.devcontainer/base/configure.sh new file mode 100644 index 
0000000..4c9870e --- /dev/null +++ b/.devcontainer/base/configure.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Install .NET Aspire Workload +# See documentation for more details +# https://learn.microsoft.com/dotnet/aspire/fundamentals/setup-tooling?tabs=linux&pivots=vscode +if command -v dotnet &> /dev/null +then + echo "dotnet is installed." + + # Specify the workload you want to install + WORKLOAD="aspire" + + # Update workloads + sudo dotnet workload update + + # Install the workload + sudo dotnet workload install $WORKLOAD + + echo "Workload '$WORKLOAD' has been installed." +else + echo "dotnet is not installed. Please install dotnet first." +fi + +# Download data files for examples +#!/bin/bash + +# URL of the file to download +FILE_URL="https://arxiv.org/pdf/1706.03762" + +# Directory where you want to place the downloaded file +TARGET_DIR="./samples/data" + +# Name of the file after downloading +FILE_NAME="attention-is-all-you-need.pdf" + +# Create the target directory if it doesn't exist +mkdir -p $TARGET_DIR + +# Download the file and place it in the target directory +curl -o $TARGET_DIR/$FILE_NAME $FILE_URL + +echo "File downloaded to $TARGET_DIR/$FILE_NAME" diff --git a/.devcontainer/base/devcontainer.json b/.devcontainer/base/devcontainer.json new file mode 100644 index 0000000..32f1e13 --- /dev/null +++ b/.devcontainer/base/devcontainer.json @@ -0,0 +1,22 @@ +{ + "name": "LlamaParse .NET DevContainer", + "image": "mcr.microsoft.com/devcontainers/base:debian", + "features": { + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/dotnet:2": { + "version": "8.0" + } + }, + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode-remote.vscode-remote-extensionpack", + "ms-azuretools.vscode-docker", + "ms-dotnettools.csharp", + "ms-dotnettools.dotnet-interactive-vscode" + ] + } + }, + "postCreateCommand": "./.devcontainer/base/configure.sh" +} \ No newline at end of file 
diff --git a/.gitignore b/.gitignore index 8a30d25..693b74a 100644 --- a/.gitignore +++ b/.gitignore @@ -396,3 +396,6 @@ FodyWeavers.xsd # JetBrains Rider *.sln.iml + +# Don't commit samples data files +/samples/data/* From 15e59a8e58624d8fb0f65222b934c3272ebf8a15 Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Tue, 30 Jul 2024 11:37:25 -0400 Subject: [PATCH 6/7] Add Codespaces and devcontainers badges --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3d8ce7f..4ebd99a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # LlamaIndex.NET +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/colombod/llamaindex.net) + +[![Open in Dev Containers](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://codespaces.new/colombod/llamaindex.net) + LlamaIndex.NET contains core types for working with LlamaIndex and client SDKs. 
At this time, the following are supported: From 05fe0cec9409669790ef5de8bead986ddd9ab3bc Mon Sep 17 00:00:00 2001 From: luisquintanilla Date: Tue, 30 Jul 2024 12:09:07 -0400 Subject: [PATCH 7/7] Added XML comments --- .../Retrievers/BaseRetriever.cs | 10 ++++++- .../Retrievers/BaseRetrieverClient.cs | 11 ++++++++ .../Schema/BaseNodeConverter.cs | 16 ++++++++++++ src/LlamaIndex.Core/Schema/Document.cs | 7 +++++ src/LlamaIndex.Core/Schema/ImageDocument.cs | 11 ++++++++ src/LlamaIndex.Core/Schema/NodeType.cs | 3 +++ src/LlamaIndex.Core/Schema/RelatedNodeInfo.cs | 6 +++++ .../Schema/RelationshipType.cs | 18 +++++++++++++ src/LlamaIndex.Core/Schema/TextNode.cs | 20 ++++++++++++++ src/LlamaParse/ItemType.cs | 3 +++ src/LlamaParse/Languages.cs | 3 +++ src/LlamaParse/LlamaParseClient.cs | 26 +++++++++++++++++-- src/LlamaParse/LlamaParseClientExtensions.cs | 3 +++ 13 files changed, 134 insertions(+), 3 deletions(-) diff --git a/src/LlamaIndex.Core/Retrievers/BaseRetriever.cs b/src/LlamaIndex.Core/Retrievers/BaseRetriever.cs index 1d3a192..d7c6061 100644 --- a/src/LlamaIndex.Core/Retrievers/BaseRetriever.cs +++ b/src/LlamaIndex.Core/Retrievers/BaseRetriever.cs @@ -5,9 +5,17 @@ namespace LlamaIndex.Core.Retrievers { + /// + /// Provides an abstraction for retrievers that retrieve nodes from a data source. + /// public abstract class BaseRetriever { - + /// + /// Given a query, retrieves nodes from the data source. + /// + /// An input query used to retrieve similar nodes. + /// Propagates notification for operations to be cancelled. + /// A collection of nodes. 
See public Task RetrieveAsync(string query, CancellationToken cancellationToken = default) { return RetrieveNodesAsync(query, cancellationToken); diff --git a/src/LlamaIndex.Core/Retrievers/BaseRetrieverClient.cs b/src/LlamaIndex.Core/Retrievers/BaseRetrieverClient.cs index 632d145..1e4abe0 100644 --- a/src/LlamaIndex.Core/Retrievers/BaseRetrieverClient.cs +++ b/src/LlamaIndex.Core/Retrievers/BaseRetrieverClient.cs @@ -10,8 +10,19 @@ namespace LlamaIndex.Core.Retrievers; +/// +/// A client for retrieving nodes from a data source. +/// +/// The URI host for the data source +/// The name of the collection where nodes are stored. public class RetrieverClient(Uri host, string vectorDbCollectionName) : BaseRetriever { + /// + /// Retrieves nodes from the data source. + /// + /// An input query used to retrieve similar nodes. + /// Propagates notification for operations to be cancelled. + /// A collection of nodes. See protected override async Task RetrieveNodesAsync(string query, CancellationToken cancellationToken) { var client = new HttpClient(); diff --git a/src/LlamaIndex.Core/Schema/BaseNodeConverter.cs b/src/LlamaIndex.Core/Schema/BaseNodeConverter.cs index d7c5433..224e735 100644 --- a/src/LlamaIndex.Core/Schema/BaseNodeConverter.cs +++ b/src/LlamaIndex.Core/Schema/BaseNodeConverter.cs @@ -5,13 +5,29 @@ namespace LlamaIndex.Core.Schema; +/// +/// Converts nodes to and from JSON. +/// public class BaseNodeConverter : JsonConverter { + /// + /// Checks whether a node can be converted into the specified type. + /// + /// The type to convert the node into. + /// Whether the node can be converted into the specified type public override bool CanConvert(Type typeToConvert) { return typeof(BaseNode).IsAssignableFrom(typeToConvert); } + /// + /// + /// + /// The JSON reader + /// The type to convert the node into. + /// JSON serialization options. 
+ /// A + /// public override BaseNode Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { using var jsonDoc = JsonDocument.ParseValue(ref reader); diff --git a/src/LlamaIndex.Core/Schema/Document.cs b/src/LlamaIndex.Core/Schema/Document.cs index fd81c3a..55c6b02 100644 --- a/src/LlamaIndex.Core/Schema/Document.cs +++ b/src/LlamaIndex.Core/Schema/Document.cs @@ -2,6 +2,13 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents a document node. +/// +/// The node ID +/// The text contents of a node +/// The data type represented in the node +/// Additional metadata for the node public class Document( string id, string? text = null, diff --git a/src/LlamaIndex.Core/Schema/ImageDocument.cs b/src/LlamaIndex.Core/Schema/ImageDocument.cs index b409b5d..2d54511 100644 --- a/src/LlamaIndex.Core/Schema/ImageDocument.cs +++ b/src/LlamaIndex.Core/Schema/ImageDocument.cs @@ -2,6 +2,17 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents an image node. +/// +/// The node ID +/// A text description of the image. For example, alt-text. +/// A string representation of the image. +/// A file path where the image is located. +/// A URL where the image is located. +/// The mime type for the image. +/// The mime type for the node. +/// Additional node metadata. public class ImageDocument( string id, string? text = null, diff --git a/src/LlamaIndex.Core/Schema/NodeType.cs b/src/LlamaIndex.Core/Schema/NodeType.cs index f596bbc..351dda2 100644 --- a/src/LlamaIndex.Core/Schema/NodeType.cs +++ b/src/LlamaIndex.Core/Schema/NodeType.cs @@ -1,5 +1,8 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents the type of node. 
+/// public enum NodeType { TextNode = 1, diff --git a/src/LlamaIndex.Core/Schema/RelatedNodeInfo.cs b/src/LlamaIndex.Core/Schema/RelatedNodeInfo.cs index 3090504..29c12d3 100644 --- a/src/LlamaIndex.Core/Schema/RelatedNodeInfo.cs +++ b/src/LlamaIndex.Core/Schema/RelatedNodeInfo.cs @@ -2,6 +2,12 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents a related node. +/// +/// The node ID. +/// The node type. +/// Additional node metadata. public class RelatedNodeInfo(string nodeId, NodeType nodeType, Dictionary? metadata = null) { public string NodeId { get; } = nodeId; diff --git a/src/LlamaIndex.Core/Schema/RelationshipType.cs b/src/LlamaIndex.Core/Schema/RelationshipType.cs index 652b181..bfe8bf0 100644 --- a/src/LlamaIndex.Core/Schema/RelationshipType.cs +++ b/src/LlamaIndex.Core/Schema/RelationshipType.cs @@ -2,6 +2,9 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents the type of relationship between nodes. +/// public enum RelationshipType { Source = 1, @@ -11,8 +14,17 @@ public enum RelationshipType Child = 5 } +/// +/// Provides extension methods for . +/// public static class RelationshipTypeExtensions { + /// + /// Converts a to a relationship name. + /// + /// The + /// The name of the relationship type. + /// public static string ToRelationshipName(this RelationshipType relationshipType) { return relationshipType switch @@ -26,6 +38,12 @@ public static string ToRelationshipName(this RelationshipType relationshipType) }; } + /// + /// Converts a to a relationship key. + /// + /// The + /// The key of the relationship type. + /// public static string ToRelationshipKey(this RelationshipType relationshipType) { return relationshipType switch diff --git a/src/LlamaIndex.Core/Schema/TextNode.cs b/src/LlamaIndex.Core/Schema/TextNode.cs index 1e363ed..a5249f8 100644 --- a/src/LlamaIndex.Core/Schema/TextNode.cs +++ b/src/LlamaIndex.Core/Schema/TextNode.cs @@ -3,6 +3,15 @@ namespace LlamaIndex.Core.Schema; +/// +/// Represents a text node. 
+/// +/// The node ID. +/// The text contents of the node. +/// The index where the node starts. +/// The index where the node ends. +/// The node mime type. +/// Additional node metadata [JsonConverter(typeof(BaseNodeConverter))] public class TextNode( string id, @@ -19,6 +28,17 @@ public class TextNode( public string? MimeType { get; } = mimeType; } +/// +/// Represents an image node. +/// +/// The node ID +/// The text description of the image. +/// The representation of the image. +/// The file path image location. +/// The URL image location. +/// The mime type of the image. +/// The mime type of the node. +/// Additional node metadata. [JsonConverter(typeof(BaseNodeConverter))] public class ImageNode( string id, diff --git a/src/LlamaParse/ItemType.cs b/src/LlamaParse/ItemType.cs index 9d58411..b5a5528 100644 --- a/src/LlamaParse/ItemType.cs +++ b/src/LlamaParse/ItemType.cs @@ -2,6 +2,9 @@ namespace LlamaParse; +/// +/// Represents the type of item in a document. +/// [Flags] public enum ItemType { diff --git a/src/LlamaParse/Languages.cs b/src/LlamaParse/Languages.cs index b6491b1..80b3bd0 100644 --- a/src/LlamaParse/Languages.cs +++ b/src/LlamaParse/Languages.cs @@ -1,5 +1,8 @@ namespace LlamaParse; +/// +/// The languages supported by LlamaParse. +/// public enum Languages { Baza, diff --git a/src/LlamaParse/LlamaParseClient.cs b/src/LlamaParse/LlamaParseClient.cs index 0adc8f9..e087848 100644 --- a/src/LlamaParse/LlamaParseClient.cs +++ b/src/LlamaParse/LlamaParseClient.cs @@ -11,12 +11,21 @@ namespace LlamaParse; +/// +/// The LlamaParseClient class provides methods for parsing data from files using the LlamaParse service. +/// public partial class LlamaParseClient { internal Configuration Configuration { get; } private readonly LlamaParseApiClient _client; + /// + /// The LlamaParseClient constructor. + /// + /// The used to make requests to the LlamaParse service. 
+ /// The LlamaParse + /// public LlamaParseClient(HttpClient client, Configuration configuration) { if (string.IsNullOrWhiteSpace(configuration.ApiKey)) @@ -112,7 +121,7 @@ public async IAsyncEnumerable LoadDataRawAsync( /// The type of result to retrieve. (Optional) /// Additional metadata for the document. (Optional) /// Language (Optional) - /// The cancellation token. (Optional) + /// The (Optional) /// An asynchronous enumerable of RawResult objects representing the loaded data. public async IAsyncEnumerable LoadDataRawAsync( IEnumerable files, @@ -156,7 +165,7 @@ public async IAsyncEnumerable LoadDataRawAsync( /// Loads images from a document asynchronously. /// /// The document containing the image metadata. - /// The cancellation token. + /// The /// An asynchronous enumerable of ImageDocument objects representing the loaded images. public async IAsyncEnumerable LoadImagesAsync(Document document, [EnumeratorCancellation] CancellationToken cancellationToken = default) { @@ -169,6 +178,12 @@ public async IAsyncEnumerable LoadImagesAsync(Document document, } } + /// + /// Loads images from a document asynchronously. + /// + /// The from a parsing job. + /// The + /// An asynchronous enumerable of ImageDocument objects representing the loaded images. public IAsyncEnumerable LoadImagesAsync(RawResult rawResult, CancellationToken cancellationToken = default) { var jobId = rawResult.JobId; @@ -176,6 +191,13 @@ public IAsyncEnumerable LoadImagesAsync(RawResult rawResult, Canc return LoadImagesAsync(jobId, rawResult.Metadata, cancellationToken); } + /// + /// Loads images from a document asynchronously. + /// + /// The parse job ID + /// Additional document metadata. + /// The + /// public async IAsyncEnumerable LoadImagesAsync(string jobId, Dictionary? documentMetadata = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { var metadata = documentMetadata ?? 
new Dictionary(); diff --git a/src/LlamaParse/LlamaParseClientExtensions.cs b/src/LlamaParse/LlamaParseClientExtensions.cs index 2998a6d..4bb72c8 100644 --- a/src/LlamaParse/LlamaParseClientExtensions.cs +++ b/src/LlamaParse/LlamaParseClientExtensions.cs @@ -11,6 +11,9 @@ namespace LlamaParse; +/// +/// The LlamaParseClientExtensions class provides extension methods for the LlamaParseClient class. +/// public static class LlamaParseClientExtensions { ///