Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Core/src/Extensions/SourceExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public static async Task<IReadOnlyList<Document>> LoadAndSplit(
textSplitter ??= new RecursiveCharacterTextSplitter();

var documents = await documentLoader.LoadAsync(dataSource, cancellationToken).ConfigureAwait(false);

return textSplitter.SplitDocuments(documents);
}
}
6 changes: 3 additions & 3 deletions src/Core/src/Extensions/VectorDatabaseExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public static async Task<IVectorCollection> AddDocumentsFromAsync<TLoader>(
where TLoader : IDocumentLoader, new()
{
vectorDatabase = vectorDatabase ?? throw new ArgumentNullException(paramName: nameof(vectorDatabase));

var vectorCollection = await vectorDatabase.GetOrCreateCollectionAsync(
collectionName: collectionName,
dimensions: dimensions,
Expand Down Expand Up @@ -74,15 +74,15 @@ public static async Task<IReadOnlyCollection<string>> AddDocumentsFromAsync<TLoa
var documents = await loader.LoadAsync(
dataSource: dataSource,
cancellationToken: cancellationToken).ConfigureAwait(false);

return await vectorCollection.AddSplitDocumentsAsync(
embeddingModel: embeddingModel,
documents: documents,
textSplitter: textSplitter,
embeddingSettings: embeddingSettings,
cancellationToken: cancellationToken).ConfigureAwait(false);
}

/// <summary>
/// Create a VectorDatabase table from documents.
/// </summary>
Expand Down
2 changes: 1 addition & 1 deletion src/Databases/Mongo/src/MongoVectorCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public async Task<bool> DeleteAsync(IEnumerable<string> ids, CancellationToken c
var result = await _mongoCollection.FindAsync(filter, cancellationToken: cancellationToken).ConfigureAwait(false);
return result.FirstOrDefault(cancellationToken: cancellationToken);
}

public async Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
{
return await _mongoCollection.EstimatedDocumentCountAsync(cancellationToken: cancellationToken).ConfigureAwait(false) == 0;
Expand Down
18 changes: 9 additions & 9 deletions src/Sources/Abstractions/src/DataSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ public sealed class DataSource
public string? Value { get; init; }
public Stream? Stream { get; init; }
public Encoding Encoding { get; init; } = Encoding.UTF8;

/// <summary>
/// Creates a <see cref="DataSource"/> of type <see cref="DataSourceType.Path"/>
/// whose <see cref="Value"/> is the supplied path.
/// </summary>
/// <param name="path">Path to store in the data source. Must not be null.</param>
/// <returns>A new <see cref="DataSource"/> carrying the given path.</returns>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
public static DataSource FromPath(string path)
{
    // Reject null up front so the exception names the caller's argument.
    if (path is null)
    {
        throw new ArgumentNullException(nameof(path));
    }

    var source = new DataSource
    {
        Type = DataSourceType.Path,
        Value = path,
    };

    return source;
}

public static DataSource FromUri(Uri uri)
{
uri = uri ?? throw new ArgumentNullException(nameof(uri));

return new DataSource
{
Type = DataSourceType.Uri,
Expand All @@ -38,7 +38,7 @@ public static DataSource FromUri(Uri uri)
public static DataSource FromUrl(string url)
{
url = url ?? throw new ArgumentNullException(nameof(url));

return new DataSource
{
Type = DataSourceType.Uri,
Expand All @@ -49,7 +49,7 @@ public static DataSource FromUrl(string url)
public static DataSource FromStream(Stream stream)
{
stream = stream ?? throw new ArgumentNullException(nameof(stream));

return new DataSource
{
Type = DataSourceType.Stream,
Expand All @@ -60,14 +60,14 @@ public static DataSource FromStream(Stream stream)
/// <summary>
/// Creates a <see cref="DataSource"/> of type <see cref="DataSourceType.Stream"/>
/// whose <see cref="Stream"/> is a <see cref="MemoryStream"/> over the supplied bytes.
/// </summary>
/// <param name="bytes">Bytes to expose through the stream. Must not be null.</param>
/// <returns>A new <see cref="DataSource"/> backed by an in-memory stream.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
public static DataSource FromBytes(byte[] bytes)
{
    // Validate here so the exception reports "bytes" as the offending parameter.
    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    var source = new DataSource
    {
        Type = DataSourceType.Stream,
        Stream = new MemoryStream(bytes),
    };

    return source;
}

public async Task<Stream> GetStreamAsync(CancellationToken cancellationToken = default)
{
if (Stream is not null)
Expand All @@ -86,7 +86,7 @@ public async Task<Stream> GetStreamAsync(CancellationToken cancellationToken = d
_ => new MemoryStream(Encoding.GetBytes(Value!)),
};
}

private static async Task<MemoryStream> DownloadAsMemoryStreamAsync(
Uri uri,
CancellationToken cancellationToken = default)
Expand Down
2 changes: 1 addition & 1 deletion src/Sources/Abstractions/src/FileLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class FileLoader : IDocumentLoader
public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource, CancellationToken cancellationToken = default)
{
dataSource = dataSource ?? throw new ArgumentNullException(paramName: nameof(dataSource));

var content = await File2.ReadAllTextAsync(dataSource.Value!, dataSource.Encoding, cancellationToken).ConfigureAwait(false);

// It makes sense for agents, but we need tests for this
Expand Down
2 changes: 1 addition & 1 deletion src/Sources/Pdf/src/AsposePdfSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public sealed class AsposePdfLoader : IDocumentLoader
public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource, CancellationToken cancellationToken = default)
{
dataSource = dataSource ?? throw new ArgumentNullException(paramName: nameof(dataSource));

using var stream = await dataSource.GetStreamAsync(cancellationToken).ConfigureAwait(false);
using var pdfDocument = new Aspose.Pdf.Document(stream);
var textAbsorber = new TextAbsorber();
Expand Down
4 changes: 2 additions & 2 deletions src/Sources/Pdf/src/PdfPigPdfSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource
CancellationToken cancellationToken = default)
{
dataSource = dataSource ?? throw new ArgumentNullException(paramName: nameof(dataSource));

using var stream = await dataSource.GetStreamAsync(cancellationToken).ConfigureAwait(false);
using var document = PdfDocument.Open(stream, new ParsingOptions());

return document
.GetPages()
.Select(page => new Document(page.Text, new Dictionary<string, object>
Expand Down
6 changes: 3 additions & 3 deletions src/Sources/WebBase/src/HtmlLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ public class HtmlLoader : IDocumentLoader
public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource, CancellationToken cancellationToken = default)
{
dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));

if (dataSource.Type != DataSourceType.Uri)
{
throw new NotSupportedException("Only Uri is supported");
}

var config = Configuration.Default.WithDefaultLoader();
var context = BrowsingContext.New(config);
var document = await context.OpenAsync(dataSource.Value!, cancellation: cancellationToken).ConfigureAwait(false);
Expand All @@ -29,7 +29,7 @@ public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource
var html =
document.QuerySelector("html") ??
throw new NotSupportedException("Not supported for pages without <html> tag");

return new Document[] { new(html.TextContent, new Dictionary<string, object>
{
{ "url", dataSource.Value! },
Expand Down
4 changes: 2 additions & 2 deletions src/Sources/Word/src/WordLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ public sealed class WordLoader : IDocumentLoader
public async Task<IReadOnlyCollection<Document>> LoadAsync(DataSource dataSource, CancellationToken cancellationToken = default)
{
dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));

using var stream = await dataSource.GetStreamAsync(cancellationToken).ConfigureAwait(false);
using var wordDocument = WordprocessingDocument.Open(stream, isEditable: false);

var documents = new List<string>();
foreach (var paragraph in wordDocument.MainDocumentPart?.Document.Body?.Elements<Paragraph>() ?? [])
{
Expand Down