From 0859f8d6feb08434206f78390cad6d0b371c2cc3 Mon Sep 17 00:00:00 2001 From: Mahdi Zand <80402823+mzand111@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:39:00 +0300 Subject: [PATCH] fix: Update MarkdownHeaderTextSplitter.cs (#485) This change prevents errors when the markdown file includes lines that start with # but have no following characters. Co-authored-by: Konstantin S. --- .../Abstractions/src/Text/MarkdownHeaderTextSplitter.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs b/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs index cf551724..18a4b706 100644 --- a/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs +++ b/src/Splitters/Abstractions/src/Text/MarkdownHeaderTextSplitter.cs @@ -122,6 +122,8 @@ private bool IsHeader(string line, out int len) len = 0; foreach (var header in _headersToSplitOn) { + if (line.Length <= header.Length + 1) + return false;//Empty lines starting with #s should not be considered as headers. Removing this line would result in exceptions in those conditions if (line.Trim().StartsWith(header, StringComparison.Ordinal) && line[header.Length] == ' ') { len = header.Length; @@ -131,4 +133,4 @@ private bool IsHeader(string line, out int len) return false; } -} \ No newline at end of file +}