Skip to content

Commit 4687450

Browse files
947961
1 parent 2959b6d commit 4687450

File tree

3 files changed

+35
-38
lines changed

3 files changed

+35
-38
lines changed

Smart_Summarize/AIAssitViewModel.cs

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Syncfusion.Windows.PdfViewer;
33
using System.Collections.ObjectModel;
44
using System.ComponentModel;
5+
using System.Text;
56

67
namespace Smart_Summarize
78
{
@@ -13,6 +14,7 @@ internal class AIAssitViewModel : INotifyPropertyChanged
1314
private Author currentUser;
1415
private PdfViewerControl pdfViewer;
1516
MicrosoftAIExtension microsoftAIExtension;
17+
StringBuilder processedText = new StringBuilder();
1618
public event PropertyChangedEventHandler PropertyChanged;
1719
#endregion
1820

@@ -129,27 +131,51 @@ private async Task AddSuggestions(String text)
129131
/// <summary>
130132
/// Extracts the text of every page of the document held by pdfViewer, prefixing each page with a "... Page N ..." marker line.
131133
/// </summary>
/// <returns>In the added (+) version, the result of ProcessExtractedText applied to the concatenated page text.</returns>
132-
private async Task ExtractDetailsFromPDF()
134+
private async Task<string> ExtractDetailsFromPDF()
133135
{
134-
List<string> extractedText = new List<string>();
136+
StringBuilder extractedText = new StringBuilder();
135137
// textLines only serves as the out-argument of ExtractText; its contents are not read in this method.
Syncfusion.Pdf.TextLines textLines = new Syncfusion.Pdf.TextLines();
136138
//Extract the text page by page; each page is preceded by a "... Page N ..." marker (N is 1-based)
137139
for (int pageIndex = 0; pageIndex < pdfViewer.PageCount; pageIndex++)
138140
{
139141
string text = $"... Page {pageIndex + 1} ...\n";
140142
text += pdfViewer.ExtractText(pageIndex, out textLines);
141-
extractedText.Add(text);
143+
extractedText.AppendLine(text);
142144
}
143145

// NOTE(review): the added (+) version no longer awaits anything, so the method completes synchronously even though it is declared async.
144-
await microsoftAIExtension.CreateEmbeddedPage(extractedText.ToArray());
146+
return ProcessExtractedText(extractedText.ToString());
147+
}
148+
/// <summary>
/// Processes the extracted full text from a document by splitting it into pages
/// and rebuilding it with a "... Page N ..." header in front of each page.
/// </summary>
/// <remarks>
/// The result is also left in the <c>processedText</c> field, which SummarizePDF reads.
/// The buffer is reset on every call so that processing a second document does not
/// append to (and later re-send) the text of the previous one.
/// </remarks>
/// <param name="fullText">The complete extracted text from the document.</param>
/// <returns>A formatted string containing the processed text; empty when <paramref name="fullText"/> is null or empty.</returns>
private string ProcessExtractedText(string fullText)
{
    // Upper bound on the number of lines kept from each page.
    const int maxLinesPerPage = 1000;

    // processedText is a field that outlives this call; start from a clean buffer
    // so repeated extractions do not accumulate.
    processedText.Clear();

    if (string.IsNullOrEmpty(fullText))
    {
        return string.Empty;
    }

    // NOTE(review): ExtractDetailsFromPDF writes markers of the form "... Page N ...",
    // which the "\n\nPage " delimiter does not match; unless the text contains form feeds
    // the whole document is treated as a single page here — confirm the intended delimiter.
    string[] pages = fullText.Split(new string[] { "\f", "\n\nPage " }, StringSplitOptions.RemoveEmptyEntries);

    for (int i = 0; i < pages.Length; i++)
    {
        processedText.AppendLine($"... Page {i + 1} ...");

        string[] lines = pages[i].Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
        int maxLines = Math.Min(maxLinesPerPage, lines.Length);

        for (int j = 0; j < maxLines; j++)
        {
            processedText.AppendLine(lines[j]);
        }

        // Blank line between pages.
        processedText.AppendLine();
    }

    return processedText.ToString();
}
146172
/// <summary>
147173
/// Summarizes the processed PDF text by passing it to microsoftAIExtension.GetAnswerFromGPT.
148174
/// </summary>
/// <returns>The string returned by GetAnswerFromGPT.</returns>
149175
private async Task<string> SummarizePDF()
150176
{
151177
//Added (+) version: send the text accumulated in processedText; the summarization prompt itself now lives inside GetAnswerFromGPT
152-
string summary = await microsoftAIExtension.GetAnswerFromGPT("You are a helpful assistant. Your task is to analyze the provided text and generate short summary as a plain text.");
178+
string summary = await microsoftAIExtension.GetAnswerFromGPT(processedText.ToString());
153179
return summary;
154180
}
155181
/// <summary>
Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
using Microsoft.Extensions.AI;
22
using Azure.AI.OpenAI;
33
using System.Text;
4-
using SmartComponents.LocalEmbeddings;
54
namespace Smart_Summarize
65
{
76
internal class MicrosoftAIExtension
@@ -12,7 +11,6 @@ internal class MicrosoftAIExtension
1211
internal string key = string.Empty;
1312

1413
private IChatClient clientAI;
15-
public Dictionary<string, EmbeddingF32>? PageEmbeddings { get; set; }
1614

1715
/// <summary>
1816
/// Initializes a new instance of the <see cref="MicrosoftAIExtension"/> class.
@@ -23,30 +21,20 @@ public MicrosoftAIExtension(string key)
2321
clientAI = new AzureOpenAIClient(new System.Uri(endpoint), new System.ClientModel.ApiKeyCredential(key)).AsChatClient(deploymentName);
2422
}
2523

26-
/// <summary>
27-
/// Embeds each extracted text chunk with a LocalEmbedder and stores the results in PageEmbeddings, keyed by the chunk text.
28-
/// </summary>
29-
/// <param name="chunks">Extracted text from pdfViewer</param>
30-
/// <returns>A task for the operation; the body contains no await.</returns>
31-
public async Task CreateEmbeddedPage(string[] chunks)
32-
{
33-
var embedder = new LocalEmbedder();
34-
PageEmbeddings = chunks.Select(x => KeyValuePair.Create(x, embedder.Embed(x))).ToDictionary(k => k.Key, v => v.Value);
35-
}
3624
/// <summary>
3725
/// Method to get the answer from GPT using the Microsoft.Extensions.AI chat client
3826
/// </summary>
39-
/// <param name="systemPrompt">Prompt for the system message</param>
27+
/// <param name="extractedText">Extracted text from the document</param>
4028
/// <returns>Returns the chat client's response as a string</returns>
41-
public async Task<string> GetAnswerFromGPT(string systemPrompt)
29+
public async Task<string> GetAnswerFromGPT(string extractedText)
4230
{
4331
if (clientAI != null)
4432
{
45-
List<string> message = PageEmbeddings.Keys.Take(10).ToList();
33+
string systemPrompt = "You are a helpful assistant. Your task is to analyze the provided text and generate short summary as a plain text";
4634
var chatMessages = new List<ChatMessage>
4735
{
4836
new ChatMessage(ChatRole.System, systemPrompt),
49-
new ChatMessage(ChatRole.User, string.Join(" ", message))
37+
new ChatMessage(ChatRole.User, extractedText)
5038
};
5139
var response = await clientAI.GetResponseAsync(chatMessages);
5240
return response.ToString();
@@ -76,21 +64,5 @@ public async Task<string> GetAnswerFromGPT(string systemPrompt, string userText)
7664
}
7765
return string.Empty;
7866
}
79-
80-
/// <summary>
/// Answers a question about the PDF: embeds the question, selects entries from PageEmbeddings
/// via LocalEmbedder.FindClosestWithScore, and passes their text together with the question to GetAnswerFromGPT.
/// </summary>
/// <param name="question">The user's question.</param>
/// <returns>The string returned by GetAnswerFromGPT.</returns>
public async Task<string> AnswerQuestion(string question)
81-
{
82-
var embedder = new LocalEmbedder();
83-
var questionEmbedding = embedder.Embed(question);
84-
// NOTE(review): 5 and 0.5f are presumably the maximum result count and minimum similarity score — confirm against LocalEmbedder.FindClosestWithScore.
var results = LocalEmbedder.FindClosestWithScore(questionEmbedding, PageEmbeddings.Select(x => (x.Key, x.Value)), 5, 0.5f);
85-
StringBuilder builder = new StringBuilder();
86-
foreach (var result in results)
87-
{
88-
builder.AppendLine(result.Item);
89-
}
90-
string message = builder.ToString();
91-
var answer = await GetAnswerFromGPT("You are a helpful assistant. Use the provided PDF document pages and pick a precise page to answer the user question. Provide the answer in plain text without any special formatting or Markdown syntax. Pages: " + message, question);
92-

93-
return answer;
94-
}
9567
}
9668
}

Smart_Summarize/Smart_Summarize.csproj

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
<ItemGroup>
1616

17-
<PackageReference Include="SmartComponents.LocalEmbeddings" Version="0.1.0-preview10148" />
1817
<PackageReference Include="Syncfusion.PdfViewer.WPF" Version="*" />
1918
<PackageReference Include="Syncfusion.SfBusyIndicator.WPF" Version="*" />
2019
<PackageReference Include="Syncfusion.SfChat.WPF" Version="*" />

0 commit comments

Comments
 (0)