-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarizer.py
51 lines (39 loc) · 1.32 KB
/
summarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import requests
import json
from bs4 import BeautifulSoup
import re
from llm import llm
def html_to_text(html_content):
    """Return the text of an HTML document with whitespace runs collapsed.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    reduced to its text. Every run of spaces, tabs and newlines is then
    replaced by a single character: a newline when the run contains at least
    one newline, otherwise a single space. Leading and trailing whitespace is
    stripped from the final result.

    Args:
        html_content: HTML markup to convert.

    Returns:
        The normalized plain text.
    """
    raw_text = BeautifulSoup(html_content, 'html.parser').get_text()

    def _collapse(run):
        # Keep line structure: a run that spans a line break stays a line
        # break, any other run shrinks to one space.
        if '\n' in run.group():
            return '\n'
        return ' '

    collapsed = re.sub(r'[ \t\n]+', _collapse, raw_text)
    return collapsed.strip()
def email_to_prompt(email):
    """Build the prompt text for an email.

    Args:
        email: Mapping with an optional ``header`` mapping (``subject`` and
            ``author`` entries) and an optional ``body`` entry that is
            converted to plain text with ``html_to_text``. Missing entries
            default to empty strings.

    Returns:
        A multi-line string containing the subject, the author, a separator
        line, and the plain-text body.
    """
    header = email.get('header', {})
    subject = header.get('subject', '')
    author = header.get('author', '')
    body = html_to_text(email.get('body', ''))
    # The template lines are intentionally flush-left so that no indentation
    # leaks into the generated prompt.
    return f"""
Subject: {subject}
Author: {author}
EMAIL BODY BELOW THIS LINE
{body}
"""
def summarize(email, threshold=150):
    """Summarize an email via ``llm``, falling back to its plain-text body.

    Args:
        email: Mapping describing the email; it is passed to
            ``email_to_prompt`` and its ``body`` entry (defaulting to an
            empty string) is converted to plain text with ``html_to_text``.
        threshold: Character limit on the plain-text body. A body no longer
            than this replaces the summary outright; it is also the length
            of the truncated fallback used when ``llm`` gives no summary.
            Defaults to 150.

    Returns:
        A ``(result, reason)`` tuple. If ``llm`` returns a falsy result the
        tuple is ``(None, reason)`` with the reason reported by ``llm``.
        Otherwise ``result`` is the object returned by ``llm``, updated in
        place, and ``reason`` is ``"Success"``:

        * body length <= ``threshold``: ``summary`` is set to the whole body
          and ``is_full_message`` is set to ``True``;
        * otherwise, if the summary is missing or empty: ``summary`` is set
          to the first ``threshold`` characters of the body and
          ``is_uprocessed_summary`` is set to ``True``.
    """
    prompt = email_to_prompt(email)
    result, reason = llm(prompt)
    if not result:
        return None, reason

    summary = result.get('summary', '')
    # Read the body the same way email_to_prompt does, so an email without a
    # 'body' key does not raise KeyError after the LLM call has already run.
    body = html_to_text(email.get('body', ''))

    if len(body) <= threshold:
        # Short messages are shown verbatim instead of a generated summary.
        result['summary'] = body
        result['is_full_message'] = True
    elif not summary:
        result['summary'] = body[:threshold]
        # NOTE: the key spelling ("uprocessed") is kept unchanged because
        # consumers of the result may already depend on it.
        result['is_uprocessed_summary'] = True
    return result, "Success"