langtools.py
# Adjust the import as necessary
import os
import re
import requests
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader
from langchain_community.document_loaders import WebBaseLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
os.environ["USER_AGENT"] = "myagent"


def summarize_with_sherpa(url: str) -> str:
    """
    Load the content of a document from a URL, using the LLM Sherpa API for
    supported file types and WebBaseLoader for everything else.
    """
    try:
        url = find_url(url)
        response = requests.head(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        # Strip parameters such as "; charset=utf-8" before comparing
        content_type = response.headers.get("content-type", "").split(";")[0].strip()
        allowed_types = [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "text/html",
            "text/plain",
            "application/xml",
            "application/pdf",
        ]
        loader = (
            LLMSherpaFileLoader(
                file_path=url,
                new_indent_parser=True,
                apply_ocr=True,
                strategy="text",
                llmsherpa_api_url="https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all",
            )
            if content_type in allowed_types
            else WebBaseLoader(url)
        )
        docs = loader.load()
        return docs[0].page_content
    except Exception as e:
        # Log the exception and fall back to an empty string
        print(f"An error occurred: {e}")
        return ""


def generate_twitter_post(input_text: str) -> str:
    """
    Generate a Twitter post using the Google Generative AI model.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.5,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    # The Chinese instruction asks the model to add some hashtags and promote
    # the content in a casual tone.
    prompt_template = """
    Provide a tweet based on the provided text.
    自動加上一些 hashtags, 然後口氣輕鬆一點的推廣:
    "{text}"
    Reply in ZH-TW"""
    prompt = PromptTemplate.from_template(prompt_template)
    chain = prompt | model
    tweet = chain.invoke({"text": input_text})
    return tweet.content


def generate_slack_post(input_text: str) -> str:
    """
    Generate a Slack post using the Google Generative AI model.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.5,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    # The Chinese instruction asks for more bullet points and more Slack emoji.
    prompt_template = """
    Provide a Slack post based on the provided text.
    多一點條列式,然後多一些 slack emoji:
    "{text}"
    Reply in ZH-TW"""
    prompt = PromptTemplate.from_template(prompt_template)
    chain = prompt | model
    post = chain.invoke({"text": input_text})
    return post.content


def summarize_text(text: str, max_tokens: int = 100) -> str:
    """
    Summarize a text using the Google Generative AI model.

    Note: max_tokens is accepted from callers but is not currently passed
    through to the model.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    prompt_template = """
    "{text}"
    CONCISE SUMMARY:
    Reply in ZH-TW"""
    prompt = PromptTemplate.from_template(prompt_template)
    summarize_chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=prompt)
    document = Document(page_content=text)
    summary = summarize_chain.invoke([document])
    return summary["output_text"]


def find_url(input_string: str) -> str:
    """Return the first URL found in the input string, or an empty string."""
    # Regular expression pattern to match URLs
    url_pattern = r"https?://[^\s]+"
    # Search for the pattern in the input string
    match = re.search(url_pattern, input_string)
    # If a match is found, return the URL, otherwise return an empty string
    if match:
        return match.group(0)
    return ""
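

if __name__ == "__main__":
    # Usage sketch, not part of the original file: a hedged example of how
    # these helpers might be chained together. It assumes GOOGLE_API_KEY is
    # set for ChatGoogleGenerativeAI and that network access is available;
    # the URL below is purely illustrative.
    example_input = "Check this out: https://example.com/some-article"
    content = summarize_with_sherpa(example_input)
    if content:
        summary = summarize_text(content)
        print("Summary:", summary)
        print("Tweet:", generate_twitter_post(summary))
        print("Slack post:", generate_slack_post(summary))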