-
Notifications
You must be signed in to change notification settings - Fork 0
/
knowledge_loader.py
130 lines (103 loc) · 4.17 KB
/
knowledge_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# knowledge_loader.py
import json
import logging
import os
import re
from typing import List, Dict, Any
# Module-level logger used by the loader, the manager and the demo entry point below.
logger = logging.getLogger(__name__)
class KnowledgeLoader:
    """Loader that turns plain-text files into knowledge-base documents."""

    @staticmethod
    def _split_paragraphs(text: str) -> List[str]:
        """Return the stripped, non-empty paragraphs found in *text*.

        Paragraphs are separated by one or more blank lines. A separator line
        that contains only whitespace (spaces, tabs) counts as blank, so files
        saved with stray trailing whitespace are still split correctly.
        """
        stripped = (piece.strip() for piece in re.split(r"\n\s*\n", text))
        return [piece for piece in stripped if piece]

    @staticmethod
    def _pack_paragraphs(paragraphs: List[str], chunk_size: int) -> List[str]:
        """Greedily merge consecutive paragraphs into chunks.

        Paragraphs inside a chunk are joined by a blank line. A paragraph is
        added to the current chunk while the resulting chunk stays within
        *chunk_size* characters; otherwise the current chunk is closed and the
        paragraph starts a new one. A single paragraph longer than
        *chunk_size* is kept whole as its own chunk (it is never cut).
        """
        separator = "\n\n"
        chunks: List[str] = []
        pending: List[str] = []
        # Length of the pending paragraphs, each counted with its trailing
        # separator, so pending_len + len(paragraph) is exactly the length of
        # the chunk that would result from adding the paragraph.
        pending_len = 0

        for paragraph in paragraphs:
            if pending_len + len(paragraph) <= chunk_size:
                pending.append(paragraph)
                pending_len += len(paragraph) + len(separator)
                continue
            if pending:
                chunks.append(separator.join(pending))
            pending = [paragraph]
            pending_len = len(paragraph) + len(separator)

        if pending:
            chunks.append(separator.join(pending))
        return chunks

    @staticmethod
    def load_from_txt(filepath: str, chunk_size: int = 1000) -> List[Dict[str, Any]]:
        """
        Load knowledge from a UTF-8 text file.

        The file is split into paragraphs on blank lines, and consecutive
        paragraphs are merged into chunks of at most ``chunk_size`` characters
        (a single longer paragraph becomes its own chunk).

        Args:
            filepath: Path to the file
            chunk_size: Maximum size of one merged text chunk, in characters

        Returns:
            List[Dict[str, Any]]: One document per chunk, each of the form
            ``{"text": <chunk>, "metadata": {"source": <file name>,
            "chunk_id": <0-based index>}}``.

        Raises:
            Exception: Any error raised while opening, reading or processing
            the file is logged and re-raised unchanged.
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as file:
                text = file.read()

            paragraphs = KnowledgeLoader._split_paragraphs(text)
            chunks = KnowledgeLoader._pack_paragraphs(paragraphs, chunk_size)

            # Only the file name (not the full path) is stored as the source.
            source = os.path.basename(filepath)
            documents = [
                {
                    "text": chunk,
                    "metadata": {
                        "source": source,
                        "chunk_id": i,
                    },
                }
                for i, chunk in enumerate(chunks)
            ]

            logger.info("Loaded %s documents from %s", len(documents), filepath)
            return documents
        except Exception as e:
            logger.error("Error loading file %s: %s", filepath, e)
            raise
class KnowledgeManager:
    """Knowledge base manager for small businesses.

    Reads text files through a ``KnowledgeLoader`` and hands the resulting
    documents to the chatbot it was created with.
    """

    def __init__(self, chatbot):
        """
        Remember the target chatbot and create the loader.

        Args:
            chatbot: Object whose ``add_knowledge(documents)`` method receives
                the documents loaded from each file.
        """
        self.chatbot = chatbot
        self.loader = KnowledgeLoader()

    def load_knowledge_directory(self, directory: str) -> None:
        """
        Load all .txt files from a directory (non-recursive).

        Files are processed in sorted name order so repeated runs feed the
        chatbot in the same sequence. Entries that end in ``.txt`` but are not
        regular files (for example a sub-directory) are skipped.

        Args:
            directory: Path to the knowledge directory

        Raises:
            Exception: Any error raised while listing the directory, loading a
            file or adding documents is logged and re-raised unchanged.
        """
        try:
            # os.listdir() gives entries in arbitrary order; sort them so the
            # ingestion order is reproducible.
            for filename in sorted(os.listdir(directory)):
                if not filename.endswith('.txt'):
                    continue
                filepath = os.path.join(directory, filename)
                if not os.path.isfile(filepath):
                    # e.g. a directory named "something.txt" cannot be opened
                    continue
                documents = self.loader.load_from_txt(filepath)
                self.chatbot.add_knowledge(documents)
            logger.info("All files from %s have been successfully loaded", directory)
        except Exception as e:
            logger.error("Error loading directory %s: %s", directory, e)
            raise
# knowledge_format.txt - example structure of a knowledge file:
"""
# General Information
Our company "Example" has been operating in the market since 2010.
We specialize in selling electronics and household appliances.
# Working Hours
The store is open daily from 9:00 AM to 9:00 PM without breaks or days off.
Technical support is available on weekdays from 8:00 AM to 8:00 PM.
# Delivery
Delivery is carried out throughout the city within 1-2 business days.
Delivery cost depends on the area and ranges from 300 to 500 rubles.
"""
# Example usage:
if __name__ == "__main__":
    import asyncio  # needed only by this demo entry point

    # NOTE(review): Chatbot is neither defined nor imported in this file; it
    # has to be made available before this demo can run.
    async def main():
        """Load ./knowledge into a new Chatbot and print its answer to a sample question."""
        assistant = Chatbot()
        manager = KnowledgeManager(assistant)

        try:
            manager.load_knowledge_directory("./knowledge")
        except Exception as e:
            # Without knowledge there is nothing to ask; report and stop.
            logger.error(f"Error loading knowledge: {str(e)}")
        else:
            # Test query
            reply = await assistant.process_message("When is the store open?")
            print(f"Response: {reply['response']}")

    asyncio.run(main())