# doredore/examples/python/csv_import.py (main branch, 0809android/doredore on GitHub)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
doredore - CSV インポート/エクスポート サンプル

CSVファイルからナレッジをインポートし、
検索・エクスポート機能をデモンストレーションします。
"""

import csv
from itertools import islice

from doredore import PyDoredore as Doredore


def create_sample_csv(filename: str):
    """Write a five-row sample FAQ to ``filename`` as a UTF-8 CSV file.

    The file gets a header row with the columns ``question``, ``answer``,
    ``category`` and ``priority`` followed by one row per FAQ entry. Any
    existing file at ``filename`` is overwritten. Progress messages are
    printed before and after the write.

    Args:
        filename: Path of the CSV file to create.
    """
    columns = ["question", "answer", "category", "priority"]

    # One tuple per FAQ entry, in the same order as ``columns``.
    entries = [
        (
            "永代供養とは何ですか？",
            "永代供養とは、お墓の管理を寺院に委託する供養形態です。継承者がいない方でも安心して利用できます。",
            "永代供養",
            "high",
        ),
        (
            "永代供養の費用は？",
            "永代供養の費用は、一般的に10万円〜150万円程度です。個別安置期間の長さにより価格が変動します。",
            "料金",
            "high",
        ),
        (
            "納骨堂の種類は？",
            "納骨堂には、ロッカー式、仏壇式、自動搬送式などのタイプがあります。都市部で人気が高まっています。",
            "納骨堂",
            "medium",
        ),
        (
            "樹木葬とは？",
            "樹木葬は、墓石の代わりに樹木を墓標とする自然葬の一種です。環境に優しく、費用も比較的安価です。",
            "樹木葬",
            "medium",
        ),
        (
            "一般墓との違いは？",
            "一般墓は家族代々で受け継がれますが、永代供養墓は寺院が永続的に管理します。継承者不要が大きな違いです。",
            "永代供養",
            "medium",
        ),
    ]

    print(f"📝 サンプルCSV作成中: {filename}")

    records = [dict(zip(columns, entry)) for entry in entries]

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for record in records:
            sheet.writerow(record)

    print(f"✅ {len(records)} 件のデータを含むCSVを作成\n")


def import_csv_demo(rag: Doredore):
    """Demonstrate importing a CSV file into the "faq" collection.

    Steps, in order:
      1. Generate ``./faq_data.csv`` with ``create_sample_csv``.
      2. Try to create the "faq" collection; a failure is reported as a
         warning (with the underlying error) and the demo continues.
      3. Import the CSV, using the ``answer`` column as document content
         and ``category`` / ``priority`` as metadata.
      4. Print the collection's name and document count.

    If the import itself fails, the error is printed and the function
    returns early without printing collection information.

    Args:
        rag: The Doredore instance to import into.
    """
    print("=" * 60)
    print("📥 CSV インポート デモ")
    print("=" * 60)
    print()

    # Create the sample CSV that will be imported.
    csv_file = "./faq_data.csv"
    create_sample_csv(csv_file)

    # Create the collection. Catch Exception rather than using a bare
    # ``except:`` so that KeyboardInterrupt / SystemExit still propagate,
    # and show the actual error because SOURCE does not establish that
    # "already exists" is the only way this call can fail.
    try:
        rag.create_collection("faq", "FAQデータ")
    except Exception as e:
        print(f"⚠️  コレクションは既に存在します ({e})\n")
    else:
        print("✅ コレクション作成完了\n")

    # Import the CSV.
    print(f"📥 CSVをインポート中: {csv_file}")

    try:
        count = rag.import_csv(
            file_path=csv_file,
            collection="faq",
            content_column="answer",  # this column becomes the document text
            metadata_columns=["category", "priority"]  # stored as metadata
        )
        print(f"✅ {count} 件のドキュメントをインポート完了\n")
    except Exception as e:
        print(f"❌ インポートエラー: {e}\n")
        return

    # Show the resulting collection information.
    collection = rag.get_collection("faq")
    print("📊 コレクション情報:")
    print(f"  名前: {collection.name}")
    print(f"  ドキュメント数: {collection.document_count}")
    print()


def search_demo(rag: Doredore):
    """Run three sample queries against the "faq" collection and print hits.

    For each query, the top 2 results (threshold 0.0) are requested via
    ``rag.search`` and each result's score (three decimals) and content are
    printed.

    Args:
        rag: The Doredore instance to search.
    """
    rule = "=" * 60
    print(rule)
    print("🔍 検索デモ")
    print(rule)
    print()

    sample_questions = (
        "永代供養の料金について知りたい",
        "樹木葬とは何ですか",
        "継承者がいない場合の選択肢は？",
    )

    for number, text in enumerate(sample_questions, start=1):
        print(f"\n[質問 {number}] {text}")
        print("-" * 60)

        hits = rag.search(query=text, collection="faq", top_k=2, threshold=0.0)

        # Results are numbered from 1 in the order the search returned them.
        for rank, hit in enumerate(hits, start=1):
            print(f"\n  結果 {rank} (スコア: {hit.score:.3f})")
            print(f"  {hit.content}")

        print()


def export_csv_demo(rag: Doredore):
    """Demonstrate exporting the "faq" collection to a CSV file.

    Exports the collection to ``./faq_export.csv`` via ``rag.export_csv``,
    prints the reported document count, then prints a preview of the first
    lines of the exported file. Any exception raised by the export or by
    reading the preview is caught and printed as an export error.

    Args:
        rag: The Doredore instance to export from.
    """
    print("=" * 60)
    print("📤 CSV エクスポート デモ")
    print("=" * 60)
    print()

    export_file = "./faq_export.csv"
    preview_line_count = 6  # show only the first 6 lines of the file

    print(f"📤 データをエクスポート中: {export_file}")

    try:
        count = rag.export_csv(
            file_path=export_file,
            collection="faq"
        )
        print(f"✅ {count} 件のドキュメントをエクスポート完了\n")

        # Preview the exported CSV. islice stops after the requested number
        # of lines, so the rest of the file is never read into memory.
        print("📄 エクスポートされたCSVの内容:")
        with open(export_file, 'r', encoding='utf-8') as f:
            for line in islice(f, preview_line_count):
                print(f"  {line.strip()}")

        print()

    except Exception as e:
        print(f"❌ エクスポートエラー: {e}\n")


def enrich_demo(rag: Doredore):
    """Demonstrate ``rag.enrich`` by printing the context it generates.

    Calls ``rag.enrich`` for one sample question against the "faq"
    collection (top 3, threshold 0.0) and prints the ``context`` attribute
    of the result between separator lines, followed by a short usage hint.

    Args:
        rag: The Doredore instance used to build the context.
    """
    rule = "=" * 60

    print(rule)
    print("✨ エンリッチ デモ (LLM へのコンテキスト生成)")
    print(rule)
    print()

    user_question = "永代供養と樹木葬の違いを教えてください"
    print(f"💭 質問: {user_question}\n")

    enriched = rag.enrich(
        query=user_question, collection="faq", top_k=3, threshold=0.0
    )

    # Frame the generated context between two separator lines.
    print("📋 生成されたコンテキスト:")
    print(rule)
    print(enriched.context)
    print(rule)
    print()

    for hint in (
        "💡 このコンテキストをLLMのプロンプトに含めることで、",
        "   正確な情報に基づいた回答を生成できます。",
    ):
        print(hint)
    print()


def main():
    """Run the full demo: import, search, enrich, then export.

    Creates a Doredore instance backed by ``./csv_demo.db``, runs the four
    demo steps in order with that instance, and finally prints the list of
    files the demo is expected to have produced.
    """
    print("🚀 doredore - CSV インポート/エクスポート デモ")
    print()

    # Initialise the store.
    # NOTE(review): the model name looks like an English embedding model
    # while the sample data is Japanese — confirm this is intended.
    rag = Doredore(db_path="./csv_demo.db", model="bge-small-en-v1.5")

    # 1. CSV import, 2. search, 3. enrich, 4. CSV export.
    demo_steps = (import_csv_demo, search_demo, enrich_demo, export_csv_demo)
    for run_step in demo_steps:
        run_step(rag)

    # Closing summary; an empty string prints a blank line.
    summary = (
        "🎉 デモ完了！",
        "",
        "📁 生成されたファイル:",
        "  - csv_demo.db      (SQLiteデータベース)",
        "  - faq_data.csv     (インポート元CSV)",
        "  - faq_export.csv   (エクスポートされたCSV)",
        "",
    )
    for text in summary:
        print(text)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()