AI-Buddy-Catalyst-Labs · tahmidul612 · Oct 21, 2025 · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,38 @@
+# Builds the package with uv and uploads it with `uv publish` whenever a tag
+# matching v* is pushed.
+name: Publish to PyPI
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+
+    # Declaring any permission sets every unlisted scope to "none", so the
+    # read access that actions/checkout relies on must be granted explicitly.
+    permissions:
+      contents: read
+      # Allows the job to request an OIDC token; presumably used for PyPI
+      # trusted publishing, since no token secret is passed to `uv publish`.
+      id-token: write
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        run: uv python install
+
+      - name: Build the distribution
+        run: uv build
+
+      - name: Publish to PyPI
+        run: uv publish
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
@@ -38,7 +38,9 @@ jobs:
           ref: ${{ github.head_ref }}
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
 
       - name: Set up Python and install ruff
         run: |
@@ -65,7 +67,7 @@ jobs:
 
       - name: Commit linting fixes
         # This step only runs if the previous 'ruff check' step succeeds.
-        uses: stefanzweifel/git-auto-commit-action@v6
+        uses: stefanzweifel/git-auto-commit-action@v7
         with:
           commit_message: |
             style: fix linting issues with ruff
@@ -79,7 +81,7 @@ jobs:
 
       - name: Commit formatting changes
         # This step only runs if the previous 'ruff format' step succeeds.
-        uses: stefanzweifel/git-auto-commit-action@v6
+        uses: stefanzweifel/git-auto-commit-action@v7
         with:
           commit_message: "style: format code with ruff"
           file_pattern: "*.py"
diff --git a/.gitignore b/.gitignore
@@ -191,9 +191,11 @@ celerybeat.pid
 .venv
 env/
 venv/
+virtualenv/
 ENV/
 env.bak/
 venv.bak/
+virtualenv.bak/
 
 # Spyder project settings
 .spyderproject
@@ -282,3 +284,6 @@ $RECYCLE.BIN/
 *.lnk
 
 # End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,linux,macos,python,git
+
+# Test environment file
+.env.test
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.1.0 (2025-10-20)
+## 0.1.0-beta.0 (2025-10-20)
 
 ### Features
 

diff --git a/examples/simple_test.py b/examples/simple_test.py
@@ -1,13 +1,21 @@
-"""Simple test script for insta_rag library."""
-
+import os
+import pytest
 from dotenv import load_dotenv
-
 from insta_rag import DocumentInput, RAGClient, RAGConfig
 
-# Load environment variables
+# Load environment variables from .env file
 load_dotenv()
 
+# Check for required environment variables
+skip_test = not all(
+    os.getenv(key) for key in ["QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY"]
+)
+
 
+@pytest.mark.skipif(
+    skip_test,
+    reason="Skipping integration test: Required environment variables are not set.",
+)
 def test_basic_functionality():
     """Test basic RAG functionality with text input."""
 
@@ -51,34 +59,38 @@ def test_basic_functionality():
     print("   ✓ Document created\n")
 
     # Process document
-    print("3. Processing document...")
+    collection_name = "integration_test_collection"
+    print(f"3. Processing document into collection: {collection_name}...")
     response = client.add_documents(
         documents=[doc],
-        collection_name="test_collection",
+        collection_name=collection_name,
         metadata={"test_run": True},
     )
 
-    if response.success:
-        print("   ✓ Document processed successfully")
-        print(f"   - Chunks created: {response.total_chunks}")
-        print(f"   - Total tokens: {response.processing_stats.total_tokens}")
-        print(f"   - Processing time: {response.processing_stats.total_time_ms:.2f}ms\n")
-
-        # Show chunk details
-        print("4. Chunk details:")
-        for i, chunk in enumerate(response.chunks, 1):
-            print(f"\n   Chunk {i}:")
-            print(f"   - ID: {chunk.chunk_id}")
-            print(f"   - Tokens: {chunk.metadata.token_count}")
-            print(f"   - Method: {chunk.metadata.chunking_method}")
-            print(f"   - Content preview: {chunk.content[:80]}...")
-    else:
-        print("   ✗ Processing failed")
-        print("   Errors:", response.errors)
+    assert response.success, f"Processing failed with errors: {response.errors}"
+    print("   ✓ Document processed successfully")
+    print(f"   - Chunks created: {response.total_chunks}")
+    print(f"   - Total tokens: {response.processing_stats.total_tokens}")
+    print(f"   - Processing time: {response.processing_stats.total_time_ms:.2f}ms\n")
+
+    # Retrieve document
+    print("4. Retrieving document...")
+    retrieve_response = client.retrieve(
+        query="What is RAG?",
+        collection_name=collection_name,
+    )
 
-    print("\n" + "-" * 60)
-    print("Test completed!")
+    assert retrieve_response.success, (
+        f"Retrieval failed with errors: {retrieve_response.errors}"
+    )
+    assert len(retrieve_response.chunks) > 0, "No chunks were retrieved."
+    print("   ✓ Document retrieved successfully")
+    print(f"   - Retrieved {len(retrieve_response.chunks)} chunks.")
 
+    # Clean up
+    print("\n5. Cleaning up...")
+    client.vectordb.client.delete_collection(collection_name=collection_name)
+    print(f"   ✓ Collection '{collection_name}' deleted.")
 
-if __name__ == "__main__":
-    test_basic_functionality()
+    print("\n" + "-" * 60)
+    print("Test completed!")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "insta_rag"
-version = "0.1.0"
+version = "0.1.0-beta.0"
 description = "A RAG (Retrieval-Augmented Generation) library for document processing and retrieval."
 authors = [
     { name = "Aukik Aurnab", email = "[email protected]" },
@@ -9,8 +9,9 @@ authors = [
 ]
 requires-python = ">=3.9"
 readme = "README.md"
+license-files = ["LICENSE"]
 classifiers = [
-    "Development Status :: 2 - Pre-Alpha",
+    "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
     "Operating System :: OS Independent",
@@ -20,23 +21,32 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Topic :: Text Processing :: Indexing",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Natural Language :: English",
 ]
 dependencies = [
     "openai>=1.12.0",
     "qdrant-client>=1.7.0",
-    "pymongo>=4.6.0",
     "pdfplumber>=0.10.3",
     "PyPDF2>=3.0.1",
     "tiktoken>=0.5.2",
     "numpy>=1.24.0",
     "python-dotenv>=1.0.0",
     "cohere>=4.47.0",
     "pydantic>=2.5.0",
+    "requests>=2.32.5",
+    "rank-bm25>=0.2.2",
 ]
 
 [project.urls]
 Homepage = "https://github.com/AI-Buddy-Catalyst-Labs/insta_rag"
 
+[build-system]
+requires = ["uv_build>=0.9.4,<0.10.0"]
+build-backend = "uv_build"
+
 [tool.ruff]
 # Exclude a variety of commonly ignored directories.
 exclude = [
@@ -118,7 +128,7 @@ docstring-code-line-length = "dynamic"
 
 [tool.commitizen]
 name = "cz_conventional_commits"
-tag_format = "$version"
+tag_format = "v$version"
 version_scheme = "semver2"
 version_provider = "uv"
 update_changelog_on_bump = true
@@ -129,4 +139,14 @@ dev = [
     "mdformat-ruff>=0.1.3",
     "pre-commit>=4.3.0",
     "ruff>=0.14.0",
+    "pytest>=8.3.2",
+    "pytest-cov>=5.0.0",
+    "pytest-mock>=3.14.0",
+    "pytest-dotenv>=0.5.2",
+]
+
+[tool.pytest.ini_options]
+testpaths = [
+    "tests",
+    "examples",
 ]