Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds the distribution with uv and uploads it to PyPI whenever a
# version tag is pushed.
name: Publish to PyPI

on:
  push:
    tags:
      # Any tag starting with `v`, e.g. v0.1.0.
      - 'v*'

jobs:
  publish:
    name: Publish to PyPI
    runs-on: ubuntu-latest

    # Declaring `permissions` sets every scope not listed here to `none`,
    # so each scope the steps below rely on must be granted explicitly.
    permissions:
      # Lets the job request an OIDC token. No API token is passed to
      # `uv publish`, so the upload presumably relies on PyPI Trusted
      # Publishing -- confirm the publisher is configured on PyPI.
      id-token: write
      # Required by actions/checkout to fetch the repository contents
      # (without it the checkout fails on private repositories).
      contents: read

    steps:
      - uses: actions/checkout@v5

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Set up Python
        run: uv python install

      - name: Build the distribution
        run: uv build

      - name: Publish to PyPI
        run: uv publish
8 changes: 5 additions & 3 deletions .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ jobs:
ref: ${{ github.head_ref }}

- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v7
with:
enable-cache: true

- name: Set up Python and install ruff
run: |
Expand All @@ -65,7 +67,7 @@ jobs:

- name: Commit linting fixes
# This step only runs if the previous 'ruff check' step succeeds.
uses: stefanzweifel/git-auto-commit-action@v6
uses: stefanzweifel/git-auto-commit-action@v7
with:
commit_message: |
style: fix linting issues with ruff
Expand All @@ -79,7 +81,7 @@ jobs:

- name: Commit formatting changes
# This step only runs if the previous 'ruff format' step succeeds.
uses: stefanzweifel/git-auto-commit-action@v6
uses: stefanzweifel/git-auto-commit-action@v7
with:
commit_message: "style: format code with ruff"
file_pattern: "*.py"
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,11 @@ celerybeat.pid
.venv
env/
venv/
virtualenv/
ENV/
env.bak/
venv.bak/
virtualenv.bak/

# Spyder project settings
.spyderproject
Expand Down Expand Up @@ -282,3 +284,6 @@ $RECYCLE.BIN/
*.lnk

# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,linux,macos,python,git

# Test environment file
.env.test
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 0.1.0 (2025-10-20)
## 0.1.0-beta.0 (2025-10-20)

### Features

Expand Down
66 changes: 39 additions & 27 deletions examples/simple_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
"""Simple test script for insta_rag library."""

import os
import pytest
from dotenv import load_dotenv

from insta_rag import DocumentInput, RAGClient, RAGConfig

# Load environment variables
# Load environment variables from .env file
load_dotenv()

# Check for required environment variables
skip_test = not all(
os.getenv(key) for key in ["QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY"]
)


@pytest.mark.skipif(
skip_test,
reason="Skipping integration test: Required environment variables are not set.",
)
def test_basic_functionality():
"""Test basic RAG functionality with text input."""

Expand Down Expand Up @@ -51,34 +59,38 @@ def test_basic_functionality():
print(" ✓ Document created\n")

# Process document
print("3. Processing document...")
collection_name = "integration_test_collection"
print(f"3. Processing document into collection: {collection_name}...")
response = client.add_documents(
documents=[doc],
collection_name="test_collection",
collection_name=collection_name,
metadata={"test_run": True},
)

if response.success:
print(" ✓ Document processed successfully")
print(f" - Chunks created: {response.total_chunks}")
print(f" - Total tokens: {response.processing_stats.total_tokens}")
print(f" - Processing time: {response.processing_stats.total_time_ms:.2f}ms\n")

# Show chunk details
print("4. Chunk details:")
for i, chunk in enumerate(response.chunks, 1):
print(f"\n Chunk {i}:")
print(f" - ID: {chunk.chunk_id}")
print(f" - Tokens: {chunk.metadata.token_count}")
print(f" - Method: {chunk.metadata.chunking_method}")
print(f" - Content preview: {chunk.content[:80]}...")
else:
print(" ✗ Processing failed")
print(" Errors:", response.errors)
assert response.success, f"Processing failed with errors: {response.errors}"
print(" ✓ Document processed successfully")
print(f" - Chunks created: {response.total_chunks}")
print(f" - Total tokens: {response.processing_stats.total_tokens}")
print(f" - Processing time: {response.processing_stats.total_time_ms:.2f}ms\n")

# Retrieve document
print("4. Retrieving document...")
retrieve_response = client.retrieve(
query="What is RAG?",
collection_name=collection_name,
)

print("\n" + "-" * 60)
print("Test completed!")
assert retrieve_response.success, (
f"Retrieval failed with errors: {retrieve_response.errors}"
)
assert len(retrieve_response.chunks) > 0, "No chunks were retrieved."
print(" ✓ Document retrieved successfully")
print(f" - Retrieved {len(retrieve_response.chunks)} chunks.")

# Clean up
print("\n5. Cleaning up...")
client.vectordb.client.delete_collection(collection_name=collection_name)
print(f" ✓ Collection '{collection_name}' deleted.")

if __name__ == "__main__":
test_basic_functionality()
print("\n" + "-" * 60)
print("Test completed!")
28 changes: 24 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "insta_rag"
version = "0.1.0"
version = "0.1.0-beta.0"
description = "A RAG (Retrieval-Augmented Generation) library for document processing and retrieval."
authors = [
{ name = "Aukik Aurnab", email = "[email protected]" },
Expand All @@ -9,8 +9,9 @@ authors = [
]
requires-python = ">=3.9"
readme = "README.md"
license-files = ["LICENSE"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Operating System :: OS Independent",
Expand All @@ -20,23 +21,32 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Text Processing :: Indexing",
"Topic :: Software Development :: Libraries :: Python Modules",
"Natural Language :: English",
]
dependencies = [
"openai>=1.12.0",
"qdrant-client>=1.7.0",
"pymongo>=4.6.0",
"pdfplumber>=0.10.3",
"PyPDF2>=3.0.1",
"tiktoken>=0.5.2",
"numpy>=1.24.0",
"python-dotenv>=1.0.0",
"cohere>=4.47.0",
"pydantic>=2.5.0",
"requests>=2.32.5",
"rank-bm25>=0.2.2",
]

[project.urls]
Homepage = "https://github.com/AI-Buddy-Catalyst-Labs/insta_rag"

[build-system]
requires = ["uv_build>=0.9.4,<0.10.0"]
build-backend = "uv_build"

[tool.ruff]
# Exclude a variety of commonly ignored directories.
exclude = [
Expand Down Expand Up @@ -118,7 +128,7 @@ docstring-code-line-length = "dynamic"

[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "$version"
tag_format = "v$version"
version_scheme = "semver2"
version_provider = "uv"
update_changelog_on_bump = true
Expand All @@ -129,4 +139,14 @@ dev = [
"mdformat-ruff>=0.1.3",
"pre-commit>=4.3.0",
"ruff>=0.14.0",
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"pytest-mock>=3.14.0",
"pytest-dotenv>=0.5.2",
]

[tool.pytest.ini_options]
testpaths = [
"tests",
"examples",
]
Loading