Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ A scalable, production-ready FastAPI backend for the Stellarts platform - connec
- **FastAPI Framework**: Modern, fast web framework for building APIs
- **Modular Architecture**: Clean separation of concerns with organized directory structure
- **Database Integration**: PostgreSQL with SQLAlchemy ORM
- **Semantic Search**: pgvector-powered artisan matching with OpenAI embeddings
- **Authentication**: JWT-based authentication system
- **API Versioning**: Versioned API endpoints for smooth upgrades
- **Containerized**: Docker and docker-compose for easy deployment
Expand Down Expand Up @@ -196,6 +197,7 @@ docker-compose up -d api-prod db

- `GET /` - Root endpoint with API information
- `GET /api/v1/health` - Health check with database status
- `GET /api/v1/search/semantic` - Semantic artisan search by natural-language query
- `GET /docs` - Interactive API documentation

## Security
Expand All @@ -217,8 +219,15 @@ DATABASE_URL=postgresql://user:pass@host:port/db
SECRET_KEY=secure-random-key
DEBUG=False
BACKEND_CORS_ORIGINS=["https://yourdomain.com"]
OPENAI_API_KEY=your-openai-api-key
SEMANTIC_CACHE_TTL=300
```

### Semantic Search Notes

- The local Docker database uses a pgvector-enabled image (`pgvector/pgvector:pg15`) so vector indexes and similarity operators are available.
- Use `GET /api/v1/search/semantic?q=historic%20restoration` to test natural-language ranking.

### Production Checklist

- [ ] Set strong `SECRET_KEY`
Expand Down
2 changes: 1 addition & 1 deletion backend/alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from app.models.booking import Booking
from app.models.payment import Payment
from app.models.review import Review
from app.models.portfolio import PortfolioItem
from app.models.portfolio import Portfolio

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down
5 changes: 3 additions & 2 deletions backend/app/api/v1/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import APIRouter

from app.api.v1.endpoints import admin, artisan, auth, booking, health, payments, user
from app.api.v1.endpoints import admin, artisan, auth, booking, health, payments, search, stats, user

api_router = APIRouter()

Expand All @@ -12,4 +12,5 @@
api_router.include_router(artisan.router, tags=["artisans"])
api_router.include_router(admin.router, tags=["admin"])
api_router.include_router(payments.router, prefix="/payments", tags=["payments"])
api_router.include_router(stats.router, tags=["stats"])
api_router.include_router(stats.router, tags=["stats"])
api_router.include_router(search.router, tags=["search"])
176 changes: 176 additions & 0 deletions backend/app/api/v1/endpoints/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
Semantic vector search endpoint for the StellArts artisan marketplace.

GET /search/semantic
- Converts a natural-language query into an OpenAI embedding.
- Performs a pgvector cosine-distance search against artisan embeddings.
- Re-ranks results with a hybrid score:
hybrid = semantic_weight * semantic_similarity
+ (1 - semantic_weight) * reputation_weight
where reputation_weight = artisan.rating / 5.0
"""
import hashlib
import logging
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Query, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.core.cache import cache
from app.core.config import settings
from app.db.session import get_db
from app.models.artisan import Artisan
from app.services.embedding import get_query_embedding

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/search", tags=["search"])

# ---------------------------------------------------------------------------
# Response schemas
# ---------------------------------------------------------------------------

class ArtisanSearchResult(BaseModel):
    """One artisan row plus the ranking scores computed for a semantic query."""

    # from_attributes lets this model be built directly from the ORM Artisan row.
    model_config = ConfigDict(from_attributes=True)

    id: int
    business_name: str | None
    description: str | None
    specialties: str | None  # raw JSON string; clients parse as needed
    location: str | None
    rating: float | None
    total_reviews: int
    is_available: bool
    is_verified: bool
    # Scores (higher is better, range 0-1)
    semantic_similarity: float  # 1 - pgvector cosine distance, clamped to >= 0
    reputation_weight: float    # rating / 5.0 (0.0 when unrated)
    hybrid_score: float         # weighted blend used for the final ordering


class SemanticSearchResponse(BaseModel):
    """Envelope returned by GET /search/semantic."""

    query: str                         # the original natural-language query, echoed back
    results: list[ArtisanSearchResult] # ranked best-first by hybrid_score
    total: int                         # len(results) after the limit is applied


# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------

@router.get(
    "/semantic",
    response_model=SemanticSearchResponse,
    summary="Semantic artisan search",
    description=(
        "Find artisans using natural-language queries. "
        "Results are ranked by a hybrid of semantic similarity and on-chain "
        "reputation weight."
    ),
)
async def semantic_search(
    q: Annotated[str, Query(min_length=2, max_length=500, description="Natural-language search query")],
    limit: Annotated[int, Query(ge=1, le=50)] = 10,
    semantic_weight: Annotated[float, Query(ge=0.0, le=1.0, description="Weight for semantic similarity (0-1); remainder goes to reputation")] = 0.7,
    available_only: Annotated[bool, Query(description="When true, only return artisans who are currently available")] = False,
    db: Session = Depends(get_db),
) -> SemanticSearchResponse:
    """Rank artisans against a natural-language query.

    Pipeline:
      1. Serve a cached response when an identical search was answered recently.
      2. Embed ``q`` via OpenAI.
      3. pgvector cosine-distance KNN over ``Artisan.embedding`` (3x over-fetch).
      4. Re-rank with ``hybrid = w * similarity + (1 - w) * rating / 5``.
      5. Cache the serialised response for ``SEMANTIC_CACHE_TTL`` seconds.

    Raises:
        HTTPException 503: when OPENAI_API_KEY is not configured, or the
            embedding service fails (surfaced as a RuntimeError by the
            embedding helper).
    """
    if not settings.OPENAI_API_KEY:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Semantic search is not configured (OPENAI_API_KEY missing).",
        )

    # ------------------------------------------------------------------
    # 1. Check result cache (keyed on all search params).
    #    Built-in hash() on strings is randomised per process
    #    (PYTHONHASHSEED), so keys would never match across workers or
    #    restarts and the shared Redis cache would effectively miss
    #    forever. Use a stable content digest instead.
    # ------------------------------------------------------------------
    q_digest = hashlib.sha256(q.encode("utf-8")).hexdigest()[:16]
    cache_key = (
        f"search:semantic:{q_digest}"
        f":lim{limit}:sw{semantic_weight}:av{int(available_only)}"
    )
    if cache.redis:
        cached = await cache.get(cache_key)
        if cached is not None:
            return SemanticSearchResponse(**cached)

    # ------------------------------------------------------------------
    # 2. Embed the query.
    # ------------------------------------------------------------------
    try:
        query_vector = await get_query_embedding(q)
    except RuntimeError as exc:
        # Chain the cause so the traceback shows the underlying
        # embedding-service failure, not just the HTTP error.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=str(exc),
        ) from exc

    # ------------------------------------------------------------------
    # 3. Cosine-distance search via pgvector (<=> operator).
    #    Fetch 3x the requested limit so re-ranking has enough candidates.
    # ------------------------------------------------------------------
    cosine_dist = Artisan.embedding.op("<=>")(query_vector)

    stmt = (
        select(Artisan, cosine_dist.label("cosine_distance"))
        .where(Artisan.embedding.isnot(None))
    )
    if available_only:
        stmt = stmt.where(Artisan.is_available.is_(True))

    stmt = stmt.order_by(cosine_dist).limit(limit * 3)

    rows = db.execute(stmt).all()

    if not rows:
        return SemanticSearchResponse(query=q, results=[], total=0)

    # ------------------------------------------------------------------
    # 4. Hybrid re-ranking
    #    semantic_similarity = 1 - cosine_distance (OpenAI vecs are normalised)
    #    reputation_weight   = rating / 5.0
    #    hybrid_score        = w * sim + (1-w) * rep
    # ------------------------------------------------------------------
    reputation_weight_factor = 1.0 - semantic_weight
    scored: list[tuple[Artisan, float, float, float]] = []

    for artisan, cosine_distance in rows:
        # Clamp at 0: float noise can push distance slightly above 1.0.
        semantic_sim = max(0.0, 1.0 - float(cosine_distance))
        rep = float(artisan.rating) / 5.0 if artisan.rating else 0.0
        hybrid = semantic_weight * semantic_sim + reputation_weight_factor * rep
        scored.append((artisan, semantic_sim, rep, hybrid))

    scored.sort(key=lambda t: t[3], reverse=True)
    top = scored[:limit]

    results = [
        ArtisanSearchResult(
            id=a.id,
            business_name=a.business_name,
            description=a.description,
            specialties=a.specialties,
            location=a.location,
            rating=float(a.rating) if a.rating is not None else None,
            total_reviews=a.total_reviews or 0,
            is_available=a.is_available,
            is_verified=a.is_verified,
            semantic_similarity=round(sim, 4),
            reputation_weight=round(rep, 4),
            hybrid_score=round(hybrid, 4),
        )
        for a, sim, rep, hybrid in top
    ]

    response = SemanticSearchResponse(query=q, results=results, total=len(results))

    # ------------------------------------------------------------------
    # 5. Cache the serialised response.
    # ------------------------------------------------------------------
    if cache.redis:
        await cache.set(
            cache_key,
            response.model_dump(),
            expire=settings.SEMANTIC_CACHE_TTL,
        )

    return response
6 changes: 5 additions & 1 deletion backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ def assemble_cors_origins(cls, v: str | list[str]) -> list[str] | str:
STRIPE_SECRET_KEY: str | None = None
STRIPE_PUBLISHABLE_KEY: str | None = None

# Soroban Configuration
# OpenAI
OPENAI_API_KEY: str | None = None
SEMANTIC_CACHE_TTL: int = 300 # seconds to cache semantic search results

# Soroban Configuration
SOROBAN_RPC_URL: str = "https://soroban-testnet.stellar.org"
ESCROW_CONTRACT_ID: str | None = None
REPUTATION_CONTRACT_ID: str | None = None
Expand Down
2 changes: 2 additions & 0 deletions backend/app/models/artisan.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
DECIMAL,
Boolean,
Expand Down Expand Up @@ -31,6 +32,7 @@ class Artisan(Base):
is_available = Column(Boolean, default=True)
rating = Column(DECIMAL(3, 2), default=0.0)
total_reviews = Column(Integer, default=0)
embedding = Column(Vector(1536), nullable=True)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
Expand Down
2 changes: 1 addition & 1 deletion backend/app/schemas/user.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import re
from enum import StrEnum
from enum import Enum, StrEnum

from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator

Expand Down
Loading
Loading