diff --git a/backend/.env.example b/backend/.env.example index a42c3ac..d7bd42a 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -24,3 +24,17 @@ SMTP_PASSWORD= # External APIs (for future use) STRIPE_SECRET_KEY= STRIPE_PUBLISHABLE_KEY= + +# Google Calendar OAuth (read-only) - register at https://console.cloud.google.com/ +GOOGLE_CLIENT_ID= +GOOGLE_CLIENT_SECRET= + +# Microsoft Outlook OAuth (read-only) - register at https://portal.azure.com/ +MICROSOFT_CLIENT_ID= +MICROSOFT_CLIENT_SECRET= + +# OAuth callback URL (must match redirect URI registered with OAuth providers) +OAUTH_REDIRECT_URI=http://localhost:8000/api/v1/calendar/callback + +# Calendar sync lookahead window in days +CALENDAR_SYNC_LOOKAHEAD_DAYS=30 diff --git a/backend/README.md b/backend/README.md index 39ccb4b..3dd9800 100644 --- a/backend/README.md +++ b/backend/README.md @@ -7,6 +7,7 @@ A scalable, production-ready FastAPI backend for the Stellarts platform - connec - **FastAPI Framework**: Modern, fast web framework for building APIs - **Modular Architecture**: Clean separation of concerns with organized directory structure - **Database Integration**: PostgreSQL with SQLAlchemy ORM +- **Semantic Search**: pgvector-powered artisan matching with OpenAI embeddings - **Authentication**: JWT-based authentication system - **API Versioning**: Versioned API endpoints for smooth upgrades - **Containerized**: Docker and docker-compose for easy deployment @@ -196,6 +197,7 @@ docker-compose up -d api-prod db - `GET /` - Root endpoint with API information - `GET /api/v1/health` - Health check with database status +- `GET /api/v1/search/semantic` - Semantic artisan search by natural-language query - `GET /docs` - Interactive API documentation ## Security @@ -217,8 +219,15 @@ DATABASE_URL=postgresql://user:pass@host:port/db SECRET_KEY=secure-random-key DEBUG=False BACKEND_CORS_ORIGINS=["https://yourdomain.com"] +OPENAI_API_KEY=your-openai-api-key +SEMANTIC_CACHE_TTL=300 ``` +### Semantic Search Notes + +- The local Docker database uses a pgvector-enabled image (`pgvector/pgvector:pg15`) so vector indexes and similarity operators are available. +- Use `GET /api/v1/search/semantic?q=historic%20restoration` to test natural-language ranking. + ### Production Checklist - [ ] Set strong `SECRET_KEY` diff --git a/backend/alembic/env.py b/backend/alembic/env.py index ce20242..815b0b2 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -17,7 +17,7 @@ from app.models.booking import Booking from app.models.payment import Payment from app.models.review import Review -from app.models.portfolio import PortfolioItem +from app.models.portfolio import Portfolio # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/backend/alembic/versions/002_add_calendar_tables.py b/backend/alembic/versions/002_add_calendar_tables.py new file mode 100644 index 0000000..50275fd --- /dev/null +++ b/backend/alembic/versions/002_add_calendar_tables.py @@ -0,0 +1,91 @@ +"""Add artisan_calendar_tokens and calendar_events tables + +Revision ID: 002_add_calendar_tables +Revises: 001_pgvector_embedding +Create Date: 2026-03-28 +""" +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +revision = "002_add_calendar_tables" +down_revision = "001_pgvector_embedding" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + bind = op.get_bind() + inspector = sa.inspect(bind) + existing_tables = inspector.get_table_names() + + # ── artisan_calendar_tokens ─────────────────────────────────────────────── + if "artisan_calendar_tokens" not in existing_tables: + op.create_table( + "artisan_calendar_tokens", + sa.Column("id", sa.Integer(), primary_key=True, index=True), + sa.Column( + "artisan_id", + sa.Integer(), + sa.ForeignKey("artisans.id", ondelete="CASCADE"), + nullable=False, + index=True, + ), + sa.Column("provider", sa.String(20), nullable=False), + sa.Column("access_token", sa.Text(), nullable=False), + sa.Column("refresh_token", sa.Text(), nullable=True), + sa.Column("token_expiry", sa.DateTime(timezone=True), nullable=True), + sa.Column("scope", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + onupdate=sa.func.now(), + ), + ) + + # ── calendar_events ─────────────────────────────────────────────────────── + if "calendar_events" not in existing_tables: + op.create_table( + "calendar_events", + sa.Column("id", sa.Uuid(), primary_key=True, index=True), + sa.Column( + "artisan_id", + sa.Integer(), + sa.ForeignKey("artisans.id", ondelete="CASCADE"), + nullable=False, + index=True, + ), + sa.Column("provider", sa.String(20), nullable=False), + sa.Column("external_id", sa.String(500), nullable=False), + sa.Column("title", sa.String(500), nullable=True), + sa.Column("start_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("location", sa.Text(), nullable=True), + sa.Column("latitude", sa.String(20), nullable=True), + sa.Column("longitude", sa.String(20), nullable=True), + sa.Column("is_busy", sa.Boolean(), default=True), + sa.Column( + "synced_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + ), + ) + + +def downgrade() -> None: + bind = op.get_bind() + inspector = sa.inspect(bind) + existing_tables = inspector.get_table_names() + + if "calendar_events" in existing_tables: + op.drop_table("calendar_events") + + if "artisan_calendar_tokens" in existing_tables: + op.drop_table("artisan_calendar_tokens") diff --git a/backend/app/api/v1/api.py b/backend/app/api/v1/api.py index 7f241a7..c60fae4 100644 --- a/backend/app/api/v1/api.py +++ b/backend/app/api/v1/api.py @@ -1,6 +1,6 @@ from fastapi import APIRouter -from app.api.v1.endpoints import admin, artisan, auth, booking, health, payments, user +from app.api.v1.endpoints import admin, artisan, auth, booking, calendar, health, payments, scheduling, search, stats, user api_router = APIRouter() @@ -12,4 +12,7 @@ api_router.include_router(artisan.router, tags=["artisans"]) api_router.include_router(admin.router, tags=["admin"]) api_router.include_router(payments.router, prefix="/payments", tags=["payments"]) -api_router.include_router(stats.router, tags=["stats"]) \ No newline at end of file +api_router.include_router(stats.router, tags=["stats"]) +api_router.include_router(search.router, tags=["search"]) +api_router.include_router(calendar.router, tags=["calendar"]) +api_router.include_router(scheduling.router, tags=["scheduling"]) \ No newline at end of file diff --git a/backend/app/api/v1/endpoints/calendar.py b/backend/app/api/v1/endpoints/calendar.py new file mode 100644 index 0000000..a0996bc --- /dev/null +++ b/backend/app/api/v1/endpoints/calendar.py @@ -0,0 +1,192 @@ +""" +Calendar OAuth API endpoints. + +Artisan-only routes for connecting/disconnecting Google Calendar and +Microsoft Outlook (read-only) and listing synced events. +""" +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from fastapi.responses import RedirectResponse +from sqlalchemy.orm import Session + +from app.core.auth import require_artisan, get_current_active_user +from app.core.config import settings +from app.db.session import get_db +from app.models.artisan import Artisan +from app.models.calendar import CalendarEvent +from app.models.user import User +from app.schemas.calendar import ( + CalendarConnectRequest, + CalendarConnectResponse, + CalendarEventResponse, + CalendarStatusResponse, +) +from app.services.calendar_oauth import calendar_oauth_service + +router = APIRouter(prefix="/calendar") + +SUPPORTED_PROVIDERS = {"google", "microsoft"} + + +def _get_artisan_or_404(db: Session, current_user: User) -> Artisan: + artisan = db.query(Artisan).filter(Artisan.user_id == current_user.id).first() + if not artisan: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Artisan profile not found for current user", + ) + return artisan + + +@router.post("/connect", response_model=CalendarConnectResponse) +def connect_calendar( + body: CalendarConnectRequest, + db: Session = Depends(get_db), + current_user: User = Depends(require_artisan), +): + """ + Initiate read-only OAuth flow for Google Calendar or Microsoft Outlook. + + Returns the provider authorization URL. The client must redirect the user + to this URL to grant calendar read access. + """ + if body.provider not in SUPPORTED_PROVIDERS: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=f"Unsupported provider '{body.provider}'. Choose: {sorted(SUPPORTED_PROVIDERS)}", + ) + + artisan = _get_artisan_or_404(db, current_user) + + try: + if body.provider == "google": + auth_url = calendar_oauth_service.get_google_auth_url(artisan.id) + else: + auth_url = calendar_oauth_service.get_microsoft_auth_url(artisan.id) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=str(exc), + ) from exc + + return CalendarConnectResponse(auth_url=auth_url, provider=body.provider) + + +@router.get("/callback") +def calendar_callback( + code: str = Query(...), + state: str = Query(...), + provider: str = Query(default="google"), + error: str | None = Query(default=None), + db: Session = Depends(get_db), +): + """ + OAuth callback endpoint. + + The OAuth provider redirects here after the user grants access. + The ``state`` parameter encodes ``{provider}:{artisan_id}``. + On success, stores the token and redirects to the frontend dashboard. + """ + if error: + redirect_url = f"{settings.FRONTEND_URL}/dashboard?calendar_error={error}" + return RedirectResponse(url=redirect_url) + + # state = "provider:artisan_id" or just "artisan_id" (Google flow) + # We accept both formats for robustness. + resolved_provider = provider + artisan_id_str = state + if ":" in state: + parts = state.split(":", 1) + resolved_provider = parts[0] + artisan_id_str = parts[1] + + try: + artisan_id = int(artisan_id_str) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid state parameter") from None + + if resolved_provider not in SUPPORTED_PROVIDERS: + raise HTTPException(status_code=400, detail=f"Unknown provider: {resolved_provider}") + + try: + if resolved_provider == "google": + calendar_oauth_service.exchange_google_code(code, artisan_id, db) + else: + calendar_oauth_service.exchange_microsoft_code(code, artisan_id, db) + except Exception as exc: + logger_msg = str(exc) + redirect_url = ( + f"{settings.FRONTEND_URL}/dashboard" + f"?calendar_error=token_exchange_failed&detail={logger_msg[:80]}" + ) + return RedirectResponse(url=redirect_url) + + redirect_url = f"{settings.FRONTEND_URL}/dashboard?calendar_connected={resolved_provider}" + return RedirectResponse(url=redirect_url) + + +@router.delete("/disconnect") +def disconnect_calendar( + provider: str = Query(..., description="Provider to disconnect: 'google' or 'microsoft'"), + db: Session = Depends(get_db), + current_user: User = Depends(require_artisan), +): + """Revoke and delete the stored calendar token for a provider.""" + if provider not in SUPPORTED_PROVIDERS: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=f"Unsupported provider '{provider}'", + ) + + artisan = _get_artisan_or_404(db, current_user) + deleted = calendar_oauth_service.revoke_token(artisan.id, provider, db) + + if not deleted: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"No {provider} calendar connection found", + ) + + return {"message": f"{provider} calendar disconnected successfully"} + + +@router.get("/events", response_model=list[CalendarEventResponse]) +def list_calendar_events( + db: Session = Depends(get_db), + current_user: User = Depends(require_artisan), + skip: int = 0, + limit: int = 50, +): + """List synced calendar events for the current artisan (cached locally).""" + artisan = _get_artisan_or_404(db, current_user) + + events = ( + db.query(CalendarEvent) + .filter(CalendarEvent.artisan_id == artisan.id) + .order_by(CalendarEvent.start_time.asc()) + .offset(skip) + .limit(limit) + .all() + ) + return events + + +@router.get("/status", response_model=CalendarStatusResponse) +def calendar_status( + db: Session = Depends(get_db), + current_user: User = Depends(require_artisan), +): + """Check which calendar providers are connected and local event count.""" + artisan = _get_artisan_or_404(db, current_user) + providers = calendar_oauth_service.get_connected_providers(artisan.id, db) + event_count = ( + db.query(CalendarEvent) + .filter(CalendarEvent.artisan_id == artisan.id) + .count() + ) + return CalendarStatusResponse( + connected=len(providers) > 0, + providers=providers, + event_count=event_count, + ) diff --git a/backend/app/api/v1/endpoints/scheduling.py b/backend/app/api/v1/endpoints/scheduling.py new file mode 100644 index 0000000..7c0f5e7 --- /dev/null +++ b/backend/app/api/v1/endpoints/scheduling.py @@ -0,0 +1,53 @@ +""" +Scheduling / slot-suggestion API endpoint. + +Open to both clients (requesting a service) and artisans (checking their +own schedule). Results are derived from the artisan's synced calendar +events and existing DB bookings. +""" +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.orm import Session + +from app.core.auth import require_client_or_artisan +from app.db.session import get_db +from app.models.artisan import Artisan +from app.models.user import User +from app.schemas.scheduling import SlotSuggestionRequest, SlotSuggestionResponse +from app.services.scheduling import scheduling_service + +router = APIRouter(prefix="/scheduling") + + +@router.post("/suggest", response_model=SlotSuggestionResponse) +async def suggest_slots( + request: SlotSuggestionRequest, + db: Session = Depends(get_db), + current_user: User = Depends(require_client_or_artisan), +): + """ + Suggest optimal booking time slots for an artisan on a given day. + + The engine: + - Syncs the artisan's calendar (if connected via OAuth) before computing. + - Filters out any slots that overlap existing calendar events or confirmed + bookings (double-booking prevention). + - Scores remaining slots by travel distance from the artisan's last known + job location (geographic grouping). + + Returns slots sorted best-first (lowest travel = highest score). + """ + artisan = db.query(Artisan).filter(Artisan.id == request.artisan_id).first() + if not artisan: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Artisan {request.artisan_id} not found", + ) + + result = await scheduling_service.suggest_slots( + request=request, + artisan=artisan, + db=db, + ) + return result diff --git a/backend/app/api/v1/endpoints/search.py b/backend/app/api/v1/endpoints/search.py new file mode 100644 index 0000000..ddbfb8b --- /dev/null +++ b/backend/app/api/v1/endpoints/search.py @@ -0,0 +1,176 @@ +""" +Semantic vector search endpoint for the StellArts artisan marketplace. + +GET /search/semantic + - Converts a natural-language query into an OpenAI embedding. + - Performs a pgvector cosine-distance search against artisan embeddings. + - Re-ranks results with a hybrid score: + hybrid = semantic_weight * semantic_similarity + + (1 - semantic_weight) * reputation_weight + where reputation_weight = artisan.rating / 5.0 +""" +import logging +from typing import Annotated + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from pydantic import BaseModel, ConfigDict +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.cache import cache +from app.core.config import settings +from app.db.session import get_db +from app.models.artisan import Artisan +from app.services.embedding import get_query_embedding + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/search", tags=["search"]) + +# --------------------------------------------------------------------------- +# Response schemas +# --------------------------------------------------------------------------- + +class ArtisanSearchResult(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: int + business_name: str | None + description: str | None + specialties: str | None # raw JSON string; clients parse as needed + location: str | None + rating: float | None + total_reviews: int + is_available: bool + is_verified: bool + # Scores (higher is better, range 0-1) + semantic_similarity: float + reputation_weight: float + hybrid_score: float + + +class SemanticSearchResponse(BaseModel): + query: str + results: list[ArtisanSearchResult] + total: int + + +# --------------------------------------------------------------------------- +# Endpoint +# --------------------------------------------------------------------------- + +@router.get( + "/semantic", + response_model=SemanticSearchResponse, + summary="Semantic artisan search", + description=( + "Find artisans using natural-language queries. " + "Results are ranked by a hybrid of semantic similarity and on-chain " + "reputation weight." + ), +) +async def semantic_search( + q: Annotated[str, Query(min_length=2, max_length=500, description="Natural-language search query")], + limit: Annotated[int, Query(ge=1, le=50)] = 10, + semantic_weight: Annotated[float, Query(ge=0.0, le=1.0, description="Weight for semantic similarity (0-1); remainder goes to reputation)")] = 0.7, + available_only: Annotated[bool, Query(description="When true, only return artisans who are currently available")] = False, + db: Session = Depends(get_db), +) -> SemanticSearchResponse: + if not settings.OPENAI_API_KEY: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Semantic search is not configured (OPENAI_API_KEY missing).", + ) + + # ------------------------------------------------------------------ + # 1. Check result cache (keyed on all search params) + # ------------------------------------------------------------------ + cache_key = ( + f"search:semantic:{hash(q) & 0xFFFF_FFFF}" + f":lim{limit}:sw{semantic_weight}:av{int(available_only)}" + ) + if cache.redis: + cached = await cache.get(cache_key) + if cached is not None: + return SemanticSearchResponse(**cached) + + # ------------------------------------------------------------------ + # 2. Embed the query + # ------------------------------------------------------------------ + try: + query_vector = await get_query_embedding(q) + except RuntimeError as exc: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=str(exc), + ) + + # ------------------------------------------------------------------ + # 3. Cosine-distance search via pgvector (<=> operator) + # Fetch 3x the requested limit so re-ranking has enough candidates. + # ------------------------------------------------------------------ + cosine_dist = Artisan.embedding.op("<=>")(query_vector) + + stmt = ( + select(Artisan, cosine_dist.label("cosine_distance")) + .where(Artisan.embedding.isnot(None)) + ) + if available_only: + stmt = stmt.where(Artisan.is_available.is_(True)) + + stmt = stmt.order_by(cosine_dist).limit(limit * 3) + + rows = db.execute(stmt).all() + + if not rows: + return SemanticSearchResponse(query=q, results=[], total=0) + + # ------------------------------------------------------------------ + # 4. Hybrid re-ranking + # semantic_similarity = 1 - cosine_distance (OpenAI vecs are normalised) + # reputation_weight = rating / 5.0 + # hybrid_score = w * sim + (1-w) * rep + # ------------------------------------------------------------------ + reputation_weight_factor = 1.0 - semantic_weight + scored: list[tuple[Artisan, float, float, float]] = [] + + for artisan, cosine_distance in rows: + semantic_sim = max(0.0, 1.0 - float(cosine_distance)) + rep = float(artisan.rating) / 5.0 if artisan.rating else 0.0 + hybrid = semantic_weight * semantic_sim + reputation_weight_factor * rep + scored.append((artisan, semantic_sim, rep, hybrid)) + + scored.sort(key=lambda t: t[3], reverse=True) + top = scored[:limit] + + results = [ + ArtisanSearchResult( + id=a.id, + business_name=a.business_name, + description=a.description, + specialties=a.specialties, + location=a.location, + rating=float(a.rating) if a.rating is not None else None, + total_reviews=a.total_reviews or 0, + is_available=a.is_available, + is_verified=a.is_verified, + semantic_similarity=round(sim, 4), + reputation_weight=round(rep, 4), + hybrid_score=round(hybrid, 4), + ) + for a, sim, rep, hybrid in top + ] + + response = SemanticSearchResponse(query=q, results=results, total=len(results)) + + # ------------------------------------------------------------------ + # 5. Cache the serialised response + # ------------------------------------------------------------------ + if cache.redis: + await cache.set( + cache_key, + response.model_dump(), + expire=settings.SEMANTIC_CACHE_TTL, + ) + + return response diff --git a/backend/app/core/config.py b/backend/app/core/config.py index a0c92e0..432f0d9 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -55,7 +55,25 @@ def assemble_cors_origins(cls, v: str | list[str]) -> list[str] | str: STRIPE_SECRET_KEY: str | None = None STRIPE_PUBLISHABLE_KEY: str | None = None - # Soroban Configuration + # Google Calendar OAuth (read-only) + GOOGLE_CLIENT_ID: str | None = None + GOOGLE_CLIENT_SECRET: str | None = None + + # Microsoft Outlook OAuth (read-only) + MICROSOFT_CLIENT_ID: str | None = None + MICROSOFT_CLIENT_SECRET: str | None = None + + # OAuth shared settings + OAUTH_REDIRECT_URI: str = "http://localhost:8000/api/v1/calendar/callback" + + # Calendar sync – how many days ahead to look for events + CALENDAR_SYNC_LOOKAHEAD_DAYS: int = 30 + + # OpenAI + OPENAI_API_KEY: str | None = None + SEMANTIC_CACHE_TTL: int = 300 # seconds to cache semantic search results + + # Soroban Configuration SOROBAN_RPC_URL: str = "https://soroban-testnet.stellar.org" ESCROW_CONTRACT_ID: str | None = None REPUTATION_CONTRACT_ID: str | None = None diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index fd5dcbe..2fc34df 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -1,5 +1,8 @@ # Import database components from app.db.base import Base +# Import models so Alembic/Base.metadata discovers them +from app.models import calendar as _calendar_models # noqa: F401 + # Re-export for convenience __all__ = ["Base"] diff --git a/backend/app/models/artisan.py b/backend/app/models/artisan.py index e60fbd5..494b58d 100644 --- a/backend/app/models/artisan.py +++ b/backend/app/models/artisan.py @@ -1,3 +1,4 @@ +from pgvector.sqlalchemy import Vector from sqlalchemy import ( DECIMAL, Boolean, @@ -31,6 +32,7 @@ class Artisan(Base): is_available = Column(Boolean, default=True) rating = Column(DECIMAL(3, 2), default=0.0) total_reviews = Column(Integer, default=0) + embedding = Column(Vector(1536), nullable=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() diff --git a/backend/app/models/calendar.py b/backend/app/models/calendar.py new file mode 100644 index 0000000..62f1117 --- /dev/null +++ b/backend/app/models/calendar.py @@ -0,0 +1,60 @@ +""" +SQLAlchemy models for calendar integration. + +ArtisanCalendarToken – stores OAuth tokens per artisan per provider. +CalendarEvent – local read-only cache of synced calendar events. +""" +from __future__ import annotations + +import uuid + +from sqlalchemy import ( + Boolean, + Column, + DateTime, + ForeignKey, + Integer, + String, + Text, + Uuid, +) +from sqlalchemy.sql import func + +from app.db.base import Base + + +class ArtisanCalendarToken(Base): + """Stores OAuth refresh/access tokens for a connected calendar provider.""" + + __tablename__ = "artisan_calendar_tokens" + + id = Column(Integer, primary_key=True, index=True) + artisan_id = Column(Integer, ForeignKey("artisans.id"), nullable=False, index=True) + provider = Column(String(20), nullable=False) # 'google' | 'microsoft' + access_token = Column(Text, nullable=False) + refresh_token = Column(Text, nullable=True) + token_expiry = Column(DateTime(timezone=True), nullable=True) + scope = Column(Text, nullable=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + +class CalendarEvent(Base): + """Read-only local cache of calendar events fetched from connected providers.""" + + __tablename__ = "calendar_events" + + id = Column(Uuid, primary_key=True, default=uuid.uuid4, index=True) + artisan_id = Column(Integer, ForeignKey("artisans.id"), nullable=False, index=True) + provider = Column(String(20), nullable=False) # 'google' | 'microsoft' + external_id = Column(String(500), nullable=False) # provider's event id + title = Column(String(500), nullable=True) + start_time = Column(DateTime(timezone=True), nullable=False) + end_time = Column(DateTime(timezone=True), nullable=False) + location = Column(Text, nullable=True) + latitude = Column(String(20), nullable=True) # stored as string for portability + longitude = Column(String(20), nullable=True) + is_busy = Column(Boolean, default=True) # free vs busy events + synced_at = Column(DateTime(timezone=True), server_default=func.now()) diff --git a/backend/app/schemas/calendar.py b/backend/app/schemas/calendar.py new file mode 100644 index 0000000..a1fa22b --- /dev/null +++ b/backend/app/schemas/calendar.py @@ -0,0 +1,57 @@ +"""Pydantic schemas for calendar OAuth and event endpoints.""" +from __future__ import annotations + +from datetime import datetime +from uuid import UUID + +from pydantic import BaseModel, ConfigDict, Field + + +class CalendarConnectRequest(BaseModel): + """Request to initiate an OAuth connection.""" + + provider: str = Field( + ..., description="Calendar provider: 'google' or 'microsoft'" + ) + + +class CalendarConnectResponse(BaseModel): + """Response containing the provider's OAuth authorization URL.""" + + auth_url: str = Field(..., description="Redirect the user to this URL to authorize") + provider: str + + +class CalendarCallbackQuery(BaseModel): + """Query parameters returned by the OAuth provider callback.""" + + code: str + state: str # artisan_id encoded as string + error: str | None = None + + +class CalendarEventResponse(BaseModel): + """A single cached calendar event.""" + + model_config = ConfigDict(from_attributes=True) + + id: UUID + artisan_id: int + provider: str + external_id: str + title: str | None + start_time: datetime + end_time: datetime + location: str | None + latitude: str | None + longitude: str | None + is_busy: bool + synced_at: datetime + + +class CalendarStatusResponse(BaseModel): + """Current calendar connection status for the artisan.""" + + connected: bool + providers: list[str] = Field(default_factory=list) + event_count: int = 0 diff --git a/backend/app/schemas/scheduling.py b/backend/app/schemas/scheduling.py new file mode 100644 index 0000000..429d882 --- /dev/null +++ b/backend/app/schemas/scheduling.py @@ -0,0 +1,47 @@ +"""Pydantic schemas for the scheduling / slot-suggestion endpoint.""" +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, Field + + +class SlotSuggestionRequest(BaseModel): + """Parameters needed to suggest optimal booking slots for an artisan.""" + + artisan_id: int = Field(..., description="ID of the artisan to schedule") + client_lat: float = Field(..., description="Client job latitude") + client_lon: float = Field(..., description="Client job longitude") + service_duration_hours: float = Field( + ..., gt=0, description="Estimated service duration in hours" + ) + preferred_date: datetime = Field( + ..., description="Preferred date (time component ignored – full day is searched)" + ) + + +class SlotSuggestion(BaseModel): + """A single ranked slot suggestion.""" + + start_time: datetime + end_time: datetime + travel_time_minutes: float = Field( + ..., description="Estimated travel time from artisan's prior location" + ) + travel_km: float = Field( + ..., description="Estimated travel distance in km from artisan's prior location" + ) + score: float = Field( + ..., description="Scheduling score – higher is better (less travel waste)" + ) + prior_job_location: str | None = Field( + None, description="Description of where the artisan is coming from" + ) + + +class SlotSuggestionResponse(BaseModel): + """Response containing ranked slot suggestions.""" + + artisan_id: int + preferred_date: datetime + suggestions: list[SlotSuggestion] diff --git a/backend/app/schemas/user.py b/backend/app/schemas/user.py index 4b443f4..eef67ba 100644 --- a/backend/app/schemas/user.py +++ b/backend/app/schemas/user.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from enum import StrEnum +from enum import Enum, StrEnum from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator diff --git a/backend/app/services/calendar_oauth.py b/backend/app/services/calendar_oauth.py new file mode 100644 index 0000000..80cdcd9 --- /dev/null +++ b/backend/app/services/calendar_oauth.py @@ -0,0 +1,320 @@ +""" +Calendar OAuth service. + +Handles the read-only OAuth 2.0 flow for Google Calendar and Microsoft Outlook. +Tokens are stored in the ``artisan_calendar_tokens`` table. +""" +from __future__ import annotations + +import logging +from datetime import datetime, timezone + +from sqlalchemy.orm import Session + +from app.core.config import settings +from app.models.calendar import ArtisanCalendarToken + +logger = logging.getLogger(__name__) + +GOOGLE_SCOPES = [ + "https://www.googleapis.com/auth/calendar.readonly", + "https://www.googleapis.com/auth/calendar.events.readonly", +] + +MICROSOFT_SCOPES = [ + "https://graph.microsoft.com/Calendars.Read", + "offline_access", +] + + +class CalendarOAuthService: + """Manages OAuth token lifecycle for calendar providers.""" + + # ------------------------------------------------------------------ + # Google + # ------------------------------------------------------------------ + + def get_google_auth_url(self, artisan_id: int) -> str: + """Return Google OAuth2 authorization URL.""" + from google_auth_oauthlib.flow import Flow # type: ignore[import-untyped] + + if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET: + raise ValueError("Google OAuth credentials not configured") + + client_config = { + "web": { + "client_id": settings.GOOGLE_CLIENT_ID, + "client_secret": settings.GOOGLE_CLIENT_SECRET, + "redirect_uris": [settings.OAUTH_REDIRECT_URI], + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + } + } + + flow = Flow.from_client_config( + client_config, + scopes=GOOGLE_SCOPES, + redirect_uri=settings.OAUTH_REDIRECT_URI, + ) + + auth_url, _ = flow.authorization_url( + access_type="offline", + include_granted_scopes="true", + state=str(artisan_id), + prompt="consent", + ) + return auth_url + + def exchange_google_code(self, code: str, artisan_id: int, db: Session) -> ArtisanCalendarToken: + """Exchange Google auth code for tokens and persist them.""" + from google_auth_oauthlib.flow import Flow # type: ignore[import-untyped] + + if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET: + raise ValueError("Google OAuth credentials not configured") + + client_config = { + "web": { + "client_id": settings.GOOGLE_CLIENT_ID, + "client_secret": settings.GOOGLE_CLIENT_SECRET, + "redirect_uris": [settings.OAUTH_REDIRECT_URI], + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + } + } + + flow = Flow.from_client_config( + client_config, + scopes=GOOGLE_SCOPES, + redirect_uri=settings.OAUTH_REDIRECT_URI, + ) + flow.fetch_token(code=code) + credentials = flow.credentials + + expiry_dt = credentials.expiry # already a datetime or None + if expiry_dt and expiry_dt.tzinfo is None: + expiry_dt = expiry_dt.replace(tzinfo=timezone.utc) + + return self._upsert_token( + db=db, + artisan_id=artisan_id, + provider="google", + access_token=credentials.token, + refresh_token=credentials.refresh_token, + token_expiry=expiry_dt, + scope=" ".join(credentials.scopes or []), + ) + + def refresh_google_token(self, token_row: ArtisanCalendarToken, db: Session) -> str: + """Refresh an expired Google access token in-place and return new access token.""" + from google.auth.transport.requests import Request # type: ignore[import-untyped] + from google.oauth2.credentials import Credentials # type: ignore[import-untyped] + + if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET: + raise ValueError("Google OAuth credentials not configured") + + creds = Credentials( + token=token_row.access_token, + refresh_token=token_row.refresh_token, + token_uri="https://oauth2.googleapis.com/token", + client_id=settings.GOOGLE_CLIENT_ID, + client_secret=settings.GOOGLE_CLIENT_SECRET, + ) + creds.refresh(Request()) + + expiry_dt = creds.expiry + if expiry_dt and expiry_dt.tzinfo is None: + expiry_dt = expiry_dt.replace(tzinfo=timezone.utc) + + token_row.access_token = creds.token + token_row.token_expiry = expiry_dt + db.commit() + return creds.token + + # ------------------------------------------------------------------ + # Microsoft + # ------------------------------------------------------------------ + + def get_microsoft_auth_url(self, artisan_id: int) -> str: + """Return Microsoft OAuth2 authorization URL via MSAL.""" + import msal # type: ignore[import-untyped] + + if not settings.MICROSOFT_CLIENT_ID or not settings.MICROSOFT_CLIENT_SECRET: + raise ValueError("Microsoft OAuth credentials not configured") + + app = msal.ConfidentialClientApplication( + settings.MICROSOFT_CLIENT_ID, + authority="https://login.microsoftonline.com/common", + client_credential=settings.MICROSOFT_CLIENT_SECRET, + ) + auth_url = app.get_authorization_request_url( + scopes=MICROSOFT_SCOPES, + redirect_uri=settings.OAUTH_REDIRECT_URI, + state=str(artisan_id), + ) + return auth_url + + def exchange_microsoft_code(self, code: str, artisan_id: int, db: Session) -> ArtisanCalendarToken: + """Exchange Microsoft auth code for tokens and persist them.""" + import msal # type: ignore[import-untyped] + + if not settings.MICROSOFT_CLIENT_ID or not settings.MICROSOFT_CLIENT_SECRET: + raise ValueError("Microsoft OAuth credentials not configured") + + msal_app = msal.ConfidentialClientApplication( + settings.MICROSOFT_CLIENT_ID, + authority="https://login.microsoftonline.com/common", + client_credential=settings.MICROSOFT_CLIENT_SECRET, + ) + result = msal_app.acquire_token_by_authorization_code( + code=code, + scopes=MICROSOFT_SCOPES, + redirect_uri=settings.OAUTH_REDIRECT_URI, + ) + if "error" in result: + raise ValueError(f"Microsoft token exchange failed: {result.get('error_description')}") + + access_token: str = result["access_token"] + refresh_token: str | None = result.get("refresh_token") + + expires_in: int = result.get("expires_in", 3600) + expiry_dt = datetime.now(timezone.utc).replace( + second=datetime.now(timezone.utc).second + expires_in + ) + + return self._upsert_token( + db=db, + artisan_id=artisan_id, + provider="microsoft", + access_token=access_token, + refresh_token=refresh_token, + token_expiry=expiry_dt, + scope=" ".join(MICROSOFT_SCOPES), + ) + + def refresh_microsoft_token(self, token_row: ArtisanCalendarToken, db: Session) -> str: + """Refresh a Microsoft access token using the stored refresh token.""" + import msal # type: ignore[import-untyped] + + if not settings.MICROSOFT_CLIENT_ID or not settings.MICROSOFT_CLIENT_SECRET: + raise ValueError("Microsoft OAuth credentials not configured") + + msal_app = msal.ConfidentialClientApplication( + settings.MICROSOFT_CLIENT_ID, + authority="https://login.microsoftonline.com/common", + client_credential=settings.MICROSOFT_CLIENT_SECRET, + ) + result = msal_app.acquire_token_by_refresh_token( + token_row.refresh_token, + scopes=MICROSOFT_SCOPES, + ) + if "error" in result: + raise ValueError(f"Microsoft token refresh failed: {result.get('error_description')}") + + access_token: str = result["access_token"] + token_row.access_token = access_token + db.commit() + return access_token + + # ------------------------------------------------------------------ + # Shared helpers + # ------------------------------------------------------------------ + + def get_valid_access_token( + self, artisan_id: int, provider: str, db: Session + ) -> str | None: + """Return a valid (refreshed if needed) access token or None if not connected.""" + token_row = ( + db.query(ArtisanCalendarToken) + .filter( + ArtisanCalendarToken.artisan_id == artisan_id, + ArtisanCalendarToken.provider == provider, + ) + .first() + ) + if not token_row: + return None + + now = datetime.now(timezone.utc) + is_expired = ( + token_row.token_expiry is not None + and token_row.token_expiry.replace(tzinfo=timezone.utc) <= now + ) + + if is_expired and token_row.refresh_token: + try: + if provider == "google": + return self.refresh_google_token(token_row, db) + elif provider == "microsoft": + return self.refresh_microsoft_token(token_row, db) + except Exception as exc: + logger.warning("Token refresh failed for artisan %s / %s: %s", artisan_id, provider, exc) + return None + + return token_row.access_token + + def revoke_token(self, artisan_id: int, provider: str, db: Session) -> bool: + """Delete the stored token for a provider (disconnect).""" + rows_deleted = ( + db.query(ArtisanCalendarToken) + .filter( + ArtisanCalendarToken.artisan_id == artisan_id, + ArtisanCalendarToken.provider == provider, + ) + .delete() + ) + db.commit() + return rows_deleted > 0 + + def get_connected_providers(self, artisan_id: int, db: Session) -> list[str]: + """Return list of providers the artisan has connected ('google', 'microsoft').""" + rows = ( + db.query(ArtisanCalendarToken.provider) + .filter(ArtisanCalendarToken.artisan_id == artisan_id) + .all() + ) + return [r.provider for r in rows] + + def _upsert_token( + self, + db: Session, + artisan_id: int, + provider: str, + access_token: str, + refresh_token: str | None, + token_expiry: datetime | None, + scope: str | None, + ) -> ArtisanCalendarToken: + existing = ( + db.query(ArtisanCalendarToken) + .filter( + ArtisanCalendarToken.artisan_id == artisan_id, + ArtisanCalendarToken.provider == provider, + ) + .first() + ) + if existing: + existing.access_token = access_token + if refresh_token: + existing.refresh_token = refresh_token + existing.token_expiry = token_expiry + existing.scope = scope + db.commit() + db.refresh(existing) + return existing + + token_row = ArtisanCalendarToken( + artisan_id=artisan_id, + provider=provider, + access_token=access_token, + refresh_token=refresh_token, + token_expiry=token_expiry, + scope=scope, + ) + db.add(token_row) + db.commit() + db.refresh(token_row) + return token_row + + +# Module-level singleton +calendar_oauth_service = CalendarOAuthService() diff --git a/backend/app/services/calendar_sync.py b/backend/app/services/calendar_sync.py new file mode 100644 index 0000000..35609ab --- /dev/null +++ b/backend/app/services/calendar_sync.py @@ -0,0 +1,256 @@ +""" +Calendar sync service. + +Fetches calendar events from connected providers (Google / Microsoft) and +caches them locally in the ``calendar_events`` table. Only free/busy +information is stored – the full event content is never persisted. +""" +from __future__ import annotations + +import logging +import uuid +from datetime import datetime, timedelta, timezone + +from sqlalchemy.orm import Session + +from app.core.config import settings +from app.models.calendar import ArtisanCalendarToken, CalendarEvent +from app.services.calendar_oauth import calendar_oauth_service +from app.services.geolocation import geolocation_service + +logger = logging.getLogger(__name__) + + +class CalendarSyncService: + """Syncs artisan calendar events from connected providers.""" + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def sync_artisan_events(self, artisan_id: int, db: Session) -> int: + """ + Fetch events from all connected providers for the next + ``CALENDAR_SYNC_LOOKAHEAD_DAYS`` days and upsert them into + ``calendar_events``. + + Returns the total number of events upserted. + """ + providers = calendar_oauth_service.get_connected_providers(artisan_id, db) + total = 0 + for provider in providers: + access_token = calendar_oauth_service.get_valid_access_token( + artisan_id, provider, db + ) + if not access_token: + logger.warning("No valid token for artisan %s / %s", artisan_id, provider) + continue + try: + if provider == "google": + events = await self._fetch_google_events(access_token) + elif provider == "microsoft": + events = await self._fetch_microsoft_events(access_token) + else: + continue + total += await self._upsert_events(artisan_id, provider, events, db) + except Exception as exc: + logger.exception("Error syncing %s events for artisan %s: %s", provider, artisan_id, exc) + return total + + def get_busy_slots( + self, artisan_id: int, date: datetime, db: Session + ) -> list[tuple[datetime, datetime]]: + """ + Return a list of (start, end) busy windows for the given calendar date. + Only returns ``is_busy=True`` events. + """ + day_start = date.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) + day_end = day_start + timedelta(days=1) + + events = ( + db.query(CalendarEvent) + .filter( + CalendarEvent.artisan_id == artisan_id, + CalendarEvent.is_busy.is_(True), + CalendarEvent.start_time >= day_start, + CalendarEvent.start_time < day_end, + ) + .all() + ) + return [(e.start_time.replace(tzinfo=timezone.utc), e.end_time.replace(tzinfo=timezone.utc)) for e in events] + + def get_events_before_time( + self, artisan_id: int, before: datetime, db: Session + ) -> list[CalendarEvent]: + """Return calendar events that end at or before *before*.""" + ts = before.replace(tzinfo=timezone.utc) if before.tzinfo is None else before + return ( + db.query(CalendarEvent) + .filter( + CalendarEvent.artisan_id == artisan_id, + CalendarEvent.end_time <= ts, + ) + .order_by(CalendarEvent.end_time.desc()) + .all() + ) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + async def _fetch_google_events(self, access_token: str) -> list[dict]: + """Call the Google Calendar API and return a normalised list of events.""" + from googleapiclient.discovery import build # type: ignore[import-untyped] + from google.oauth2.credentials import Credentials # type: ignore[import-untyped] + + creds = Credentials(token=access_token) + service = build("calendar", "v3", credentials=creds, cache_discovery=False) + + now = datetime.now(timezone.utc) + time_max = now + timedelta(days=settings.CALENDAR_SYNC_LOOKAHEAD_DAYS) + + events_result = ( + service.events() + .list( + calendarId="primary", + timeMin=now.isoformat(), + timeMax=time_max.isoformat(), + maxResults=250, + singleEvents=True, + orderBy="startTime", + ) + .execute() + ) + + raw_events = events_result.get("items", []) + normalised = [] + for ev in raw_events: + start_str = ev.get("start", {}).get("dateTime") or ev.get("start", {}).get("date") + end_str = ev.get("end", {}).get("dateTime") or ev.get("end", {}).get("date") + if not start_str or not end_str: + continue + normalised.append( + { + "external_id": ev["id"], + "title": ev.get("summary"), + "start_time": self._parse_dt(start_str), + "end_time": self._parse_dt(end_str), + "location": ev.get("location"), + "is_busy": ev.get("transparency", "opaque") != "transparent", + } + ) + return normalised + + async def _fetch_microsoft_events(self, access_token: str) -> list[dict]: + """Call Microsoft Graph and return a normalised list of events.""" + import aiohttp + + now = datetime.now(timezone.utc) + time_max = now + timedelta(days=settings.CALENDAR_SYNC_LOOKAHEAD_DAYS) + + headers = {"Authorization": f"Bearer {access_token}"} + params = { + "$select": "id,subject,start,end,location,showAs", + "startDateTime": now.isoformat(), + "endDateTime": time_max.isoformat(), + "$top": 250, + } + + normalised = [] + async with aiohttp.ClientSession() as session: + async with session.get( + "https://graph.microsoft.com/v1.0/me/calendarView", + headers=headers, + params=params, + ) as resp: + if resp.status != 200: + logger.warning("Microsoft Graph returned %s", resp.status) + return [] + data = await resp.json() + + for ev in data.get("value", []): + start_str = ev.get("start", {}).get("dateTime") + end_str = ev.get("end", {}).get("dateTime") + if not start_str or not end_str: + continue + normalised.append( + { + "external_id": ev["id"], + "title": ev.get("subject"), + "start_time": self._parse_dt(start_str), + "end_time": self._parse_dt(end_str), + "location": ev.get("location", {}).get("displayName"), + "is_busy": ev.get("showAs", "busy") not in ("free", "tentative"), + } + ) + return normalised + + async def _upsert_events( + self, artisan_id: int, provider: str, events: list[dict], db: Session + ) -> int: + count = 0 + for ev_data in events: + existing = ( + db.query(CalendarEvent) + .filter( + CalendarEvent.artisan_id == artisan_id, + CalendarEvent.provider == provider, + CalendarEvent.external_id == ev_data["external_id"], + ) + .first() + ) + + # Geocode the location if present (best-effort, no error on failure) + lat_str = lon_str = None + if ev_data.get("location"): + geo = await geolocation_service.geocode_address(ev_data["location"]) + if geo: + lat_str = str(geo.latitude) + lon_str = str(geo.longitude) + + if existing: + existing.title = ev_data.get("title") + existing.start_time = ev_data["start_time"] + existing.end_time = ev_data["end_time"] + existing.location = ev_data.get("location") + existing.latitude = lat_str + existing.longitude = lon_str + existing.is_busy = ev_data["is_busy"] + existing.synced_at = datetime.now(timezone.utc) + else: + new_event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=artisan_id, + provider=provider, + external_id=ev_data["external_id"], + title=ev_data.get("title"), + start_time=ev_data["start_time"], + end_time=ev_data["end_time"], + location=ev_data.get("location"), + latitude=lat_str, + longitude=lon_str, + is_busy=ev_data["is_busy"], + ) + db.add(new_event) + count += 1 + + db.commit() + return count + + @staticmethod + def _parse_dt(value: str) -> datetime: + """Parse ISO-8601 datetime string (with or without timezone).""" + # Handle date-only strings (all-day events) + if len(value) == 10: + return datetime.fromisoformat(value).replace(tzinfo=timezone.utc) + try: + dt = datetime.fromisoformat(value.replace("Z", "+00:00")) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt + except ValueError: + return datetime.now(timezone.utc) + + +# Module-level singleton +calendar_sync_service = CalendarSyncService() diff --git a/backend/app/services/embedding.py b/backend/app/services/embedding.py new file mode 100644 index 0000000..3bf266d --- /dev/null +++ b/backend/app/services/embedding.py @@ -0,0 +1,162 @@ +""" +Embedding service for StellArts semantic search. + +Responsibilities: +- Build a rich descriptive text for each artisan from their profile and on-chain stats. +- Generate OpenAI text-embedding-3-small embeddings (1536 dims). +- Cache embeddings in Redis to avoid redundant API calls. +""" +import json +import logging +from typing import Optional + +import openai + +from app.core.cache import cache +from app.core.config import settings +from app.models.artisan import Artisan + +logger = logging.getLogger(__name__) + +EMBEDDING_MODEL = "text-embedding-3-small" +EMBEDDING_DIM = 1536 +_ARTISAN_CACHE_TTL = 86_400 # 24 h; invalidate manually on profile update +_QUERY_CACHE_TTL = 3_600 # 1 h for query embeddings + +_openai_client: Optional[openai.AsyncOpenAI] = None + + +def _get_client() -> openai.AsyncOpenAI: + global _openai_client + if _openai_client is None: + if not settings.OPENAI_API_KEY: + raise RuntimeError( + "OPENAI_API_KEY is not set. Add it to your .env file to enable semantic search." + ) + _openai_client = openai.AsyncOpenAI(api_key=settings.OPENAI_API_KEY) + return _openai_client + + +# --------------------------------------------------------------------------- +# Text enrichment +# --------------------------------------------------------------------------- + +def build_artisan_text( + artisan: Artisan, + reputation_score: int = 0, + total_jobs: int = 0, +) -> str: + """ + Construct a single descriptive string that captures everything meaningful + about an artisan for embedding purposes. + + Args: + artisan: SQLAlchemy Artisan model instance. + reputation_score: On-chain score scaled by 100 (e.g. 9250 = 92.5%). + Pass 0 when unavailable; the field is omitted from text. + total_jobs: Total on-chain job count. Pass 0 when unavailable. + """ + parts: list[str] = [] + + if artisan.business_name: + parts.append(f"Business: {artisan.business_name}") + + if artisan.description: + parts.append(f"Description: {artisan.description}") + + if artisan.specialties: + try: + specialties = json.loads(artisan.specialties) + if isinstance(specialties, list) and specialties: + parts.append(f"Specialties: {', '.join(str(s) for s in specialties)}") + except (json.JSONDecodeError, TypeError): + parts.append(f"Specialties: {artisan.specialties}") + + if artisan.location: + parts.append(f"Location: {artisan.location}") + + if artisan.experience_years: + parts.append(f"Experience: {artisan.experience_years} years") + + if artisan.rating is not None: + parts.append( + f"Rating: {float(artisan.rating):.1f}/5 from {artisan.total_reviews} reviews" + ) + + # On-chain reputation stats (from Soroban contract via get_reputation_stats). + # Integrate by passing artisan.user.stellar_address once that field is added. + if total_jobs > 0: + success_rate = reputation_score / 100.0 + parts.append( + f"On-chain reputation: {success_rate:.1f}% success rate over {total_jobs} jobs" + ) + + return ". ".join(parts) + + +# --------------------------------------------------------------------------- +# Embedding generation +# --------------------------------------------------------------------------- + +async def _call_openai(text: str) -> list[float]: + """Raw call to the OpenAI embeddings API (no caching).""" + client = _get_client() + response = await client.embeddings.create( + model=EMBEDDING_MODEL, + input=text.replace("\n", " "), + ) + return response.data[0].embedding + + +async def get_query_embedding(query: str) -> list[float]: + """ + Return the embedding for a search query string, using Redis as a short-lived + cache so repeated identical queries don't burn API quota. + """ + cache_key = f"embedding:query:{hash(query) & 0xFFFF_FFFF}" + + if cache.redis: + cached = await cache.get(cache_key) + if cached is not None: + return cached + + embedding = await _call_openai(query) + + if cache.redis: + await cache.set(cache_key, embedding, expire=_QUERY_CACHE_TTL) + + return embedding + + +async def generate_artisan_embedding(artisan: Artisan) -> list[float]: + """ + Return (and cache) the embedding for a full artisan profile. + + On-chain stats are not fetched here because get_reputation_stats in + soroban.py requires a source Keypair and currently returns a stub (0, 0). + Pass reputation_score/total_jobs explicitly if you have them available. + """ + cache_key = f"embedding:artisan:{artisan.id}" + + if cache.redis: + cached = await cache.get(cache_key) + if cached is not None: + return cached + + text = build_artisan_text(artisan) + embedding = await _call_openai(text) + + if cache.redis: + await cache.set(cache_key, embedding, expire=_ARTISAN_CACHE_TTL) + + return embedding + + +async def invalidate_artisan_embedding(artisan_id: int) -> None: + """ + Evict the cached embedding for an artisan. + Call this whenever an artisan's profile is updated so stale vectors + don't pollute future searches. + """ + if cache.redis: + await cache.delete(f"embedding:artisan:{artisan_id}") diff --git a/backend/app/services/scheduling.py b/backend/app/services/scheduling.py new file mode 100644 index 0000000..5879a61 --- /dev/null +++ b/backend/app/services/scheduling.py @@ -0,0 +1,240 @@ +""" +Scheduling service. + +Proposes optimal booking time slots for an artisan by: +1. Syncing calendar events (if connected). +2. Considering confirmed DB bookings. +3. Scoring candidate time windows by travel distance from the artisan's + last known job location (lower travel = higher score). +4. Filtering any slot that overlaps a busy window (double-booking prevention). +""" +from __future__ import annotations + +import logging +import math +from datetime import datetime, timedelta, timezone +from decimal import Decimal + +from sqlalchemy.orm import Session + +from app.models.artisan import Artisan +from app.models.booking import Booking, BookingStatus +from app.models.calendar import CalendarEvent +from app.schemas.scheduling import SlotSuggestion, SlotSuggestionRequest, SlotSuggestionResponse +from app.services.calendar_sync import calendar_sync_service + +logger = logging.getLogger(__name__) + +# Working-day window for candidate slots (inclusive start hour, exclusive end) +DAY_START_HOUR = 8 +DAY_END_HOUR = 20 + +# Assumed average urban travel speed in km/h (used for travel time estimates) +URBAN_AVERAGE_SPEED_KMH = 30.0 + +# Maximum candidate score (used when artisan has no prior location on a given day) +MAX_SCORE = 100.0 + + +class SchedulingService: + """Proposes ranked booking slots for an artisan on a given day.""" + + async def suggest_slots( + self, + request: SlotSuggestionRequest, + artisan: Artisan, + db: Session, + ) -> SlotSuggestionResponse: + """ + Core scheduling algorithm: + + 1. Sync Google/Outlook events (best-effort). + 2. Collect busy windows from calendar + DB bookings. + 3. Enumerate candidate start times in 1-hour increments. + 4. For each candidate, check overlap (double-booking guard). + 5. Score based on travel distance from artisan's prior location. + 6. Return sorted suggestions (best first). + """ + # ── 1. Sync calendar events (best-effort; errors are swallowed) ────── + try: + await calendar_sync_service.sync_artisan_events(artisan.id, db) + except Exception as exc: + logger.warning("Calendar sync failed for artisan %s: %s", artisan.id, exc) + + # ── 2. Gather busy windows ─────────────────────────────────────────── + preferred_date = request.preferred_date.replace(tzinfo=timezone.utc) + busy_windows = self._get_all_busy_windows(artisan.id, preferred_date, db) + + # ── 3. Candidate slots ─────────────────────────────────────────────── + duration = timedelta(hours=request.service_duration_hours) + candidates = self._build_candidate_slots(preferred_date, duration) + + # ── 4 & 5. Filter overlaps and score ──────────────────────────────── + suggestions: list[SlotSuggestion] = [] + for start, end in candidates: + if self._overlaps_any(start, end, busy_windows): + continue # double-booking prevention + + prior_event = self._find_prior_event(artisan.id, start, db) + travel_km, travel_minutes, prior_loc_desc = self._compute_travel( + artisan, prior_event, request.client_lat, request.client_lon + ) + score = self._score(travel_km) + + suggestions.append( + SlotSuggestion( + start_time=start, + end_time=end, + travel_time_minutes=round(travel_minutes, 1), + travel_km=round(travel_km, 2), + score=round(score, 2), + prior_job_location=prior_loc_desc, + ) + ) + + # Sort best (highest score) first + suggestions.sort(key=lambda s: s.score, reverse=True) + + return SlotSuggestionResponse( + artisan_id=artisan.id, + preferred_date=preferred_date, + suggestions=suggestions, + ) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _get_all_busy_windows( + self, artisan_id: int, date: datetime, db: Session + ) -> list[tuple[datetime, datetime]]: + """Collect busy windows from both calendar events and DB bookings.""" + windows: list[tuple[datetime, datetime]] = [] + + # ── Calendar events ────────────────────────────────────────────────── + windows.extend(calendar_sync_service.get_busy_slots(artisan_id, date, db)) + + # ── DB bookings (confirmed or in-progress) ─────────────────────────── + day_start = date.replace(hour=0, minute=0, second=0, microsecond=0) + day_end = day_start + timedelta(days=1) + + db_bookings = ( + db.query(Booking) + .filter( + Booking.artisan_id == artisan_id, + Booking.status.in_([BookingStatus.CONFIRMED, BookingStatus.IN_PROGRESS]), + Booking.date >= day_start, + Booking.date < day_end, + ) + .all() + ) + for b in db_bookings: + if b.date: + start = b.date.replace(tzinfo=timezone.utc) if b.date.tzinfo is None else b.date + hours = float(b.estimated_hours or 1.0) + end = start + timedelta(hours=hours) + windows.append((start, end)) + + return windows + + def _build_candidate_slots( + self, date: datetime, duration: timedelta + ) -> list[tuple[datetime, datetime]]: + """Enumerate start times from DAY_START_HOUR to DAY_END_HOUR in 1-hour steps.""" + slots = [] + day_base = date.replace(hour=0, minute=0, second=0, microsecond=0) + for hour in range(DAY_START_HOUR, DAY_END_HOUR): + start = day_base.replace(hour=hour) + end = start + duration + if end.hour > DAY_END_HOUR or (end.hour == DAY_END_HOUR and end.minute > 0): + break + slots.append((start, end)) + return slots + + @staticmethod + def _overlaps_any( + start: datetime, end: datetime, windows: list[tuple[datetime, datetime]] + ) -> bool: + """Return True if [start, end) overlaps any window in *windows*.""" + for w_start, w_end in windows: + if start < w_end and end > w_start: + return True + return False + + def _find_prior_event( + self, artisan_id: int, slot_start: datetime, db: Session + ) -> CalendarEvent | None: + """Return the most recent calendar event ending before *slot_start*.""" + return ( + db.query(CalendarEvent) + .filter( + CalendarEvent.artisan_id == artisan_id, + CalendarEvent.end_time <= slot_start, + ) + .order_by(CalendarEvent.end_time.desc()) + .first() + ) + + def _compute_travel( + self, + artisan: Artisan, + prior_event: CalendarEvent | None, + client_lat: float, + client_lon: float, + ) -> tuple[float, float, str | None]: + """ + Return (distance_km, travel_minutes, description). + + Priority: + - prior_event with geocoded lat/lon + - artisan's own home location (latitude/longitude on artisan profile) + - 0 km if no location data + """ + from_lat: float | None = None + from_lon: float | None = None + prior_loc_desc: str | None = None + + if prior_event and prior_event.latitude and prior_event.longitude: + try: + from_lat = float(prior_event.latitude) + from_lon = float(prior_event.longitude) + prior_loc_desc = prior_event.location or "Previous job" + except (TypeError, ValueError): + pass + + if from_lat is None and artisan.latitude and artisan.longitude: + from_lat = float(artisan.latitude) + from_lon = float(artisan.longitude) + prior_loc_desc = artisan.location or "Artisan base" + + if from_lat is None or from_lon is None: + return 0.0, 0.0, None + + distance_km = self._haversine(from_lat, from_lon, client_lat, client_lon) + travel_minutes = (distance_km / URBAN_AVERAGE_SPEED_KMH) * 60.0 + return distance_km, travel_minutes, prior_loc_desc + + @staticmethod + def _haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """Haversine great-circle distance in km.""" + R = 6371.0 + phi1, phi2 = math.radians(lat1), math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dlambda = math.radians(lon2 - lon1) + a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2 + return R * 2 * math.asin(math.sqrt(a)) + + @staticmethod + def _score(distance_km: float) -> float: + """ + Score a slot inversely proportional to travel distance. + Score = 100 / (1 + distance_km). So 0 km → 100, 99 km → ~1. + When distance is 0 (no prior location) the score is MAX_SCORE. + """ + if distance_km == 0.0: + return MAX_SCORE + return MAX_SCORE / (1.0 + distance_km) + + +# Module-level singleton +scheduling_service = SchedulingService() diff --git a/backend/app/tests/test_calendar_oauth.py b/backend/app/tests/test_calendar_oauth.py new file mode 100644 index 0000000..1c87c67 --- /dev/null +++ b/backend/app/tests/test_calendar_oauth.py @@ -0,0 +1,205 @@ +""" +Tests for CalendarOAuthService – Google and Microsoft OAuth flows. + +All external OAuth library calls are mocked so no network access is required. +""" +from __future__ import annotations + +import os +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest + +os.environ.setdefault("SECRET_KEY", "test-secret-key") +os.environ.setdefault("DATABASE_URL", "sqlite:///./test.db") +os.environ.setdefault("REQUIRE_EMAIL_VERIFICATION", "False") + +from app.models.calendar import ArtisanCalendarToken # noqa: E402 – env must be set first +from app.services.calendar_oauth import CalendarOAuthService # noqa: E402 + + +@pytest.fixture +def oauth_service(): + return CalendarOAuthService() + + +@pytest.fixture +def db_session(): + """In-memory SQLite session with calendar tables.""" + from sqlalchemy import create_engine + from sqlalchemy.orm import sessionmaker + + from app.db.base import Base + from app.models import calendar as _cal_models # noqa: F401 – register models + + engine = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False}) + Base.metadata.create_all(bind=engine) + Session = sessionmaker(bind=engine) + session = Session() + try: + yield session + finally: + session.close() + Base.metadata.drop_all(bind=engine) + + +# ── Google tests ───────────────────────────────────────────────────────────── + +class TestGetGoogleAuthUrl: + def test_returns_url_when_credentials_configured(self, oauth_service): + """get_google_auth_url() returns a non-empty string when credentials are set.""" + mock_flow = MagicMock() + mock_flow.authorization_url.return_value = ("https://accounts.google.com/o/oauth2/auth?foo=bar", "state123") + + with ( + patch("app.services.calendar_oauth.settings") as mock_settings, + patch("app.services.calendar_oauth.Flow", mock_flow, create=True), + ): + # Patch Flow inside the method's lazy import namespace + mock_settings.GOOGLE_CLIENT_ID = "test-client-id" + mock_settings.GOOGLE_CLIENT_SECRET = "test-secret" + mock_settings.OAUTH_REDIRECT_URI = "http://localhost:8000/api/v1/calendar/callback" + + with patch("google_auth_oauthlib.flow.Flow.from_client_config") as mock_from_config: + mock_flow_instance = MagicMock() + mock_flow_instance.authorization_url.return_value = ( + "https://accounts.google.com/o/oauth2/auth?foo=bar", + "state123", + ) + mock_from_config.return_value = mock_flow_instance + + url = oauth_service.get_google_auth_url(artisan_id=42) + + assert url.startswith("https://") + + def test_raises_when_credentials_missing(self, oauth_service): + with patch("app.services.calendar_oauth.settings") as mock_settings: + mock_settings.GOOGLE_CLIENT_ID = None + mock_settings.GOOGLE_CLIENT_SECRET = None + with pytest.raises(ValueError, match="Google OAuth credentials not configured"): + oauth_service.get_google_auth_url(artisan_id=1) + + +class TestExchangeGoogleCode: + def test_stores_token_in_db(self, oauth_service, db_session): + """exchange_google_code() creates an ArtisanCalendarToken row.""" + mock_creds = MagicMock() + mock_creds.token = "access-token-abc" + mock_creds.refresh_token = "refresh-token-xyz" + mock_creds.expiry = datetime(2026, 4, 1, tzinfo=timezone.utc) + mock_creds.scopes = ["https://www.googleapis.com/auth/calendar.readonly"] + + mock_flow_instance = MagicMock() + mock_flow_instance.credentials = mock_creds + + with patch("google_auth_oauthlib.flow.Flow.from_client_config", return_value=mock_flow_instance): + with patch("app.services.calendar_oauth.settings") as mock_settings: + mock_settings.GOOGLE_CLIENT_ID = "cid" + mock_settings.GOOGLE_CLIENT_SECRET = "csecret" + mock_settings.OAUTH_REDIRECT_URI = "http://localhost:8000/api/v1/calendar/callback" + + token_row = oauth_service.exchange_google_code("auth-code", artisan_id=5, db=db_session) + + assert token_row is not None + assert token_row.provider == "google" + assert token_row.artisan_id == 5 + assert token_row.access_token == "access-token-abc" + + # Also verify DB row persisted + from_db = db_session.query(ArtisanCalendarToken).filter_by(artisan_id=5).first() + assert from_db is not None + assert from_db.refresh_token == "refresh-token-xyz" + + +# ── Microsoft tests ─────────────────────────────────────────────────────────── + +class TestGetMicrosoftAuthUrl: + def test_returns_url_when_credentials_configured(self, oauth_service): + mock_msal_app = MagicMock() + mock_msal_app.get_authorization_request_url.return_value = ( + "https://login.microsoftonline.com/common/oauth2/v2.0/authorize?client_id=x" + ) + + with ( + patch("msal.ConfidentialClientApplication", return_value=mock_msal_app), + patch("app.services.calendar_oauth.settings") as mock_settings, + ): + mock_settings.MICROSOFT_CLIENT_ID = "ms-client-id" + mock_settings.MICROSOFT_CLIENT_SECRET = "ms-secret" + mock_settings.OAUTH_REDIRECT_URI = "http://localhost:8000/api/v1/calendar/callback" + + url = oauth_service.get_microsoft_auth_url(artisan_id=7) + + assert "microsoftonline.com" in url or url.startswith("https://") + + def test_raises_when_credentials_missing(self, oauth_service): + with patch("app.services.calendar_oauth.settings") as mock_settings: + mock_settings.MICROSOFT_CLIENT_ID = None + mock_settings.MICROSOFT_CLIENT_SECRET = None + with pytest.raises(ValueError, match="Microsoft OAuth credentials not configured"): + oauth_service.get_microsoft_auth_url(artisan_id=1) + + +# ── Revoke / disconnect tests ───────────────────────────────────────────────── + +class TestRevokeToken: + def test_deletes_token_row(self, oauth_service, db_session): + """revoke_token() removes the token row and returns True.""" + row = ArtisanCalendarToken( + artisan_id=10, + provider="google", + access_token="tok", + refresh_token="ref", + ) + db_session.add(row) + db_session.commit() + + result = oauth_service.revoke_token(artisan_id=10, provider="google", db=db_session) + assert result is True + + remaining = db_session.query(ArtisanCalendarToken).filter_by(artisan_id=10).first() + assert remaining is None + + def test_returns_false_when_not_found(self, oauth_service, db_session): + result = oauth_service.revoke_token(artisan_id=999, provider="google", db=db_session) + assert result is False + + +# ── Token refresh tests ─────────────────────────────────────────────────────── + +class TestGetValidAccessToken: + def test_returns_none_when_not_connected(self, oauth_service, db_session): + token = oauth_service.get_valid_access_token(artisan_id=99, provider="google", db=db_session) + assert token is None + + def test_returns_token_when_not_expired(self, oauth_service, db_session): + row = ArtisanCalendarToken( + artisan_id=20, + provider="google", + access_token="valid-token", + refresh_token="ref", + token_expiry=datetime(2099, 1, 1, tzinfo=timezone.utc), + ) + db_session.add(row) + db_session.commit() + + token = oauth_service.get_valid_access_token(artisan_id=20, provider="google", db=db_session) + assert token == "valid-token" + + def test_refreshes_expired_google_token(self, oauth_service, db_session): + row = ArtisanCalendarToken( + artisan_id=30, + provider="google", + access_token="old-token", + refresh_token="ref-token", + token_expiry=datetime(2000, 1, 1, tzinfo=timezone.utc), # expired + ) + db_session.add(row) + db_session.commit() + + with patch.object(oauth_service, "refresh_google_token", return_value="new-token") as mock_refresh: + token = oauth_service.get_valid_access_token(artisan_id=30, provider="google", db=db_session) + + mock_refresh.assert_called_once() + assert token == "new-token" diff --git a/backend/app/tests/test_calendar_sync.py b/backend/app/tests/test_calendar_sync.py new file mode 100644 index 0000000..730f2dc --- /dev/null +++ b/backend/app/tests/test_calendar_sync.py @@ -0,0 +1,185 @@ +""" +Tests for CalendarSyncService. + +All external API calls (Google, Microsoft Graph) are mocked. +Geolocation is also mocked to avoid network calls. +""" +from __future__ import annotations + +import os +import uuid +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +os.environ.setdefault("SECRET_KEY", "test-secret-key") +os.environ.setdefault("DATABASE_URL", "sqlite:///./test.db") +os.environ.setdefault("REQUIRE_EMAIL_VERIFICATION", "False") + +from app.models.calendar import ArtisanCalendarToken, CalendarEvent # noqa: E402 +from app.services.calendar_sync import CalendarSyncService # noqa: E402 + + +@pytest.fixture +def sync_service(): + return CalendarSyncService() + + +@pytest.fixture +def db_session(): + from sqlalchemy import create_engine + from sqlalchemy.orm import sessionmaker + + from app.db.base import Base + from app.models import calendar as _cal_models # noqa: F401 + + engine = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False}) + Base.metadata.create_all(bind=engine) + Session = sessionmaker(bind=engine) + session = Session() + try: + yield session + finally: + session.close() + Base.metadata.drop_all(bind=engine) + + +# ── _parse_dt tests ─────────────────────────────────────────────────────────── + +class TestParseDt: + def test_parses_full_iso_string(self, sync_service): + dt = sync_service._parse_dt("2026-03-28T10:00:00+00:00") + assert dt.year == 2026 + assert dt.tzinfo is not None + + def test_parses_date_only_string(self, sync_service): + dt = sync_service._parse_dt("2026-03-28") + assert dt.year == 2026 + assert dt.month == 3 + assert dt.day == 28 + + def test_parses_Z_suffix(self, sync_service): + dt = sync_service._parse_dt("2026-03-28T12:00:00Z") + assert dt.tzinfo is not None + + +# ── get_busy_slots tests ────────────────────────────────────────────────────── + +class TestGetBusySlots: + def test_returns_busy_window_for_day(self, sync_service, db_session): + event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=1, + provider="google", + external_id="evt-1", + title="Meeting", + start_time=datetime(2026, 5, 1, 9, 0, tzinfo=timezone.utc), + end_time=datetime(2026, 5, 1, 10, 0, tzinfo=timezone.utc), + is_busy=True, + ) + db_session.add(event) + db_session.commit() + + slots = sync_service.get_busy_slots( + artisan_id=1, + date=datetime(2026, 5, 1, tzinfo=timezone.utc), + db=db_session, + ) + assert len(slots) == 1 + start, end = slots[0] + assert start.hour == 9 + assert end.hour == 10 + + def test_excludes_free_events(self, sync_service, db_session): + event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=2, + provider="google", + external_id="evt-free", + title="Free block", + start_time=datetime(2026, 5, 1, 9, 0, tzinfo=timezone.utc), + end_time=datetime(2026, 5, 1, 10, 0, tzinfo=timezone.utc), + is_busy=False, + ) + db_session.add(event) + db_session.commit() + + slots = sync_service.get_busy_slots( + artisan_id=2, + date=datetime(2026, 5, 1, tzinfo=timezone.utc), + db=db_session, + ) + assert len(slots) == 0 + + +# ── sync_artisan_events / upsert tests ──────────────────────────────────────── + +class TestSyncArtisanEvents: + @pytest.mark.asyncio + async def test_sync_inserts_new_events(self, sync_service, db_session): + """When no events exist, sync inserts new CalendarEvent rows.""" + mock_events = [ + { + "external_id": "google-evt-001", + "title": "Client A", + "start_time": datetime(2026, 5, 1, 9, 0, tzinfo=timezone.utc), + "end_time": datetime(2026, 5, 1, 11, 0, tzinfo=timezone.utc), + "location": None, + "is_busy": True, + } + ] + + with ( + patch("app.services.calendar_sync.calendar_oauth_service.get_connected_providers", return_value=["google"]), + patch("app.services.calendar_sync.calendar_oauth_service.get_valid_access_token", return_value="tok"), + patch.object(sync_service, "_fetch_google_events", new=AsyncMock(return_value=mock_events)), + patch("app.services.calendar_sync.geolocation_service.geocode_address", new=AsyncMock(return_value=None)), + ): + count = await sync_service.sync_artisan_events(artisan_id=1, db=db_session) + + assert count == 1 + stored = db_session.query(CalendarEvent).filter_by(artisan_id=1).first() + assert stored is not None + assert stored.external_id == "google-evt-001" + + @pytest.mark.asyncio + async def test_sync_upserts_existing_events(self, sync_service, db_session): + """Re-syncing the same external_id updates the row, not inserts a duplicate.""" + existing = CalendarEvent( + id=uuid.uuid4(), + artisan_id=3, + provider="google", + external_id="evt-dup", + title="Old Title", + start_time=datetime(2026, 5, 1, 9, 0, tzinfo=timezone.utc), + end_time=datetime(2026, 5, 1, 10, 0, tzinfo=timezone.utc), + is_busy=True, + ) + db_session.add(existing) + db_session.commit() + + updated_events = [ + { + "external_id": "evt-dup", + "title": "Updated Title", + "start_time": datetime(2026, 5, 1, 9, 0, tzinfo=timezone.utc), + "end_time": datetime(2026, 5, 1, 11, 0, tzinfo=timezone.utc), # end changed + "location": None, + "is_busy": True, + } + ] + + with ( + patch("app.services.calendar_sync.calendar_oauth_service.get_connected_providers", return_value=["google"]), + patch("app.services.calendar_sync.calendar_oauth_service.get_valid_access_token", return_value="tok"), + patch.object(sync_service, "_fetch_google_events", new=AsyncMock(return_value=updated_events)), + patch("app.services.calendar_sync.geolocation_service.geocode_address", new=AsyncMock(return_value=None)), + ): + await sync_service.sync_artisan_events(artisan_id=3, db=db_session) + + rows = db_session.query(CalendarEvent).filter_by(artisan_id=3).all() + assert len(rows) == 1 # still exactly one row + db_session.refresh(rows[0]) + assert rows[0].title == "Updated Title" + assert rows[0].end_time.hour == 11 diff --git a/backend/app/tests/test_scheduling.py b/backend/app/tests/test_scheduling.py new file mode 100644 index 0000000..7b13148 --- /dev/null +++ b/backend/app/tests/test_scheduling.py @@ -0,0 +1,277 @@ +""" +Tests for SchedulingService. + +Validates: +- Double-booking prevention (AC #3) +- Geographic grouping scoring (AC #2) +- Graceful operation without calendar connection +- Correct slot enumeration on empty day +""" +from __future__ import annotations + +import os +import uuid +from datetime import datetime, timedelta, timezone +from decimal import Decimal +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +os.environ.setdefault("SECRET_KEY", "test-secret-key") +os.environ.setdefault("DATABASE_URL", "sqlite:///./test.db") +os.environ.setdefault("REQUIRE_EMAIL_VERIFICATION", "False") + +from app.models.artisan import Artisan # noqa: E402 +from app.models.booking import Booking, BookingStatus # noqa: E402 +from app.models.calendar import CalendarEvent # noqa: E402 +from app.schemas.scheduling import SlotSuggestionRequest # noqa: E402 +from app.services.scheduling import SchedulingService # noqa: E402 + + +def _make_artisan(artisan_id: int, lat=None, lon=None, location=None): + """Return a plain-object artisan stub (avoids SQLAlchemy instrumentation).""" + from types import SimpleNamespace + from decimal import Decimal + return SimpleNamespace( + id=artisan_id, + latitude=Decimal(str(lat)) if lat is not None else None, + longitude=Decimal(str(lon)) if lon is not None else None, + location=location, + ) + + +@pytest.fixture +def scheduler(): + return SchedulingService() + + +@pytest.fixture +def db_session(): + from sqlalchemy import create_engine + from sqlalchemy.orm import sessionmaker + + from app.db.base import Base + from app.models import calendar as _cal_models # noqa: F401 + + engine = create_engine("sqlite:///:memory:", connect_args={"check_same_thread": False}) + Base.metadata.create_all(bind=engine) + Session = sessionmaker(bind=engine) + session = Session() + try: + yield session + finally: + session.close() + Base.metadata.drop_all(bind=engine) + + +@pytest.fixture +def artisan_no_location(): + """Minimal artisan with no lat/lon.""" + return _make_artisan(artisan_id=1) + + +@pytest.fixture +def artisan_with_location(): + """Artisan based near London (51.5, -0.12).""" + return _make_artisan(artisan_id=2, lat=51.5074, lon=-0.1278, location="London, UK") + + +def _make_request(artisan_id: int, date: datetime, duration: float = 2.0) -> SlotSuggestionRequest: + return SlotSuggestionRequest( + artisan_id=artisan_id, + client_lat=51.52, + client_lon=-0.09, + service_duration_hours=duration, + preferred_date=date, + ) + + +DATE = datetime(2026, 5, 15, tzinfo=timezone.utc) + + +class TestDoubleBookingPrevention: + """AC #3: Artisan double-bookings are successfully avoided automatically.""" + + @pytest.mark.asyncio + async def test_calendar_busy_slot_is_excluded(self, scheduler, artisan_no_location, db_session): + """A slot that overlaps a calendar event must NOT appear in suggestions.""" + # Insert a busy calendar event from 09:00 – 11:00 + event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=1, + provider="google", + external_id="busy-evt", + title="Existing job", + start_time=DATE.replace(hour=9), + end_time=DATE.replace(hour=11), + is_busy=True, + ) + db_session.add(event) + db_session.commit() + + request = _make_request(artisan_id=1, date=DATE, duration=2.0) + + with patch( + "app.services.scheduling.calendar_sync_service.sync_artisan_events", + new=AsyncMock(), + ): + result = await scheduler.suggest_slots(request, artisan_no_location, db_session) + + overlapping_starts = [ + s for s in result.suggestions + if s.start_time.replace(tzinfo=timezone.utc) < DATE.replace(hour=11) + and s.end_time.replace(tzinfo=timezone.utc) > DATE.replace(hour=9) + ] + assert len(overlapping_starts) == 0, ( + f"Expected no overlap with 09:00-11:00 busy block, but got: {overlapping_starts}" + ) + + @pytest.mark.asyncio + async def test_db_booking_busy_slot_is_excluded(self, scheduler, artisan_no_location, db_session): + """A slot that overlaps a confirmed DB booking must NOT appear in suggestions.""" + # Insert a minimal booking row using the ORM properly + booking = Booking( + client_id=99, + artisan_id=1, + status=BookingStatus.CONFIRMED, + date=DATE.replace(hour=10), + estimated_hours=2.0, + service="Test", + estimated_cost=100.00, + ) + db_session.add(booking) + db_session.commit() + + request = _make_request(artisan_id=1, date=DATE, duration=2.0) + + with patch( + "app.services.scheduling.calendar_sync_service.sync_artisan_events", + new=AsyncMock(), + ): + result = await scheduler.suggest_slots(request, artisan_no_location, db_session) + + # The 10:00-12:00 slot should be excluded + ten_o_clock_slots = [ + s for s in result.suggestions + if s.start_time.hour == 10 + ] + assert len(ten_o_clock_slots) == 0 + + +class TestGeographicGrouping: + """AC #2: Matching engine prioritises slots that group geographic regions together.""" + + @pytest.mark.asyncio + async def test_nearby_prior_job_scores_higher(self, scheduler, db_session): + """ + Insert two calendar events ending before the target slots. + The slot after the event near the client should score higher than the + slot after the event far away from the client. + """ + artisan = _make_artisan(artisan_id=5) + + # Near event – is_busy=False so it marks the artisan's LOCATION but doesn't block the 08:00 slot + near_event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=5, + provider="google", + external_id="near-evt", + title="Near job", + start_time=DATE.replace(hour=6), + end_time=DATE.replace(hour=8), + is_busy=False, # location-only, does NOT block the 08:00 slot + latitude="51.521", # very close to client_lat=51.52 + longitude="-0.090", + ) + # Far event – also is_busy=False, ends at 09:00, location ~343 km from client (London → Paris) + far_event = CalendarEvent( + id=uuid.uuid4(), + artisan_id=5, + provider="google", + external_id="far-evt", + title="Far job", + start_time=DATE.replace(hour=7), + end_time=DATE.replace(hour=9), + is_busy=False, # location-only, does NOT block the 09:00 slot + latitude="48.8566", # Paris + longitude="2.3522", + ) + db_session.add(near_event) + db_session.add(far_event) + db_session.commit() + + request = _make_request(artisan_id=5, date=DATE, duration=1.0) + + with patch( + "app.services.scheduling.calendar_sync_service.sync_artisan_events", + new=AsyncMock(), + ): + result = await scheduler.suggest_slots(request, artisan, db_session) + + # Find the slot at 8:00 (right after near_event ends) + # and at 9:00 (right after far_event ends) – both should be in suggestions + slots_by_hour = {s.start_time.hour: s for s in result.suggestions} + + assert 8 in slots_by_hour, "Expected a slot at 08:00" + assert 9 in slots_by_hour, "Expected a slot at 09:00" + + # The 08:00 slot should score higher (travel_km << far slot) + assert slots_by_hour[8].score > slots_by_hour[9].score, ( + f"Near slot (8:00, km={slots_by_hour[8].travel_km}) should score higher " + f"than far slot (9:00, km={slots_by_hour[9].travel_km})" + ) + + +class TestNoCalendarConnected: + """Scheduling works even when no calendar is connected.""" + + @pytest.mark.asyncio + async def test_returns_suggestions_without_calendar(self, scheduler, artisan_no_location, db_session): + """When no calendar connection exists, slots are still returned from the DB booking check.""" + request = _make_request(artisan_id=1, date=DATE, duration=1.0) + + with patch( + "app.services.scheduling.calendar_sync_service.sync_artisan_events", + new=AsyncMock(), + ): + result = await scheduler.suggest_slots(request, artisan_no_location, db_session) + + # With no busy events or bookings, all candidate hours (08-19 for 1h slot) should appear + assert len(result.suggestions) > 0 + assert result.artisan_id == 1 + + +class TestEmptyDay: + """On a completely free day, all candidate windows are proposed.""" + + @pytest.mark.asyncio + async def test_full_day_free_returns_all_candidate_slots(self, scheduler, artisan_no_location, db_session): + request = _make_request(artisan_id=1, date=DATE, duration=1.0) + + with patch( + "app.services.scheduling.calendar_sync_service.sync_artisan_events", + new=AsyncMock(), + ): + result = await scheduler.suggest_slots(request, artisan_no_location, db_session) + + # 08:00 – 19:00 with 1h slots = 12 candidate windows + assert len(result.suggestions) == 12 + + +class TestHaversine: + def test_same_point_zero_distance(self, scheduler): + assert scheduler._haversine(51.5, -0.12, 51.5, -0.12) == 0.0 + + def test_london_to_paris_approximately_340km(self, scheduler): + dist = scheduler._haversine(51.5074, -0.1278, 48.8566, 2.3522) + assert 330 < dist < 360, f"Unexpected distance: {dist}" + + +class TestScoring: + def test_zero_distance_gives_max_score(self, scheduler): + assert scheduler._score(0.0) == 100.0 + + def test_score_decreases_with_distance(self, scheduler): + score_near = scheduler._score(1.0) + score_far = scheduler._score(50.0) + assert score_near > score_far diff --git a/backend/app/tests/test_semantic_search_feature.py b/backend/app/tests/test_semantic_search_feature.py new file mode 100644 index 0000000..dc78fde --- /dev/null +++ b/backend/app/tests/test_semantic_search_feature.py @@ -0,0 +1,166 @@ +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest +from fastapi import HTTPException + +from app.api.v1.endpoints import search as search_endpoint +from app.core.cache import cache +from app.core.config import settings +from app.services import embedding as embedding_service + + +class _DummyResult: + def __init__(self, rows): + self._rows = rows + + def all(self): + return self._rows + + +class _DummyDB: + def __init__(self, rows): + self.rows = rows + + def execute(self, stmt): + return _DummyResult(self.rows) + + +def _artisan(artisan_id: int, rating: float | None = None): + return SimpleNamespace( + id=artisan_id, + business_name=f"Artisan {artisan_id}", + description="Historic restoration specialist", + specialties='["historic restoration"]', + location="Old Town", + rating=rating, + total_reviews=12, + is_available=True, + is_verified=True, + ) + + +@pytest.mark.asyncio +async def test_semantic_search_requires_openai_key(monkeypatch): + monkeypatch.setattr(settings, "OPENAI_API_KEY", None) + monkeypatch.setattr(cache, "redis", None) + + with pytest.raises(HTTPException) as exc: + await search_endpoint.semantic_search( + q="historic restoration", + db=_DummyDB([]), + ) + + assert exc.value.status_code == 503 + assert "OPENAI_API_KEY" in exc.value.detail + + +@pytest.mark.asyncio +async def test_semantic_search_returns_cached_payload(monkeypatch): + cached = { + "query": "historic restoration", + "results": [ + { + "id": 7, + "business_name": "Cached Artisan", + "description": "already cached", + "specialties": "[]", + "location": "City", + "rating": 4.5, + "total_reviews": 3, + "is_available": True, + "is_verified": True, + "semantic_similarity": 0.9, + "reputation_weight": 0.8, + "hybrid_score": 0.87, + } + ], + "total": 1, + } + + monkeypatch.setattr(settings, "OPENAI_API_KEY", "test-key") + monkeypatch.setattr(cache, "redis", object()) + monkeypatch.setattr(cache, "get", AsyncMock(return_value=cached)) + monkeypatch.setattr(cache, "set", AsyncMock()) + + embed_mock = AsyncMock() + monkeypatch.setattr(search_endpoint, "get_query_embedding", embed_mock) + + response = await search_endpoint.semantic_search( + q="historic restoration", + db=_DummyDB([]), + ) + + assert response.total == 1 + assert response.results[0].id == 7 + embed_mock.assert_not_called() + + +@pytest.mark.asyncio +async def test_semantic_search_hybrid_reranking(monkeypatch): + # Lower semantic similarity but stronger reputation should win at low semantic weight. + rows = [ + (_artisan(1, rating=1.0), 0.05), # sim=0.95, rep=0.2 + (_artisan(2, rating=4.5), 0.20), # sim=0.80, rep=0.9 + ] + + monkeypatch.setattr(settings, "OPENAI_API_KEY", "test-key") + monkeypatch.setattr(cache, "redis", object()) + monkeypatch.setattr(cache, "get", AsyncMock(return_value=None)) + cache_set = AsyncMock() + monkeypatch.setattr(cache, "set", cache_set) + + monkeypatch.setattr( + search_endpoint, + "get_query_embedding", + AsyncMock(return_value=[0.1] * 1536), + ) + + response = await search_endpoint.semantic_search( + q="historic restoration", + limit=2, + semantic_weight=0.3, + db=_DummyDB(rows), + ) + + assert response.total == 2 + assert response.results[0].id == 2 + assert response.results[1].id == 1 + assert response.results[0].hybrid_score > response.results[1].hybrid_score + cache_set.assert_called_once() + + +def test_build_artisan_text_includes_reputation_stats(): + artisan = SimpleNamespace( + business_name="Stone & Lime Works", + description="Conservation masonry", + specialties='["historic restoration", "stone masonry"]', + location="Lagos Island", + experience_years=11, + rating=4.8, + total_reviews=31, + ) + + text = embedding_service.build_artisan_text( + artisan=artisan, + reputation_score=9250, + total_jobs=20, + ) + + assert "Specialties: historic restoration, stone masonry" in text + assert "On-chain reputation: 92.5% success rate over 20 jobs" in text + + +def test_build_artisan_text_falls_back_for_raw_specialties(): + artisan = SimpleNamespace( + business_name="Woodline", + description="Custom carpentry", + specialties="woodwork", + location="Abuja", + experience_years=6, + rating=4.2, + total_reviews=14, + ) + + text = embedding_service.build_artisan_text(artisan=artisan) + assert "Specialties: woodwork" in text diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 99b3696..693a982 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,6 +1,6 @@ services: db: - image: postgres:15-alpine + image: pgvector/pgvector:pg15 environment: POSTGRES_USER: stellarts POSTGRES_PASSWORD: stellarts_dev @@ -8,7 +8,7 @@ services: volumes: - postgres_data:/var/lib/postgresql/data ports: - - "5432:5432" + - "5433:5432" healthcheck: test: ["CMD-SHELL", "pg_isready -U stellarts"] interval: 30s diff --git a/backend/env.example b/backend/env.example index 87b046a..a4b7c51 100644 --- a/backend/env.example +++ b/backend/env.example @@ -27,3 +27,7 @@ FRONTEND_URL=http://localhost:3000 # External APIs (for future use) STRIPE_SECRET_KEY= STRIPE_PUBLISHABLE_KEY= + +# Semantic search +OPENAI_API_KEY= +SEMANTIC_CACHE_TTL=300 diff --git a/backend/pytest.ini b/backend/pytest.ini index 52a3ffb..2ebd8f1 100644 --- a/backend/pytest.ini +++ b/backend/pytest.ini @@ -11,3 +11,6 @@ addopts = markers = slow: marks tests as slow (deselect with '-m "not slow"') integration: marks tests as integration tests +filterwarnings = + ignore:Using extra keyword arguments on `Field` is deprecated.*Extra keys: 'deprecated'.*:pydantic.warnings.PydanticDeprecatedSince20 + ignore:It looks like you haven't set a TimeBounds for the transaction.*:UserWarning diff --git a/backend/requirements.txt b/backend/requirements.txt index 797734b..10c0be3 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -21,3 +21,8 @@ aiohttp==3.9.1 stellar-sdk==13.1.0 argon2-cffi==23.1.0 fastapi-mail==1.4.1 +openai>=1.35.0 +pgvector>=0.2.0 +google-auth-oauthlib>=1.2.0 +google-api-python-client>=2.130.0 +msal>=1.28.0 diff --git a/frontend/next-env.d.ts b/frontend/next-env.d.ts index 4f11a03..40c3d68 100644 --- a/frontend/next-env.d.ts +++ b/frontend/next-env.d.ts @@ -2,4 +2,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/basic-features/typescript for more information. +// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information. diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 21ae49b..51323a0 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1774,28 +1774,6 @@ "license": "Apache-2.0", "peer": true }, - "node_modules/@near-js/providers/node_modules/node-fetch": { - "version": "2.6.7", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", - "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, "node_modules/@near-js/signers": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/@near-js/signers/-/signers-0.2.2.tgz",