Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
import json
import logging
import os
import re
from datetime import date, datetime
from typing import Annotated, Any, Dict, List, Optional

import numpy as np
from fastapi import FastAPI, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import JSONResponse, PlainTextResponse
from fastapi import Depends, FastAPI, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import FileResponse, JSONResponse, PlainTextResponse
from fastapi.staticfiles import StaticFiles
from slowapi.errors import RateLimitExceeded

from src.auth.dependencies import require_admin_key

from src.analytics.service import analytics_service
from src.chat import ChatMessage, EscalationEvent, chat_manager
from src.config import get_settings
Expand Down Expand Up @@ -44,7 +47,10 @@
_query_daily_sales,
_query_invalid_scans,
_query_transfer_stats,
create_generated_reports_table,
generate_daily_report_csv,
list_reports,
scan_and_populate_reports,
)
from src.revenue_sharing_models import (
EventRevenueInput,
Expand Down Expand Up @@ -85,6 +91,8 @@
QRValidateResponse,
RecommendRequest,
RecommendResponse,
ReportItem,
ReportsListResponse,
RootResponse,
SearchEventsRequest,
SearchEventsResponse,
Expand Down Expand Up @@ -166,6 +174,13 @@ def on_startup() -> None:
if not settings.SKIP_MODEL_TRAINING:
model_pipeline = train_logistic_regression_pipeline()

# Ensure the generated_reports table exists and backfill from disk.
try:
create_generated_reports_table()
scan_and_populate_reports()
except Exception as exc:
logger.warning("Report metadata init failed (non-fatal): %s", exc)

if settings.ENABLE_ETL_SCHEDULER and BackgroundScheduler is not None:
etl_scheduler = BackgroundScheduler(timezone="UTC")
cron = settings.ETL_CRON
Expand Down Expand Up @@ -532,6 +547,52 @@ def generate_daily_report(payload: DailyReportRequest) -> Any:
return JSONResponse(status_code=500, content={"detail": f"Report generation failed: {exc}"})


@app.get("/reports", response_model=ReportsListResponse)
def get_reports_list(
    _: str = Depends(require_admin_key),
) -> ReportsListResponse:
    """List the most recently generated reports (ADMIN).

    Access is gated by the ``require_admin_key`` dependency; its resolved
    value is not used by the handler itself.

    Returns:
        A ``ReportsListResponse`` holding one ``ReportItem`` per row
        returned by ``list_reports()``.

    Raises:
        HTTPException: 500 when fetching the rows or building the response
            models fails for any reason.
    """
    log_info("Reports list requested")
    try:
        items = [ReportItem(**row) for row in list_reports()]
        return ReportsListResponse(reports=items)
    except Exception as exc:
        log_error("Failed to list reports", {"error": str(exc)})
        # Chain the cause so the original traceback is preserved for debugging.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list reports: {exc}",
        ) from exc


# Safe filename pattern — must match what generate_daily_report_csv produces
_SAFE_REPORT_FILENAME = re.compile(r"^daily_report_\d{4}-\d{2}-\d{2}_\d{8}_\d{6}\.(csv|json)$")


@app.get("/reports/download/{filename}")
def download_report(
    filename: str,
    _: str = Depends(require_admin_key),
) -> FileResponse:
    """Stream a previously generated report file (ADMIN).

    Args:
        filename: Bare report filename taken from the URL path. It must match
            ``_SAFE_REPORT_FILENAME`` in full.

    Returns:
        A ``FileResponse`` for the file, served as ``application/json`` for
        ``.json`` names and ``text/csv`` otherwise.

    Raises:
        HTTPException: 400 when the name fails validation or resolves outside
            ``REPORTS_DIR``; 404 when no such file exists.
    """
    # fullmatch() requires the whole string to match, so a trailing newline
    # (which ``$`` combined with match() would accept) is rejected.
    if not _SAFE_REPORT_FILENAME.fullmatch(filename):
        raise HTTPException(status_code=400, detail="Invalid report filename")

    # Imported at call time rather than at module import time.
    from src.report_service import REPORTS_DIR

    # Second line of defence: resolve to an absolute path and confirm the
    # result is still located inside REPORTS_DIR.
    try:
        reports_root = REPORTS_DIR.resolve()
        resolved = (REPORTS_DIR / filename).resolve()
        resolved.relative_to(reports_root)
    except (ValueError, OSError) as exc:
        raise HTTPException(status_code=400, detail="Invalid report filename") from exc

    if not resolved.is_file():
        raise HTTPException(status_code=404, detail="Report not found")

    media_type = "application/json" if filename.endswith(".json") else "text/csv"
    log_info("Report download requested", {"filename": filename})
    return FileResponse(path=str(resolved), media_type=media_type, filename=filename)


# ---------------------------------------------------------------------------
# Chat — HTTP endpoints
# ---------------------------------------------------------------------------
Expand Down
121 changes: 121 additions & 0 deletions src/report_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
import json
import logging
import re
from datetime import date, datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
Expand All @@ -13,6 +14,110 @@

# Directory holding generated report files. The path is relative, so it is
# resolved against the process working directory.
REPORTS_DIR = Path("reports")

# ---------------------------------------------------------------------------
# generated_reports table helpers
# ---------------------------------------------------------------------------

# Report filename pattern: group 1 captures the leading YYYY-MM-DD date and
# group 2 the file extension (csv or json).
_FILENAME_RE = re.compile(r"^daily_report_(\d{4}-\d{2}-\d{2})_\d{8}_\d{6}\.(csv|json)$")


def create_generated_reports_table() -> None:
    """Ensure the ``generated_reports`` metadata table exists.

    Runs a ``CREATE TABLE IF NOT EXISTS`` statement inside an
    ``engine.begin()`` block. When ``_pg_engine()`` returns ``None`` the
    step is logged and skipped.
    """
    db_engine = _pg_engine()
    if db_engine is None:
        logger.info("Skipping generated_reports table creation — no DB engine")
        return

    # The SQL is kept flush-left so the statement text is unchanged.
    ddl = text("""
CREATE TABLE IF NOT EXISTS generated_reports (
id SERIAL PRIMARY KEY,
filename TEXT NOT NULL UNIQUE,
report_date DATE NOT NULL,
format TEXT NOT NULL,
size_bytes BIGINT NOT NULL,
generated_at TIMESTAMP NOT NULL
)
""")
    with db_engine.begin() as connection:
        connection.execute(ddl)
    logger.info("generated_reports table ready")


def insert_report_metadata(
    filename: str,
    report_date: date,
    fmt: str,
    size_bytes: int,
    generated_at: datetime,
) -> None:
    """Record one generated report in the ``generated_reports`` table.

    The insert uses ``ON CONFLICT (filename) DO NOTHING``, so a filename that
    is already recorded is left untouched. The call is a no-op when
    ``_pg_engine()`` returns ``None``.

    Args:
        filename: Name of the report file.
        report_date: Date stored in the ``report_date`` column.
        fmt: Value stored in the ``format`` column.
        size_bytes: Value stored in the ``size_bytes`` column.
        generated_at: Timestamp stored in the ``generated_at`` column.
    """
    db_engine = _pg_engine()
    if db_engine is None:
        return

    bind_values = dict(
        filename=filename,
        report_date=report_date,
        format=fmt,
        size_bytes=size_bytes,
        generated_at=generated_at,
    )
    # The SQL is kept flush-left so the statement text is unchanged.
    statement = text("""
INSERT INTO generated_reports (filename, report_date, format, size_bytes, generated_at)
VALUES (:filename, :report_date, :format, :size_bytes, :generated_at)
ON CONFLICT (filename) DO NOTHING
""")
    with db_engine.begin() as connection:
        connection.execute(statement, bind_values)


def list_reports() -> List[Dict[str, Any]]:
    """Fetch metadata for the newest reports, at most 100 of them.

    Returns:
        One dict per row, ordered by ``generated_at`` descending, with keys
        ``filename``, ``report_date``, ``format``, ``size_bytes``,
        ``generated_at`` and ``download_url``. The list is empty when
        ``_pg_engine()`` returns ``None``.
    """
    db_engine = _pg_engine()
    if db_engine is None:
        return []

    # The SQL is kept flush-left so the statement text is unchanged.
    query = text("""
SELECT filename, report_date, format, size_bytes, generated_at
FROM generated_reports
ORDER BY generated_at DESC
LIMIT 100
""")
    with db_engine.connect() as connection:
        cursor = connection.execute(query)
        return [
            {
                "filename": name,
                "report_date": str(covered),
                "format": file_format,
                "size_bytes": size,
                # Fall back to str() for values that lack isoformat().
                "generated_at": created.isoformat() if hasattr(created, "isoformat") else str(created),
                "download_url": f"/reports/download/{name}",
            }
            for name, covered, file_format, size, created in cursor
        ]


def scan_and_populate_reports() -> None:
    """Backfill ``generated_reports`` from files already in ``REPORTS_DIR``.

    Every regular file whose name matches ``_FILENAME_RE`` is passed to
    ``insert_report_metadata``. A failure on one file is logged as a warning
    and the scan moves on to the next. The call is a no-op when
    ``_pg_engine()`` returns ``None``.
    """
    # Local import: the module-level datetime import only brings in
    # ``date`` and ``datetime``.
    from datetime import timezone

    if _pg_engine() is None:
        return
    _ensure_reports_dir()
    for filepath in sorted(REPORTS_DIR.iterdir()):
        if not filepath.is_file():
            continue
        m = _FILENAME_RE.match(filepath.name)
        if m is None:
            continue
        try:
            report_date = date.fromisoformat(m.group(1))
            fmt = m.group(2)
            # A single stat() call keeps size and mtime from the same snapshot.
            file_info = filepath.stat()
            # Use file modification time as a best-effort generated_at.
            # Equivalent to the deprecated utcfromtimestamp(): a naive
            # datetime expressed in UTC.
            generated_at = datetime.fromtimestamp(
                file_info.st_mtime, tz=timezone.utc
            ).replace(tzinfo=None)
            insert_report_metadata(
                filepath.name, report_date, fmt, file_info.st_size, generated_at
            )
        except Exception as exc:
            logger.warning("Skipping %s during scan: %s", filepath.name, exc)
    logger.info("reports/ directory scan complete")


def _pg_engine():
    """Return the result of ``_db.get_engine()``.

    Callers in this module treat a ``None`` result as "no database
    available" and skip their work.
    """
    engine = _db.get_engine()
    return engine
Expand Down Expand Up @@ -133,6 +238,14 @@ def generate_daily_report_csv(
with open(filepath, "w") as f:
json.dump(report_data, f, indent=2)

generated_at = datetime.utcnow()
insert_report_metadata(
filename=filename,
report_date=target_date,
fmt="json",
size_bytes=filepath.stat().st_size,
generated_at=generated_at,
)
logger.info("Generated JSON report: %s", filepath)
return str(filepath)

Expand Down Expand Up @@ -164,5 +277,13 @@ def generate_daily_report_csv(
f"${row['revenue']:.2f}",
])

generated_at = datetime.utcnow()
insert_report_metadata(
filename=filename,
report_date=target_date,
fmt="csv",
size_bytes=filepath.stat().st_size,
generated_at=generated_at,
)
logger.info("Generated CSV report: %s", filepath)
return str(filepath)
15 changes: 15 additions & 0 deletions src/types_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,18 @@ class HealthResponse(BaseModel):
status: str
service: str
api_version: str


class ReportItem(BaseModel):
    # Metadata for a single generated report as exposed by the reports API.
    # Documented with comments rather than a docstring so the generated
    # schema for this model is left as it was.
    model_config = ConfigDict(extra="forbid")

    filename: str = Field(
        ...,
        description="Report filename",
    )
    report_date: str = Field(
        ...,
        description="Date the report covers (YYYY-MM-DD)",
    )
    format: str = Field(
        ...,
        description="File format: csv or json",
    )
    size_bytes: int = Field(
        ...,
        description="File size in bytes",
    )
    generated_at: str = Field(
        ...,
        description="ISO timestamp when the report was generated",
    )
    download_url: str = Field(
        ...,
        description="Relative URL to download the report",
    )


class ReportsListResponse(BaseModel):
    # Response envelope for the reports listing: a single ``reports`` list.
    # Documented with comments rather than a docstring so the generated
    # schema for this model is left as it was.
    model_config = ConfigDict(extra="forbid")

    reports: List[ReportItem] = Field(
        ...,
        description="List of generated reports (up to 100)",
    )
Loading
Loading