diff --git a/src/main.py b/src/main.py
index 3904d00..45c9a8d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,15 +4,18 @@
 import json
 import logging
 import os
+import re
 from datetime import date, datetime
 from typing import Annotated, Any, Dict, List, Optional
 
 import numpy as np
-from fastapi import FastAPI, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
-from fastapi.responses import JSONResponse, PlainTextResponse
+from fastapi import Depends, FastAPI, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse, JSONResponse, PlainTextResponse
 from fastapi.staticfiles import StaticFiles
 from slowapi.errors import RateLimitExceeded
 
+from src.auth.dependencies import require_admin_key
+
 from src.analytics.service import analytics_service
 from src.chat import ChatMessage, EscalationEvent, chat_manager
 from src.config import get_settings
@@ -44,7 +47,10 @@
     _query_daily_sales,
     _query_invalid_scans,
     _query_transfer_stats,
+    create_generated_reports_table,
     generate_daily_report_csv,
+    list_reports,
+    scan_and_populate_reports,
 )
 from src.revenue_sharing_models import (
     EventRevenueInput,
@@ -85,6 +91,8 @@
     QRValidateResponse,
     RecommendRequest,
     RecommendResponse,
+    ReportItem,
+    ReportsListResponse,
     RootResponse,
     SearchEventsRequest,
     SearchEventsResponse,
@@ -166,6 +174,13 @@ def on_startup() -> None:
     if not settings.SKIP_MODEL_TRAINING:
         model_pipeline = train_logistic_regression_pipeline()
 
+    # Ensure the generated_reports table exists and backfill from disk.
+    try:
+        create_generated_reports_table()
+        scan_and_populate_reports()
+    except Exception as exc:
+        logger.warning("Report metadata init failed (non-fatal): %s", exc)
+
     if settings.ENABLE_ETL_SCHEDULER and BackgroundScheduler is not None:
         etl_scheduler = BackgroundScheduler(timezone="UTC")
         cron = settings.ETL_CRON
@@ -532,6 +547,52 @@ def generate_daily_report(payload: DailyReportRequest) -> Any:
         return JSONResponse(status_code=500, content={"detail": f"Report generation failed: {exc}"})
 
 
+@app.get("/reports", response_model=ReportsListResponse)
+def get_reports_list(
+    _: str = Depends(require_admin_key),
+) -> ReportsListResponse:
+    """List up to 100 most recently generated reports (ADMIN)."""
+    log_info("Reports list requested")
+    try:
+        rows = list_reports()
+        items = [ReportItem(**row) for row in rows]
+        return ReportsListResponse(reports=items)
+    except Exception as exc:
+        log_error("Failed to list reports", {"error": str(exc)})
+        raise HTTPException(status_code=500, detail=f"Failed to list reports: {exc}")
+
+
+# Safe filename pattern — must match what generate_daily_report_csv produces
+_SAFE_REPORT_FILENAME = re.compile(r"^daily_report_\d{4}-\d{2}-\d{2}_\d{8}_\d{6}\.(csv|json)$")
+
+
+@app.get("/reports/download/{filename}")
+def download_report(
+    filename: str,
+    _: str = Depends(require_admin_key),
+) -> FileResponse:
+    """Stream a previously generated report file (ADMIN)."""
+    if not _SAFE_REPORT_FILENAME.match(filename):
+        raise HTTPException(status_code=400, detail="Invalid report filename")
+
+    from src.report_service import REPORTS_DIR
+    filepath = REPORTS_DIR / filename
+    # Resolve to an absolute path and confirm it stays inside REPORTS_DIR
+    try:
+        resolved = filepath.resolve()
+        reports_resolved = REPORTS_DIR.resolve()
+        resolved.relative_to(reports_resolved)
+    except (ValueError, OSError):
+        raise HTTPException(status_code=400, detail="Invalid report filename")
+
+    if not resolved.is_file():
+        raise HTTPException(status_code=404, detail="Report not found")
+
+ media_type = "application/json" if filename.endswith(".json") else "text/csv" + log_info("Report download requested", {"filename": filename}) + return FileResponse(path=str(resolved), media_type=media_type, filename=filename) + + # --------------------------------------------------------------------------- # Chat — HTTP endpoints # --------------------------------------------------------------------------- diff --git a/src/report_service.py b/src/report_service.py index e4e98c9..69d2295 100644 --- a/src/report_service.py +++ b/src/report_service.py @@ -1,6 +1,7 @@ import csv import json import logging +import re from datetime import date, datetime from pathlib import Path from typing import Any, Dict, List, Optional @@ -13,6 +14,110 @@ REPORTS_DIR = Path("reports") +# --------------------------------------------------------------------------- +# generated_reports table helpers +# --------------------------------------------------------------------------- + +_FILENAME_RE = re.compile(r"^daily_report_(\d{4}-\d{2}-\d{2})_\d{8}_\d{6}\.(csv|json)$") + + +def create_generated_reports_table() -> None: + """Create the generated_reports table if it does not yet exist.""" + engine = _pg_engine() + if engine is None: + logger.info("Skipping generated_reports table creation — no DB engine") + return + with engine.begin() as conn: + conn.execute(text(""" + CREATE TABLE IF NOT EXISTS generated_reports ( + id SERIAL PRIMARY KEY, + filename TEXT NOT NULL UNIQUE, + report_date DATE NOT NULL, + format TEXT NOT NULL, + size_bytes BIGINT NOT NULL, + generated_at TIMESTAMP NOT NULL + ) + """)) + logger.info("generated_reports table ready") + + +def insert_report_metadata( + filename: str, + report_date: date, + fmt: str, + size_bytes: int, + generated_at: datetime, +) -> None: + """Insert a single report row, silently ignoring duplicate filenames.""" + engine = _pg_engine() + if engine is None: + return + with engine.begin() as conn: + conn.execute( + text(""" + INSERT INTO generated_reports (filename, report_date, format, size_bytes, generated_at) + VALUES (:filename, :report_date, :format, :size_bytes, :generated_at) + ON CONFLICT (filename) DO NOTHING + """), + { + "filename": filename, + "report_date": report_date, + "format": fmt, + "size_bytes": size_bytes, + "generated_at": generated_at, + }, + ) + + +def list_reports() -> List[Dict[str, Any]]: + """Return up to 100 most recently generated reports from the DB.""" + engine = _pg_engine() + if engine is None: + return [] + with engine.connect() as conn: + result = conn.execute(text(""" + SELECT filename, report_date, format, size_bytes, generated_at + FROM generated_reports + ORDER BY generated_at DESC + LIMIT 100 + """)) + rows: List[Dict[str, Any]] = [] + for row in result: + filename = row[0] + rows.append({ + "filename": filename, + "report_date": str(row[1]), + "format": row[2], + "size_bytes": row[3], + "generated_at": row[4].isoformat() if hasattr(row[4], "isoformat") else str(row[4]), + "download_url": f"/reports/download/{filename}", + }) + return rows + + +def scan_and_populate_reports() -> None: + """Scan the reports/ directory and insert metadata for any file not yet in the DB.""" + engine = _pg_engine() + if engine is None: + return + _ensure_reports_dir() + for filepath in sorted(REPORTS_DIR.iterdir()): + if not filepath.is_file(): + continue + m = _FILENAME_RE.match(filepath.name) + if not m: + continue + try: + report_date = date.fromisoformat(m.group(1)) + fmt = m.group(2) + size_bytes = filepath.stat().st_size + # Use file modification 
+            generated_at = datetime.utcfromtimestamp(filepath.stat().st_mtime)
+            insert_report_metadata(filepath.name, report_date, fmt, size_bytes, generated_at)
+        except Exception as exc:
+            logger.warning("Skipping %s during scan: %s", filepath.name, exc)
+    logger.info("reports/ directory scan complete")
+
 
 def _pg_engine():
     return _db.get_engine()
@@ -133,6 +238,14 @@ def generate_daily_report_csv(
         with open(filepath, "w") as f:
             json.dump(report_data, f, indent=2)
 
+        generated_at = datetime.utcnow()
+        insert_report_metadata(
+            filename=filename,
+            report_date=target_date,
+            fmt="json",
+            size_bytes=filepath.stat().st_size,
+            generated_at=generated_at,
+        )
         logger.info("Generated JSON report: %s", filepath)
         return str(filepath)
 
@@ -164,5 +277,13 @@
             f"${row['revenue']:.2f}",
         ])
 
+    generated_at = datetime.utcnow()
+    insert_report_metadata(
+        filename=filename,
+        report_date=target_date,
+        fmt="csv",
+        size_bytes=filepath.stat().st_size,
+        generated_at=generated_at,
+    )
     logger.info("Generated CSV report: %s", filepath)
     return str(filepath)
\ No newline at end of file
diff --git a/src/types_custom.py b/src/types_custom.py
index d480fd8..cab483b 100644
--- a/src/types_custom.py
+++ b/src/types_custom.py
@@ -254,3 +254,18 @@ class HealthResponse(BaseModel):
     status: str
     service: str
     api_version: str
+
+
+class ReportItem(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    filename: str = Field(..., description="Report filename")
+    report_date: str = Field(..., description="Date the report covers (YYYY-MM-DD)")
+    format: str = Field(..., description="File format: csv or json")
+    size_bytes: int = Field(..., description="File size in bytes")
+    generated_at: str = Field(..., description="ISO timestamp when the report was generated")
+    download_url: str = Field(..., description="Relative URL to download the report")
+
+
+class ReportsListResponse(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    reports: List[ReportItem] = Field(..., description="List of generated reports (up to 100)")
diff --git a/tests/test_reports_list.py b/tests/test_reports_list.py
new file mode 100644
index 0000000..8aa812b
--- /dev/null
+++ b/tests/test_reports_list.py
@@ -0,0 +1,285 @@
+"""Tests for GET /reports and GET /reports/download/{filename} endpoints.
+
+Closes #152
+"""
+import os
+from datetime import date, datetime
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from src.config import settings
+from src.main import app
+from src.report_service import REPORTS_DIR
+
+client = TestClient(app)
+
+ADMIN_HEADERS = {"Authorization": f"Bearer {settings.ADMIN_API_KEY}"}
+
+# ---------------------------------------------------------------------------
+# Helpers / fixtures
+# ---------------------------------------------------------------------------
+
+SAMPLE_REPORT_ROW = {
+    "filename": "daily_report_2025-01-01_20250101_120000.csv",
+    "report_date": "2025-01-01",
+    "format": "csv",
+    "size_bytes": 4096,
+    "generated_at": "2025-01-01T12:00:00",
+    "download_url": "/reports/download/daily_report_2025-01-01_20250101_120000.csv",
+}
+
+
+@pytest.fixture(autouse=True)
+def cleanup_reports():
+    yield
+    if REPORTS_DIR.exists():
+        for f in REPORTS_DIR.glob("daily_report_*"):
+            f.unlink(missing_ok=True)
+
+
+# ---------------------------------------------------------------------------
+# GET /reports
+# ---------------------------------------------------------------------------
+
+class TestGetReportsList:
+    def test_returns_empty_list_when_no_reports(self):
+        # Patch the name bound in src.main (imported there at module load),
+        # not the original in src.report_service, so the endpoint sees the mock.
+        with patch("src.main.list_reports", return_value=[]):
+            resp = client.get("/reports", headers=ADMIN_HEADERS)
+            assert resp.status_code == 200
+            body = resp.json()
+            assert "reports" in body
+            assert body["reports"] == []
+
+    def test_returns_report_items(self):
+        with patch("src.main.list_reports", return_value=[SAMPLE_REPORT_ROW]):
+            resp = client.get("/reports", headers=ADMIN_HEADERS)
+            assert resp.status_code == 200
+            body = resp.json()
+            assert len(body["reports"]) == 1
+            item = body["reports"][0]
+            assert item["filename"] == SAMPLE_REPORT_ROW["filename"]
+            assert item["report_date"] == SAMPLE_REPORT_ROW["report_date"]
+            assert item["format"] == SAMPLE_REPORT_ROW["format"]
+            assert item["size_bytes"] == SAMPLE_REPORT_ROW["size_bytes"]
+            assert item["generated_at"] == SAMPLE_REPORT_ROW["generated_at"]
+            assert item["download_url"] == SAMPLE_REPORT_ROW["download_url"]
+
+    def test_returns_multiple_reports(self):
+        rows = [
+            {**SAMPLE_REPORT_ROW, "filename": f"daily_report_2025-01-0{i}_20250101_12000{i}.csv",
+             "download_url": f"/reports/download/daily_report_2025-01-0{i}_20250101_12000{i}.csv"}
+            for i in range(1, 4)
+        ]
+        with patch("src.main.list_reports", return_value=rows):
+            resp = client.get("/reports", headers=ADMIN_HEADERS)
+            assert resp.status_code == 200
+            assert len(resp.json()["reports"]) == 3
+
+    def test_requires_admin_auth(self):
+        resp = client.get("/reports")
+        assert resp.status_code == 401
+
+    def test_rejects_invalid_admin_key(self):
+        resp = client.get("/reports", headers={"Authorization": "Bearer wrong_key"})
+        assert resp.status_code == 403
+
+    def test_handles_db_error_gracefully(self):
+        with patch("src.main.list_reports", side_effect=Exception("DB down")):
+            resp = client.get("/reports", headers=ADMIN_HEADERS)
+            assert resp.status_code == 500
+            assert "Failed to list reports" in resp.json()["detail"]
+
+
+# ---------------------------------------------------------------------------
+# GET /reports/download/{filename}
+# ---------------------------------------------------------------------------
+
+class TestDownloadReport:
+    def _make_report_file(self, filename: str, content: str = "col1,col2\nval1,val2\n") -> Path:
+        REPORTS_DIR.mkdir(exist_ok=True)
+        fp = REPORTS_DIR / filename
+        fp.write_text(content)
+        return fp
+
+    def test_download_existing_csv(self):
+        filename = "daily_report_2025-01-01_20250101_120000.csv"
+        self._make_report_file(filename, "data")
+        resp = client.get(f"/reports/download/{filename}", headers=ADMIN_HEADERS)
+        assert resp.status_code == 200
+        assert "text/csv" in resp.headers["content-type"]
+
+    def test_download_existing_json(self):
+        filename = "daily_report_2025-02-14_20250214_080000.json"
+        self._make_report_file(filename, '{"report_date": "2025-02-14"}')
+        resp = client.get(f"/reports/download/{filename}", headers=ADMIN_HEADERS)
+        assert resp.status_code == 200
+        assert "application/json" in resp.headers["content-type"]
+
+    def test_returns_404_for_missing_file(self):
+        filename = "daily_report_2025-01-01_20250101_999999.csv"
+        resp = client.get(f"/reports/download/{filename}", headers=ADMIN_HEADERS)
+        assert resp.status_code == 404
+        assert resp.json()["detail"] == "Report not found"
+
+    def test_returns_400_for_invalid_filename(self):
+        # The HTTP client may normalize ".." segments before the request
+        # reaches the app, in which case no route matches and we get 404.
+        resp = client.get("/reports/download/../../etc/passwd", headers=ADMIN_HEADERS)
+        assert resp.status_code in (400, 404, 422)
+
+    def test_returns_400_for_arbitrary_filename(self):
+        resp = client.get("/reports/download/malicious_file.sh", headers=ADMIN_HEADERS)
+        assert resp.status_code == 400
+        assert resp.json()["detail"] == "Invalid report filename"
+
+    def test_requires_admin_auth(self):
+        filename = "daily_report_2025-01-01_20250101_120000.csv"
+        resp = client.get(f"/reports/download/{filename}")
+        assert resp.status_code == 401
+
+    def test_rejects_invalid_admin_key(self):
+        filename = "daily_report_2025-01-01_20250101_120000.csv"
+        resp = client.get(f"/reports/download/{filename}", headers={"Authorization": "Bearer bad"})
+        assert resp.status_code == 403
+
+
+# ---------------------------------------------------------------------------
+# report_service unit tests
+# ---------------------------------------------------------------------------
+
+class TestCreateGeneratedReportsTable:
+    def test_no_op_when_no_engine(self):
+        with patch("src.report_service._pg_engine", return_value=None):
+            from src.report_service import create_generated_reports_table
+            # Should not raise
+            create_generated_reports_table()
+
+    def test_executes_create_table(self):
+        mock_conn = MagicMock()
+        mock_engine = MagicMock()
+        mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)
+        mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False)
+        with patch("src.report_service._pg_engine", return_value=mock_engine):
+            from src.report_service import create_generated_reports_table
+            create_generated_reports_table()
+            mock_conn.execute.assert_called_once()
+            call_args = str(mock_conn.execute.call_args)
+            assert "generated_reports" in call_args
+
+
+class TestInsertReportMetadata:
+    def test_no_op_when_no_engine(self):
+        with patch("src.report_service._pg_engine", return_value=None):
+            from src.report_service import insert_report_metadata
+            insert_report_metadata("f.csv", date(2025, 1, 1), "csv", 100, datetime.utcnow())
+
+    def test_inserts_row(self):
+        mock_conn = MagicMock()
+        mock_engine = MagicMock()
+        mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)
+        mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False)
+        with patch("src.report_service._pg_engine", return_value=mock_engine):
+            from src.report_service import insert_report_metadata
+            insert_report_metadata(
"daily_report_2025-01-01_20250101_120000.csv", + date(2025, 1, 1), + "csv", + 512, + datetime(2025, 1, 1, 12, 0, 0), + ) + mock_conn.execute.assert_called_once() + + +class TestListReports: + def test_returns_empty_when_no_engine(self): + with patch("src.report_service._pg_engine", return_value=None): + from src.report_service import list_reports + assert list_reports() == [] + + def test_returns_formatted_rows(self): + mock_row = ( + "daily_report_2025-01-01_20250101_120000.csv", + date(2025, 1, 1), + "csv", + 4096, + datetime(2025, 1, 1, 12, 0, 0), + ) + mock_result = MagicMock() + mock_result.__iter__ = MagicMock(return_value=iter([mock_row])) + mock_conn = MagicMock() + mock_conn.execute.return_value = mock_result + mock_engine = MagicMock() + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + with patch("src.report_service._pg_engine", return_value=mock_engine): + from src.report_service import list_reports + rows = list_reports() + assert len(rows) == 1 + assert rows[0]["filename"] == "daily_report_2025-01-01_20250101_120000.csv" + assert rows[0]["report_date"] == "2025-01-01" + assert rows[0]["format"] == "csv" + assert rows[0]["size_bytes"] == 4096 + assert rows[0]["download_url"] == "/reports/download/daily_report_2025-01-01_20250101_120000.csv" + + +class TestScanAndPopulateReports: + def test_no_op_when_no_engine(self): + with patch("src.report_service._pg_engine", return_value=None): + from src.report_service import scan_and_populate_reports + scan_and_populate_reports() + + def test_skips_non_matching_files(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.report_service.REPORTS_DIR", tmp_path) + (tmp_path / "random_file.txt").write_text("data") + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False) + with patch("src.report_service._pg_engine", return_value=mock_engine): + from src.report_service import scan_and_populate_reports + scan_and_populate_reports() + mock_conn.execute.assert_not_called() + + def test_inserts_matching_files(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.report_service.REPORTS_DIR", tmp_path) + (tmp_path / "daily_report_2025-03-15_20250315_080000.csv").write_text("data") + mock_conn = MagicMock() + mock_engine = MagicMock() + mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False) + with patch("src.report_service._pg_engine", return_value=mock_engine): + from src.report_service import scan_and_populate_reports + scan_and_populate_reports() + mock_conn.execute.assert_called_once() + + +class TestGenerateDailyReportInsertsMetadata: + """Verify that generate_daily_report_csv calls insert_report_metadata.""" + + def test_csv_inserts_metadata(self): + sales = [{"event_id": "E1", "sale_date": "2025-06-01", "tickets_sold": 5, "revenue": 50.0}] + with patch("src.report_service._query_daily_sales", return_value=sales), \ + patch("src.report_service._query_event_names", return_value={"E1": "Evt"}), \ + patch("src.report_service._query_transfer_stats", return_value={"total_transfers": 0}), \ + patch("src.report_service._query_invalid_scans", return_value={"invalid_scans": 0}), \ + patch("src.report_service.insert_report_metadata") as mock_insert: + from src.report_service import 
+            generate_daily_report_csv(target_date=date(2025, 6, 1), output_format="csv")
+        mock_insert.assert_called_once()
+        # insert_report_metadata is called with keyword arguments, so inspect
+        # call_args.kwargs rather than positional args.
+        assert mock_insert.call_args.kwargs["fmt"] == "csv"
+
+    def test_json_inserts_metadata(self):
+        sales = [{"event_id": "E1", "sale_date": "2025-06-01", "tickets_sold": 5, "revenue": 50.0}]
+        with patch("src.report_service._query_daily_sales", return_value=sales), \
+             patch("src.report_service._query_event_names", return_value={"E1": "Evt"}), \
+             patch("src.report_service._query_transfer_stats", return_value={"total_transfers": 0}), \
+             patch("src.report_service._query_invalid_scans", return_value={"invalid_scans": 0}), \
+             patch("src.report_service.insert_report_metadata") as mock_insert:
+            from src.report_service import generate_daily_report_csv
+            generate_daily_report_csv(target_date=date(2025, 6, 1), output_format="json")
+        mock_insert.assert_called_once()
+        assert mock_insert.call_args.kwargs["fmt"] == "json"
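
Reviewer note: below is a minimal smoke-test sketch for the two new endpoints, not part of the diff. It assumes the API is served at http://localhost:8000, that ADMIN_API_KEY is exported in the environment, and that the requests package is installed; the base URL, the env-var name, and the HTTP client are all assumptions for illustration.

    import os
    import requests  # assumed available; any HTTP client works

    BASE = "http://localhost:8000"  # assumed local dev server
    HEADERS = {"Authorization": f"Bearer {os.environ['ADMIN_API_KEY']}"}

    # List the most recently generated reports (admin-only endpoint).
    resp = requests.get(f"{BASE}/reports", headers=HEADERS, timeout=10)
    resp.raise_for_status()
    reports = resp.json()["reports"]

    # Each item carries a relative download_url; fetch the newest report.
    if reports:
        download = requests.get(BASE + reports[0]["download_url"], headers=HEADERS, timeout=30)
        download.raise_for_status()
        print(reports[0]["filename"], len(download.content), "bytes")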