diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..b39e483 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,16 @@ +## SUMMARY + +## TEST PLAN + +--- + +## Pre-merge author checklist + +- [ ] I've clearly explained: + - [ ] What problem this PR is solving. + - [ ] How this problem was solved. + - [ ] How reviewers can test my changes. +- [ ] I've indicated what Jira issue(s) this PR is linked to. +- [ ] I've included tests I've run to ensure my changes work. +- [ ] I've added unit tests for any new code, if applicable. +- [ ] I've documented any added code. diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 0000000..ef29cfd --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,61 @@ +# Security + +This document summarizes the key security controls and best practices for the AugMed App (frontend + backend). + +## 1. Transport Security +- **HTTPS only** + All traffic to `https://augmed1.dhep.org` is encrypted with TLS. +- **HSTS** + The backend API enforces HTTP Strict Transport Security to prevent downgrade attacks. + +## 2. Authentication & Authorization +- **JWT-based auth** + Users authenticate via a JSON Web Token (JWT) issued by the backend. +- **httpOnly cookies** + JWTs are stored in httpOnly cookies to mitigate XSS-based token theft. +- **Route protection** + All API endpoints under `/api/*` require a valid JWT and check user ownership. + +## 3. CORS +- **Restricted origin** + Backend CORS policy only allows requests from the official frontend origin (`https://augmed1.dhep.org`). +- **Preflight checks** + `OPTIONS` requests are handled and validated before allowing any state-changing method. + +## 4. Secrets & Config +- **Environment variables** + All secrets (database URLs, JWT signing keys, third-party API keys) are injected via environment variables—never checked into source control. 
+- **.env exclusions** + The repository’s `.gitignore` excludes any local `.env` or secret files. + +## 5. Dependency Management +- **Regular audits** + - Frontend: `npm audit` (or `yarn audit`) run on each CI build. + - Backend: `pip-audit` (or `safety`) scans Python dependencies for known vulnerabilities. +- **Pinned versions** + `package.json` and `requirements.txt` use exact version pins to ensure reproducible installs. + +## 6. Input Validation & Output Encoding +- **Schema validation** + Backend request bodies are validated against JSON schemas via `flask_json_schema`. +- **ORM usage** + All database access uses SQLAlchemy with parameterized queries to prevent SQL injection. +- **Escape output** + Frontend templates escape any user-provided content to avoid XSS. + +## 7. Content Security Policy (CSP) +- The frontend sets a strict CSP header to disallow inline scripts and only allow trusted script sources. + +## 8. Logging & Monitoring +- **Audit logs** + Security-related events (login, token validation failures, analytics submissions) are logged centrally. +- **Error handling** + Stack traces and internal errors are never exposed to end users; they are captured in server logs only. + +## 9. Database Migrations +- **Alembic migrations** + Schema changes are tracked and applied via Alembic; no manual DDL in production. + +--- + +> For any security concerns, please contact the DHEP Lab’s security team at `dhep.lab@gmail.com`. 
diff --git a/README.md b/README.md index 6b3bd0c..f5e0d73 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,11 @@ AugMed is a web application, built for the UNC-Chapel Hill DHEP Lab, that allows ![Python](https://img.shields.io/badge/Python-3776AB?style=for-the-badge&logo=python&logoColor=white) ![Flask](https://img.shields.io/badge/Flask-000000?style=for-the-badge&logo=flask&logoColor=white) ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-4169E1?style=for-the-badge&logo=postgresql&logoColor=white) +![AWS RDS](https://img.shields.io/badge/AWS%20RDS-527FFF?style=for-the-badge&logo=amazon-rds&logoColor=white) +![AWS S3](https://img.shields.io/badge/AWS%20S3-8C4FFF?style=for-the-badge&logo=amazon-s3&logoColor=white) +![AWS ECR](https://img.shields.io/badge/AWS%20ECR-F58534?style=for-the-badge&logo=aws&logoColor=white) +![AWS ECS](https://img.shields.io/badge/AWS%20ECS-FF5A00?style=for-the-badge&logo=aws&logoColor=white) +![AWS ALB](https://img.shields.io/badge/AWS%20ALB-232F3E?style=for-the-badge&logo=amazon-aws&logoColor=white) ![Redis](https://img.shields.io/badge/Redis-DC382D?style=for-the-badge&logo=redis&logoColor=white) ![Celery](https://img.shields.io/badge/Celery-37814A?style=for-the-badge&logo=celery&logoColor=white) ![Alembic](https://img.shields.io/badge/Alembic-000000?style=for-the-badge&logo=alembic&logoColor=white) @@ -17,6 +22,7 @@ AugMed is a web application, built for the UNC-Chapel Hill DHEP Lab, that allows ![Shell](https://img.shields.io/badge/Shell-4EAA25?style=for-the-badge&logo=gnu-bash&logoColor=white) ![Docker](https://img.shields.io/badge/Docker-2496ED?style=for-the-badge&logo=docker&logoColor=white) ![Docker Compose](https://img.shields.io/badge/Docker%20Compose-2496ED?style=for-the-badge&logo=docker&logoColor=white) +![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white) ![Git](https://img.shields.io/badge/Git-F05032?style=for-the-badge&logo=git&logoColor=white) 
![GitHub](https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white) ![Postman](https://img.shields.io/badge/Postman-FF6C37?style=for-the-badge&logo=postman&logoColor=white) @@ -149,6 +155,16 @@ git config core.hooksPath .githooks This will ensure that your local git hooks are used instead of the default ones. You can find the hooks in the `.githooks` directory. +## Deployment + +The application is deployed using Docker and AWS services. The deployment process involves building Docker images, pushing them to AWS ECR, and deploying them to AWS ECS. + +Specifically, the application is deployed to an AWS ECS cluster using Fargate. The deployment process is automated using GitHub Actions, which builds the Docker image, pushes it to ECR, and updates the ECS service. + +The underlying AWS infrastructure is managed as code with Terraform. + +> **Visit the [augmed-infra repository](https://github.com/DHEPLab/augmed-infra) for more details on the infrastructure setup and deployment process.** + ## License This project is licensed under the Apache License 2.0. See the [LICENSE](LICENSE) file for details. 
diff --git a/src/__init__.py b/src/__init__.py index 5f8f81c..ea4d52f 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -5,6 +5,7 @@ from flask_jwt_extended import JWTManager from flask_migrate import Migrate, upgrade from flask_sqlalchemy import SQLAlchemy +# from flask_cors import CORS from src.common.exception.exception_handlers import register_error_handlers @@ -15,6 +16,13 @@ def create_app(config_object=None): app = Flask(__name__) + # + # CORS( + # app, + # origins=["http://localhost:3000"], + # supports_credentials=True, + # expose_headers=["Authorization"], + # ) # Allow custom configuration for testing if config_object: @@ -45,6 +53,7 @@ def create_app(config_object=None): from src.user.controller.auth_controller import auth_blueprint from src.user.controller.config_controller import config_blueprint from src.user.controller.user_controller import user_blueprint + from src.analytics.controller.analytics_controller import analytics_blueprint app.register_blueprint(admin_answer_config_blueprint, url_prefix="/admin") app.register_blueprint(user_blueprint, url_prefix="/admin") @@ -55,6 +64,7 @@ def create_app(config_object=None): app.register_blueprint(healthcheck_blueprint, url_prefix="/api") app.register_blueprint(case_blueprint, url_prefix="/api") app.register_blueprint(answer_blueprint, url_prefix="/api") + app.register_blueprint(analytics_blueprint) register_error_handlers(app) diff --git a/src/analytics/controller/analytics_controller.py b/src/analytics/controller/analytics_controller.py new file mode 100644 index 0000000..aa0af7c --- /dev/null +++ b/src/analytics/controller/analytics_controller.py @@ -0,0 +1,52 @@ +from flask import Blueprint, request, jsonify +from src import db +from src.analytics.service.analytics_service import AnalyticsService +from src.analytics.repository.analytics_repository import AnalyticsRepository +from src.user.repository.display_config_repository import DisplayConfigRepository +from src.common.model.ApiResponse import 
ApiResponse +from src.user.utils.auth_utils import jwt_validation_required +from src.common.exception.BusinessException import BusinessException, BusinessExceptionEnum +from datetime import datetime, timezone + +# The blueprint carries the full /api/analytics prefix; strict_slashes is relaxed on the route below +analytics_blueprint = Blueprint( + "analytics", + __name__, + url_prefix="/api/analytics", +) + +@analytics_blueprint.route("/", methods=["POST"], strict_slashes=False) +@jwt_validation_required() +def record(): # pragma: no cover + payload = request.get_json() or {} # a falsy JSON body (e.g. null) falls through to the 400 below + case_config_id = payload.get("caseConfigId") + case_open_str = payload.get("caseOpenTime") + answer_open_str = payload.get("answerOpenTime") + answer_submit_str = payload.get("answerSubmitTime") + + if not all([case_config_id, case_open_str, answer_open_str, answer_submit_str]): + ex = BusinessException( + BusinessExceptionEnum.RenderTemplateError, # NOTE(review): enum name suggests template rendering, not input validation - confirm a better member exists + "Missing analytics metrics fields" + ) + return jsonify(ApiResponse.error(ex)), 400 + + fmt = "%Y-%m-%dT%H:%M:%S.%fZ" # fractional seconds and the literal trailing Z are both mandatory + try: + case_open = datetime.strptime(case_open_str, fmt).replace(tzinfo=timezone.utc) + answer_open = datetime.strptime(answer_open_str, fmt).replace(tzinfo=timezone.utc) + answer_submit = datetime.strptime(answer_submit_str, fmt).replace(tzinfo=timezone.utc) + except (ValueError, TypeError): # TypeError: field was truthy but not a string (e.g. a JSON number) + ex = BusinessException( + BusinessExceptionEnum.RenderTemplateError, + "Bad timestamp format for analytics" + ) + return jsonify(ApiResponse.error(ex)), 400 + + analytics = AnalyticsService( + analytics_repository=AnalyticsRepository(db.session), + display_config_repository=DisplayConfigRepository(db.session), + ).record_metrics(case_config_id, case_open, answer_open, answer_submit) + + db.session.commit() # the repository only flushes; the request handler owns the commit + return jsonify(ApiResponse.success({"id": analytics.id})), 200 diff --git a/src/analytics/model/analytics.py b/src/analytics/model/analytics.py new file mode 100644 index 0000000..a13af34 --- /dev/null +++ b/src/analytics/model/analytics.py @@ -0,0 +1,35 @@ +from datetime import 
datetime, timezone +from sqlalchemy import Column, Integer, String, DateTime, Float +from src import db + +class Analytics(db.Model): + __tablename__ = "analytics" + + id = Column(Integer, primary_key=True, autoincrement=True) + user_email = Column(String(128), nullable=False) + case_config_id = Column(String, nullable=False) + case_id = Column(Integer, nullable=False) + + # event times parsed from the request payload, stored as timezone-aware datetimes + case_open_time = Column(DateTime(timezone=True), nullable=False) + answer_open_time = Column(DateTime(timezone=True), nullable=False) + answer_submit_time = Column(DateTime(timezone=True), nullable=False) + + to_answer_open_secs = Column(Float, nullable=False) + to_submit_secs = Column(Float, nullable=False) + total_duration_secs = Column(Float, nullable=False) + + created_timestamp = Column( + DateTime(timezone=True), nullable=False, # NOT NULL to match the create-table migration + default=lambda: datetime.now(timezone.utc) + ) + modified_timestamp = Column( + DateTime(timezone=True), nullable=False, # NOT NULL to match the create-table migration + default=lambda: datetime.now(timezone.utc), + onupdate=lambda: datetime.now(timezone.utc) + ) + + __table_args__ = ( + # ensure only one analytics row per case_config_id per user; name matches the migration's constraint + db.UniqueConstraint("user_email", "case_config_id", name="uq_analytics_user_case"), + ) diff --git a/src/analytics/repository/analytics_repository.py b/src/analytics/repository/analytics_repository.py new file mode 100644 index 0000000..0db217a --- /dev/null +++ b/src/analytics/repository/analytics_repository.py @@ -0,0 +1,10 @@ +from src.analytics.model.analytics import Analytics + +class AnalyticsRepository: + def __init__(self, session): # pragma: no cover + self.session = session + + def add(self, analytics: Analytics) -> Analytics: # pragma: no cover + self.session.add(analytics) + self.session.flush() # flush, not commit: sends the INSERT so the generated id is available; committing is left to the caller + return analytics diff --git a/src/analytics/service/analytics_service.py b/src/analytics/service/analytics_service.py new file mode 100644 index 0000000..35b5a0a --- /dev/null +++ b/src/analytics/service/analytics_service.py @@ -0,0 +1,42 @@ +from 
datetime import datetime +from src.analytics.model.analytics import Analytics +from src.analytics.repository.analytics_repository import AnalyticsRepository +from src.common.exception.BusinessException import BusinessException, BusinessExceptionEnum +from src.user.utils.auth_utils import get_user_email_from_jwt +from src.user.repository.display_config_repository import DisplayConfigRepository + +class AnalyticsService: + def __init__( + self, + analytics_repository: AnalyticsRepository, + display_config_repository: DisplayConfigRepository, + ): # pragma: no cover + self.analytics_repo = analytics_repository + self.config_repo = display_config_repository + + def record_metrics(self, case_config_id: str, case_open: datetime, + answer_open: datetime, answer_submit: datetime) -> Analytics: # pragma: no cover + """Build and add an Analytics row for case_config_id; raises BusinessException(NoAccessToCaseReview) if the config is missing or its user_email differs from get_user_email_from_jwt().""" + # verify user owns this case_config + config = self.config_repo.get_configuration_by_id(case_config_id) + user_email = get_user_email_from_jwt() + if not config or config.user_email != user_email: + raise BusinessException(BusinessExceptionEnum.NoAccessToCaseReview) + + # durations in seconds; NOTE(review): timestamp order is not checked, so out-of-order inputs give negative values + to_answer_open = (answer_open - case_open).total_seconds() + to_submit = (answer_submit - answer_open).total_seconds() + total = (answer_submit - case_open).total_seconds() + + analytics = Analytics( + user_email=user_email, + case_config_id=case_config_id, + case_id=config.case_id, + case_open_time=case_open, + answer_open_time=answer_open, + answer_submit_time=answer_submit, + to_answer_open_secs=to_answer_open, + to_submit_secs=to_submit, + total_duration_secs=total, + ) + return self.analytics_repo.add(analytics) diff --git a/src/cases/service/case_service.py b/src/cases/service/case_service.py index d9b74bc..df01352 100644 --- a/src/cases/service/case_service.py +++ b/src/cases/service/case_service.py @@ -410,7 +410,7 @@ def get_case_review(self, case_config_id): # pragma: no cover ) sorted_important.append(TreeNode(ai_label, [display_txt])) else: - # (b) fallback 
to Adjusted CRC Risk… + # (b) fallback to Adjusted CRC Risk for obs in crc_obs: txt = obs.value_as_string or "" if txt.startswith("Adjusted CRC Risk"): diff --git a/src/migrations/versions/cc1f971840fc_create_analytics_table.py b/src/migrations/versions/cc1f971840fc_create_analytics_table.py new file mode 100644 index 0000000..da9afb8 --- /dev/null +++ b/src/migrations/versions/cc1f971840fc_create_analytics_table.py @@ -0,0 +1,47 @@ +"""create analytics table + +Revision ID: cc1f971840fc +Revises: 02d25e5adcad +Create Date: 2025-06-13 16:17:19.503474 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = 'cc1f971840fc' +down_revision = '02d25e5adcad' +branch_labels = None +depends_on = None + +def upgrade(): # create the analytics table with its per-user, per-case_config unique constraint + op.create_table( + 'analytics', + sa.Column('id', sa.Integer, primary_key=True, autoincrement=True), + sa.Column('user_email', sa.String(128), nullable=False), + sa.Column('case_config_id', sa.String, nullable=False), + sa.Column('case_id', sa.Integer, nullable=False), + sa.Column('case_open_time', sa.DateTime(timezone=True), nullable=False), + sa.Column('answer_open_time', sa.DateTime(timezone=True), nullable=False), + sa.Column('answer_submit_time', sa.DateTime(timezone=True), nullable=False), + sa.Column('to_answer_open_secs', sa.Float, nullable=False), + sa.Column('to_submit_secs', sa.Float, nullable=False), + sa.Column('total_duration_secs', sa.Float, nullable=False), + sa.Column( + 'created_timestamp', + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text('CURRENT_TIMESTAMP') + ), + sa.Column( + 'modified_timestamp', + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text('CURRENT_TIMESTAMP') # insert-time default only; nothing in this table definition refreshes it on UPDATE + ), + sa.UniqueConstraint('user_email', 'case_config_id', name='uq_analytics_user_case') + ) + + +def downgrade(): # drops the table together with every stored analytics row + op.drop_table('analytics')