diff --git a/.gitignore b/.gitignore index c49dd1da..783b46b2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,8 @@ windwatts-ui/env-vars.sh windwatts-api/env-vars.sh node_modules/ update-env-credentials.sh -manage-aws-parameters.sh \ No newline at end of file +manage-aws-parameters.sh + +# Cursor thoughts +thoughts/ +.thoughts/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..3a5bd65b --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# Contributing to WindWatts + +We welcome contributions! Please follow these guidelines to ensure a smooth process. + +## Getting Started + +See the [Quickstart Guide](docs/02-quickstart.md) to set up your development environment. + +## Pull Request Process + +1. Create a new branch for your feature or fix. +2. Make your changes. +3. Run tests and linters locally. +4. Open a Pull Request against the `develop` branch. +5. Describe your changes clearly in the PR description. + +## Branch Naming + +We recommend using descriptive branch names: + +- `feature/my-new-feature` +- `fix/bug-description` +- `docs/documentation-update` + +## Standards + +- **Commits**: Use clear, descriptive commit messages. We encourage [Conventional Commits](https://www.conventionalcommits.org/). + - `feat: add new map layer` + - `fix: resolve api timeout` + - `docs: update quickstart guide` +- **Code Style**: Follow the existing patterns in the codebase. + - **Frontend**: Prettier & ESLint are configured. Run `yarn format` before committing. + - **Backend**: PEP 8 compliance is expected. +- **Tests**: Add tests for new features or bug fixes. 
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..f2862282 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +.PHONY: all lint format test + +lint: + $(MAKE) -C windwatts-api lint + cd windwatts-ui && yarn lint + +format: + $(MAKE) -C windwatts-api format + cd windwatts-ui && yarn format + +test: + $(MAKE) -C windwatts-api test + cd windwatts-ui && yarn test + diff --git a/README.md b/README.md index a41f930b..7d381071 100644 --- a/README.md +++ b/README.md @@ -1,188 +1,22 @@ -# API Endpoint for DW-TAP Project +# WindWatts -## About - -This is an endpoint repository -- it contains the code that runs a Flask app serving the processed WTK data. -The processing includes height selection, time interval selection, spatial interpolation, vertical interpolation, and wind rose calculations. - -When UI/API is running, you can: - -1. See WindWatts-beta by navigating your browswer to: -``` -: -``` -2. Run example API query, by going to a link like this (change the values if necessary) -``` -:/1224?lat=39.76004&lon=-105.14058 -``` -3. Check server info at: -``` -:/status -``` - -For a local deployment, these links would be: -``` -http://localhost:8080 -http://localhost:8080/1224?lat=39.76004&lon=-105.14058 -http://localhost:8080/status -``` - -## How To Use - -### Local Deployment of New Windwatts App using Docker (Docker required) - -#### Prerequisites -1. **Install and Start Docker**: Ensure Docker is installed and running on your machine. -2. **AWS Credentials**: Obtain AWS credentials with access to the Windwatts Data package. -3. **Google Maps API Key**: Create an API key and map ID by following the [Google Maps API Documentation](https://developers.google.com/maps/documentation/javascript). - -#### Steps to Deploy Locally -1. 
**Clone the repository**: The new Windwatts app is hosted on the development branch - ```shell - git clone https://github.com/NREL/dw-tap-api.git - cd dw-tap-api/ - git checkout development # Switch to the development branch where the new app resides - ``` - -2. **Configure environment files**: - - **Root directory (`dw-tap-api/`)**: - - Create or update a `.env` file with the following variables: - ```plaintext - WINDWATTS_DATA_URL=https://windwatts-era5.s3.us-west-2.amazonaws.com/ - AWS_ACCESS_KEY_ID="YOUR_AWS_ACCESS_KEY_ID" - AWS_SECRET_ACCESS_KEY="YOUR_AWS_SECRET_ACCESS_KEY" - AWS_SESSION_TOKEN="YOUR_AWS_SESSION_TOKEN" - ``` - - `WINDWATTS_DATA_URL`: URL for the Windwatts Data package. - - `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`: AWS credentials for accessing resources. - - - **Frontend directory (windwatts-ui)**: - - Create or update a `.env.development` file: - ```plaintext - VITE_API_BASE_URL=http://windwatts-proxy:80 - VITE_MAP_API_KEY=YOUR_MAP_API_KEY - VITE_MAP_ID=YOUR_MAP_ID - ``` - - `VITE_API_BASE_URL`: Base URL for the Windwatts API. - - `VITE_MAP_API_KEY`, `VITE_MAP_ID`: Google Maps API key and map ID. - - - **Backend directory (windwatts-api)**: - - Create or update `windwatts_data_config.json` under `config/`: - ```json - { - "region_name": "us-west-2", - "output_location": "S3_BUCKET_URI_FOR_ATHENA_RESULTS", - "output_bucket": "NAME_OF_S3_BUCKET_FOR_ATHENA_RESULTS", - "database": "NAME_OF_THE_GLUE_DATABASE", - "athena_workgroup": "NAME_OF_THE_ATHENA_WORKGROUP", - "sources": { - "wtk": { - "bucket_name": "NAME_OF_THE_WTK_S3_BUCKET", - "athena_table_name": "NAME_OF_THE_ATHENA_TABLE_FOR_WTK", - "alt_athena_table_name": "" - }, - "era5": { - "bucket_name": "NAME_OF_THE_ERA5_S3_BUCKET", - "athena_table_name": "NAME_OF_THE_ATHENA_TABLE_FOR_ERA5", - "alt_athena_table_name": "" - } - } - } - ``` - - Parameter Description - - **region_name** : AWS region where your S3 buckets and Athena services are hosted. 
- - **output_location** : S3 bucket URI where Athena will store query results (e.g., s3://bucket-name/). - - **output_bucket** : Name of the bucket used above. - - **database** : AWS Glue database name in which athena tables are created. - - **athena_workgroup** : Name of the Athena workgroup to use for querying. - - **sources**.*wtk* : Configuration specific to WTK-Led Climate dataset. - - **sources**.*era5* : Configuration specific to ERA5 dataset. - - **athena_table_name** : Primary Athena table name for the dataset. This table is used for location specific queries. - - **alt_athena_table_name** : Optional alternate Athena table name for non-location specific queries. - - -3. **Deploy the app**: - - Start Docker containers: - ```shell - docker compose up --build - ``` - - If needed, clean up previous containers and volumes: - ```shell - docker compose down --volumes --remove-orphans - ``` - -4. **Access the app**: - - Open your browser and navigate to: `http://localhost:5173/`. - -### Deploy as a Container (requires Docker on the host) - -Build: -```shell -docker build -t tap-api:latest . -``` - -Run: -```shell -docker run -p 8080:80 -it tap-api:latest python proto.py --production -``` - -For troubleshooting inside the container, run (and you will have the prompt change and try the following commands inside the container): -```shell -docker run -p 8080:80 -it tap-api:latest /bin/bash -``` - -Inside the container, flask app will run on port `80`. On the host, you can use any available port, e.g, `8080`, like shown above. 
- -For a simple test (showing info about the endpoint), navigate to the following URL in your browser (running on the host): -``` -http://localhost:8080 -``` - -For a more comprehensive test (with HSDS connection and spatial + vertical interpolations), navigate to this URL: -``` -http://localhost:8080/v1/timeseries/windspeed?height=67.00m&lat=40.7888&lon=-74.0059&start_date=20110302&stop_date=20110303&vertical_interpolation=nearest&spatial_interpolation=idw -``` -This should produce a json output with `timestamp` and `windspeed` values. - -For other examples of working queries, refer to the file: `dw-tap-api.postman_collection.json` (look for `raw` attributes). This file can be used by the Postman app (e.g., installed locally, on a laptop), which will cycle through all documented queries and show their status. - -### Build & Run Natively (without a container) - -```shell -conda env create -conda activate dw-tap-api -python api.py --development -``` - -### Running Modes - -Notice the `--development` flag in the command above -- it makes the endpoint run on port `8080`; for short, you can run: `python api.py -d`. - -*Development* is the default mode (run if no flag is specified). In contrast, you can run the endpoint in the *Production* mode using: `python api.py --production` or `python api.py -p` -- this is what is used in the container deployment described above (to see the details, check the end of `Dockerfile`). - -To see how these production and development modes are configured, refer to `config.json` and see what `host` and `port` values are specified. +WindWatts is a distributed wind resource assessment tool developed by NLR (formerly NREL). It provides processed wind speed data, energy production estimates, and visualizations for locations across the United States. ## Documentation -Interactive HTML page with API documentation is produced using apiDoc. It can be recreated using (requires installing apiDoc locally): -``` -apidoc -i . 
-o docs/ -t apidoc-template -``` -The output can be seen by opening `docs/index.html` in a browser. The flask app is configured to serve this documentation page (and related files) at the "/api" route. - -For installing apiDoc on osx, run: -``` -brew install apidoc -``` +Full documentation for developers, data scientists, and DevOps engineers is available in the **[Documentation Hub](docs/README.md)**. -### More about the Project +### Quick Links -To read a concise summary of the DW-TAP project, please refer to: https://www.energy.gov/sites/prod/files/2020/02/f72/tap-fact-sheet_0.pdf +- [**Quickstart Guide**](docs/02-quickstart.md): Get the app running in < 15 minutes. +- [**Backend API**](docs/03-backend.md): Python/FastAPI development. +- [**Frontend UI**](docs/04-frontend.md): React/Vite development. +- [**Deployment**](docs/05-deployment.md): Production setup. -## Credit +## Contributing -Code in this repository was developed by Dmitry Duplyakin (dmitry.duplyakin@nrel.gov), Caleb Phillips (caleb.phillips@nrel.gov), and Sagi Zisman (sagi.zisman@nrel.gov) to demonstrate the techniques used in distributed wind resource assessment at the National Laboratory of the Rockies in Golden, Colorado, USA. +Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to submit Pull Requests, report issues, and follow coding standards. ## License -Refer to the file called: `LICENSE`. +This project is licensed under the terms found in the `LICENSE` file. diff --git a/check-api-format.sh b/check-api-format.sh new file mode 100755 index 00000000..72045d01 --- /dev/null +++ b/check-api-format.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Navigate to API directory +cd windwatts-api || exit 1 + +# Try to activate venv +if [ -f ".venv/bin/activate" ]; then + source .venv/bin/activate +fi + +# Capture arguments (files) +FILES="$@" +if [ -z "$FILES" ]; then + FILES="." +fi + +echo "Running Python Lint checks (Ruff)..." +if ! 
ruff check $FILES; then + echo "" >&2 + echo "--------------------------------------------------------" >&2 + echo "❌ Python linting failed!" >&2 + echo " Please run the following to fix issues:" >&2 + echo " make lint (or 'make -C windwatts-api lint')" >&2 + echo "--------------------------------------------------------" >&2 + exit 1 +fi + +echo "Running Python Format checks (Ruff)..." +if ! ruff format --check $FILES; then + echo "" >&2 + echo "--------------------------------------------------------" >&2 + echo "❌ Python formatting failed!" >&2 + echo " Please run the following to fix issues:" >&2 + echo " make format (or 'make -C windwatts-api format')" >&2 + echo "--------------------------------------------------------" >&2 + exit 1 +fi + +echo "✅ Python Lint and Format checks passed." +exit 0 diff --git a/check-format.sh b/check-format.sh index f7ec665c..1224dea6 100755 --- a/check-format.sh +++ b/check-format.sh @@ -1,10 +1,43 @@ #!/bin/bash -set -e -cd windwatts-ui +# Navigate to UI directory +cd windwatts-ui || exit 1 + +# If files are passed (from lint-staged), check only those. +# Otherwise default to checking everything (e.g. manual run). +FILES="$@" +if [ -z "$FILES" ]; then + FILES="." + CMD_LINT="yarn lint" + CMD_FMT="yarn check-format" +else + # Use 'yarn run' to invoke binaries locally + # Note: FILES usually contains absolute paths from lint-staged + CMD_LINT="yarn run eslint $FILES" + CMD_FMT="yarn run prettier --check $FILES" +fi echo "Running ESLint..." -yarn lint --fix +if ! $CMD_LINT; then + echo "" >&2 + echo "--------------------------------------------------------" >&2 + echo "❌ ESLint check failed!" >&2 + echo " Please run linting locally before committing:" >&2 + echo " cd windwatts-ui && yarn lint" >&2 + echo "--------------------------------------------------------" >&2 + exit 1 +fi echo "Running Prettier check..." -yarn format -w +if ! 
$CMD_FMT; then + echo "" >&2 + echo "--------------------------------------------------------" >&2 + echo "❌ Prettier check failed!" >&2 + echo " Please run formatting locally before committing:" >&2 + echo " cd windwatts-ui && yarn format" >&2 + echo "--------------------------------------------------------" >&2 + exit 1 +fi + +echo "✅ Lint and Format checks passed." +exit 0 diff --git a/docs/01-overview.md b/docs/01-overview.md new file mode 100644 index 00000000..72f19adf --- /dev/null +++ b/docs/01-overview.md @@ -0,0 +1,52 @@ +# System Overview + +## Architecture + +```mermaid +C4Context + title System Context Diagram for WindWatts + + Person(user, "User", "A user wanting to access wind data") + System_Boundary(windwatts, "WindWatts System") { + Container(ui, "Frontend UI", "React, Vite", "Provides the web interface for users") + Container(api, "Backend API", "FastAPI, Python", "Handles API requests and business logic") + ContainerDb(db, "Database", "PostgreSQL", "Stores application data") + Container(windwatts_data, "Core Library", "Python", "Scientific data processing logic") + } + System_Ext(aws, "AWS Services", "S3, Athena, Glue", "Stores and queries large-scale wind data") + + Rel(user, ui, "Uses", "HTTPS") + Rel(ui, api, "Makes API calls to", "JSON/HTTPS") + Rel(api, db, "Reads/Writes", "SQL/TCP") + Rel(api, windwatts_data, "Imports", "Python Module") + Rel(api, aws, "Queries", "Boto3/HTTPS") +``` + +WindWatts is a monorepo consisting of: + +- **Frontend**: React application (`windwatts-ui`) +- **Backend**: Python FastAPI application (`windwatts-api`) +- **Core Library**: Data processing logic (`windwatts_data`) + +## Tech Stack + +### Frontend + +- **Language**: TypeScript / JavaScript +- **Framework**: React 19 +- **Build Tool**: Vite +- **UI Library**: Material UI v7 +- **Runtime**: Node.js >= 22.14.0 + +### Backend + +- **Language**: Python 3.13 +- **Framework**: FastAPI [standard] +- **Server**: Gunicorn / Uvicorn +- **ORM**: SQLAlchemy +- 
**Database**: PostgreSQL + +### Infrastructure + +- **Containerization**: Docker +- **Cloud Provider**: AWS (S3, Athena, Glue) diff --git a/docs/02-quickstart.md b/docs/02-quickstart.md new file mode 100644 index 00000000..e3aaa250 --- /dev/null +++ b/docs/02-quickstart.md @@ -0,0 +1,95 @@ +# Quickstart Guide + +This guide will help you get the WindWatts application up and running on your local machine. + +## Prerequisites + +1. **Docker**: Ensure Docker is installed and running. +2. **AWS Credentials**: You need access to the WindWatts Data package. +3. **Google Maps API Key**: Required for the frontend map. + +## Option 1: Docker (Recommended) + +This sets up both the API and the UI with a local database. + +### 1. Clone the Repository + +```shell +git clone https://github.com/NREL/windwatts.git +cd windwatts/ +git checkout develop # Ensure you are on the 'develop' branch (the main development branch) +``` + +### 2. Configure Environment + +**Root directory (`.env`)**: +Create a `.env` file in the root: + +```plaintext +WINDWATTS_DATA_URL=https://windwatts-era5.s3.us-west-2.amazonaws.com/ +AWS_ACCESS_KEY_ID="YOUR_AWS_ACCESS_KEY_ID" +AWS_SECRET_ACCESS_KEY="YOUR_AWS_SECRET_ACCESS_KEY" +AWS_SESSION_TOKEN="YOUR_AWS_SESSION_TOKEN" +``` + +**Frontend (`windwatts-ui/.env.development`)**: + +```plaintext +VITE_API_BASE_URL=http://windwatts-proxy:80 +VITE_MAP_API_KEY=YOUR_MAP_API_KEY +VITE_MAP_ID=YOUR_MAP_ID +``` + +**Backend (`windwatts-api/app/config/windwatts_data_config.json`)**: + +```json +{ + "region_name": "us-west-2", + "output_location": "S3_BUCKET_URI_FOR_ATHENA_RESULTS", + "output_bucket": "NAME_OF_S3_BUCKET_FOR_ATHENA_RESULTS", + "database": "NAME_OF_THE_GLUE_DATABASE", + "athena_workgroup": "NAME_OF_THE_ATHENA_WORKGROUP", + "sources": { + "wtk": { + "bucket_name": "NAME_OF_THE_WTK_S3_BUCKET", + "athena_table_name": "NAME_OF_THE_ATHENA_TABLE_FOR_WTK", + "alt_athena_table_name": "" + }, + "era5": { + "bucket_name": "NAME_OF_THE_ERA5_S3_BUCKET", + 
"athena_table_name": "NAME_OF_THE_ATHENA_TABLE_FOR_ERA5", + "alt_athena_table_name": "" + } + } +} +``` + +### 3. Run the App + +```shell +docker compose up --build +``` + +To clean up: + +```shell +docker compose down --volumes --remove-orphans +``` + +### 4. Access + +- **UI**: http://localhost:5173/ +- **API**: http://localhost:8080 + +## Option 2: Native Setup (Without Docker) + +If you prefer running services directly: + +```shell +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +uvicorn app.main:app --reload +``` + +See [Backend Documentation](03-backend.md) and [Frontend Documentation](04-frontend.md) for more details on native development. diff --git a/docs/03-backend.md b/docs/03-backend.md new file mode 100644 index 00000000..41adc740 --- /dev/null +++ b/docs/03-backend.md @@ -0,0 +1,64 @@ +# Backend Development (API) + +The backend is a FastAPI application connected to a PostgreSQL database and AWS Athena/S3 for data retrieval. + +## Setup + +The project uses a standard Python structure. + +### Virtual Environment + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +### WindWatts Data Dependency + +The `windwatts_data` package is a large dependency stored in S3. It is not in PyPI. +If you are running locally without Docker, you must download and install it manually: + +```bash +# Example for installing the wheel manually (check Dockerfile for exact URL/version) +curl -O https://windwatts-era5.s3.us-west-2.amazonaws.com/windwatts_data-1.0.4-py3-none-any.whl +pip install windwatts_data-1.0.4-py3-none-any.whl +``` + +### Makefile Targets + +- `make setup`: Initializes the project. +- `make run`: Runs the webservice locally. 
+ +## Development + +### Running Locally + +```bash +uvicorn app.main:app --reload +``` + +### Database + +**Local Development**: +Add to `.env` in `windwatts-api/`: + +```plaintext +DATABASE_URL=postgresql://windwatts:windwatts@postgres:5432/windwatts_db +POSTGRES_USER=windwatts +POSTGRES_PASSWORD=windwatts +POSTGRES_DB=windwatts_db +``` + +Use Docker Compose (from root) to start the database. + +## Testing + +To run tests: +```bash +pytest +``` + +## API Documentation + +When the app is running, visit `/docs` (e.g., `http://localhost:8080/docs`) to see the auto-generated Swagger UI. diff --git a/docs/04-frontend.md b/docs/04-frontend.md new file mode 100644 index 00000000..fb929053 --- /dev/null +++ b/docs/04-frontend.md @@ -0,0 +1,41 @@ +# Frontend Development (UI) + +The frontend is a React application built with Vite and Material-UI. + +## Prerequisites + +- Node.js >= 22.14.0 +- Yarn + +## Setup + +```bash +cd windwatts-ui +yarn install +``` + +## Configuration + +Create `.env.local` in `windwatts-ui/` (this overrides `.env.development`): + +```shell +VITE_API_BASE_URL=http://windwatts-proxy:80 +VITE_MAP_API_KEY=YOUR_GOOGLE_MAPS_API_KEY +VITE_MAP_ID=YOUR_MAP_ID +``` + +For production, create `.env.production`. + +## Development + +Start the dev server: + +```bash +yarn dev +``` + +## Testing & Linting + +- **Lint**: `yarn lint` +- **Test**: `yarn test` +- **Format**: `yarn format` diff --git a/docs/05-deployment.md b/docs/05-deployment.md new file mode 100644 index 00000000..d2db0407 --- /dev/null +++ b/docs/05-deployment.md @@ -0,0 +1,103 @@ +# Deployment Guide + +## Local Development Deployment + +Uses Docker Compose to run all services (API, UI, database, reverse proxy). + +### Prerequisites + +1. **Docker** and **Docker Compose** installed +2. **AWS credentials** with access to WindWatts data +3. **Google Maps API key** and Map ID + +### Configuration + +**1. 
Root Environment Variables (`.env`):** + +Create a `.env` file in the project root: + +```plaintext +WINDWATTS_DATA_URL= +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_SESSION_TOKEN= +WINDWATTS_DATA_CONFIG_SECRETS_ARN= +``` + +**2. UI Development Environment (`windwatts-ui/.env.development`):** + +```plaintext +VITE_API_BASE_URL=http://windwatts-proxy:80 +VITE_MAP_API_KEY= +VITE_MAP_ID= +``` + +**3. Data Configuration (`windwatts-api/app/config/windwatts_data_config.json`):** + +Configure AWS Athena settings and data sources: + +```json +{ + "region_name": "us-west-2", + "output_location": "s3://your-athena-results-bucket/", + "output_bucket": "your-athena-results-bucket", + "database": "your-glue-database", + "athena_workgroup": "your-athena-workgroup", + "sources": { + "wtk": { + "bucket_name": "your-wtk-bucket", + "athena_table_name": "wtk_table", + "alt_athena_table_name": "", + "capabilities": { "avg_types": ["all", "annual", "monthly", "hourly"] } + }, + "era5": { + "bucket_name": "your-era5-bucket", + "athena_table_name": "era5_table", + "alt_athena_table_name": "", + "capabilities": { "avg_types": ["all", "annual"] } + } + } +} +``` + +### Deploy + +Start the application stack: + +```shell +docker compose up --build +``` + +- **UI**: Access at `http://localhost:5173` +- **API**: Access at `http://localhost:8080` (via proxy) + +Clean up: + +```shell +docker compose down --volumes --remove-orphans +``` + +## Production Deployment + +### Build Individual Services + +**API:** + +```shell +cd windwatts-api && docker build -t windwatts-api:latest . +``` + +**UI:** + +```shell +cd windwatts-ui && docker build -t windwatts-ui:latest . 
+``` + +### AWS Configuration + +- **RDS PostgreSQL**: Configure `DATABASE_URL` for production database +- **Secrets Manager**: Store `WINDWATTS_DATA_CONFIG_SECRETS_ARN` with data source configuration +- **Environment Variables**: Set production AWS credentials and endpoints +- **Load Balancer**: Route traffic to containerized services + +API runs on port 8000 using Gunicorn with Uvicorn workers. diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..f64221d0 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,15 @@ +# Documentation + +Welcome to the WindWatts documentation. + +## Table of Contents + +1. [Overview](01-overview.md) - Architecture and Tech Stack. +2. [Quickstart](02-quickstart.md) - Get the app running in < 15 minutes. +3. [Backend Guide](03-backend.md) - API development. +4. [Frontend Guide](04-frontend.md) - UI development. +5. [Deployment](05-deployment.md) - Production deployment. + +## Contributing + +Please see [CONTRIBUTING.md](../CONTRIBUTING.md) for guidelines on how to contribute to this project. diff --git a/package.json b/package.json index 4d5c9d3f..6c6d7993 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,9 @@ "lint-staged": { "windwatts-ui/src/**/*.{js,jsx,ts,tsx}": [ "./check-format.sh" + ], + "windwatts-api/**/*.py": [ + "./check-api-format.sh" ] }, "devDependencies": { diff --git a/windwatts-api/Makefile b/windwatts-api/Makefile index 55c52a70..86e31134 100644 --- a/windwatts-api/Makefile +++ b/windwatts-api/Makefile @@ -2,6 +2,7 @@ setup: python3 -m venv .venv . .venv/bin/activate; pip install -r requirements.txt + . .venv/bin/activate; pip install -r requirements-dev.txt run: . .venv/bin/activate; gunicorn -w 4 -k uvicorn.workers.UvicornWorker app.main:app @@ -9,3 +10,15 @@ run: .PHONY: openapi openapi: . .venv/bin/activate; PYTHONPATH=. SKIP_DATA_INIT=1 python scripts/generate_openapi.py + +.PHONY: lint +lint: + . .venv/bin/activate; ruff check . + +.PHONY: format +format: + . 
.venv/bin/activate; ruff check --fix .; ruff format . + +.PHONY: test +test: + . .venv/bin/activate; pytest diff --git a/windwatts-api/README.md b/windwatts-api/README.md index 377f971a..4c1498a1 100644 --- a/windwatts-api/README.md +++ b/windwatts-api/README.md @@ -1,4 +1,8 @@ -# Welcome to the WindWatts API project +# WindWatts API (Backend) + +This directory contains the Python backend for WindWatts, built with FastAPI. + +For development instructions, setup guides, and architecture details, please see the **[Backend Documentation](../docs/03-backend.md)**. This project is set up like a standard Python project. The initialization process also creates a virtualenv within this project, stored under the `.venv` diff --git a/windwatts-api/alembic/env.py b/windwatts-api/alembic/env.py index 78700a2a..f74794f7 100644 --- a/windwatts-api/alembic/env.py +++ b/windwatts-api/alembic/env.py @@ -1,6 +1,5 @@ import sys import os -import app.database.models sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -74,9 +73,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/windwatts-api/alembic/versions/adffce76c6b4_initial_migration.py b/windwatts-api/alembic/versions/adffce76c6b4_initial_migration.py index e83deac0..7bf8959c 100644 --- a/windwatts-api/alembic/versions/adffce76c6b4_initial_migration.py +++ b/windwatts-api/alembic/versions/adffce76c6b4_initial_migration.py @@ -1,10 +1,11 @@ """initial migration Revision ID: adffce76c6b4 -Revises: +Revises: Create Date: 2025-05-05 17:18:34.268074 """ + from typing import Sequence, Union from alembic import op @@ -12,7 +13,7 @@ # revision identifiers, used by Alembic. 
-revision: str = 'adffce76c6b4' +revision: str = "adffce76c6b4" down_revision: Union[str, None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -21,74 +22,118 @@ def upgrade() -> None: """Upgrade schema.""" # ### commands auto generated by Alembic - please adjust! ### - op.create_table('audit_logs', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('timestamp', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), - sa.Column('user_id', sa.String(), nullable=True), - sa.Column('action', sa.String(), nullable=True), - sa.Column('resource', sa.String(), nullable=True), - sa.Column('method', sa.String(), nullable=True), - sa.Column('status_code', sa.Integer(), nullable=True), - sa.Column('ip_address', sa.String(), nullable=True), - sa.Column('user_agent', sa.Text(), nullable=True), - sa.Column('request_data', sa.JSON(), nullable=True), - sa.Column('response_data', sa.JSON(), nullable=True), - sa.Column('duration_ms', sa.Integer(), nullable=True), - sa.Column('error_message', sa.Text(), nullable=True), - sa.Column('log_metadata', sa.JSON(), nullable=True), - sa.Column('api_version', sa.String(), nullable=True), - sa.Column('endpoint_category', sa.String(), nullable=True), - sa.Column('request_size_bytes', sa.Integer(), nullable=True), - sa.Column('response_size_bytes', sa.Integer(), nullable=True), - sa.Column('is_error', sa.Boolean(), nullable=True), - sa.Column('error_type', sa.String(), nullable=True), - sa.Column('client_type', sa.String(), nullable=True), - sa.Column('country', sa.String(), nullable=True), - sa.Column('referrer', sa.String(), nullable=True), - sa.Column('request_id', sa.String(), nullable=True), - sa.Column('parent_request_id', sa.String(), nullable=True), - sa.Column('tags', sa.JSON(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_audit_logs_action'), 'audit_logs', ['action'], unique=False) - 
op.create_index(op.f('ix_audit_logs_api_version'), 'audit_logs', ['api_version'], unique=False) - op.create_index(op.f('ix_audit_logs_client_type'), 'audit_logs', ['client_type'], unique=False) - op.create_index(op.f('ix_audit_logs_country'), 'audit_logs', ['country'], unique=False) - op.create_index(op.f('ix_audit_logs_duration_ms'), 'audit_logs', ['duration_ms'], unique=False) - op.create_index(op.f('ix_audit_logs_endpoint_category'), 'audit_logs', ['endpoint_category'], unique=False) - op.create_index(op.f('ix_audit_logs_error_type'), 'audit_logs', ['error_type'], unique=False) - op.create_index(op.f('ix_audit_logs_id'), 'audit_logs', ['id'], unique=False) - op.create_index(op.f('ix_audit_logs_ip_address'), 'audit_logs', ['ip_address'], unique=False) - op.create_index(op.f('ix_audit_logs_is_error'), 'audit_logs', ['is_error'], unique=False) - op.create_index(op.f('ix_audit_logs_method'), 'audit_logs', ['method'], unique=False) - op.create_index(op.f('ix_audit_logs_parent_request_id'), 'audit_logs', ['parent_request_id'], unique=False) - op.create_index(op.f('ix_audit_logs_request_id'), 'audit_logs', ['request_id'], unique=False) - op.create_index(op.f('ix_audit_logs_resource'), 'audit_logs', ['resource'], unique=False) - op.create_index(op.f('ix_audit_logs_status_code'), 'audit_logs', ['status_code'], unique=False) - op.create_index(op.f('ix_audit_logs_timestamp'), 'audit_logs', ['timestamp'], unique=False) - op.create_index(op.f('ix_audit_logs_user_id'), 'audit_logs', ['user_id'], unique=False) + op.create_table( + "audit_logs", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "timestamp", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=True, + ), + sa.Column("user_id", sa.String(), nullable=True), + sa.Column("action", sa.String(), nullable=True), + sa.Column("resource", sa.String(), nullable=True), + sa.Column("method", sa.String(), nullable=True), + sa.Column("status_code", sa.Integer(), nullable=True), + 
sa.Column("ip_address", sa.String(), nullable=True), + sa.Column("user_agent", sa.Text(), nullable=True), + sa.Column("request_data", sa.JSON(), nullable=True), + sa.Column("response_data", sa.JSON(), nullable=True), + sa.Column("duration_ms", sa.Integer(), nullable=True), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("log_metadata", sa.JSON(), nullable=True), + sa.Column("api_version", sa.String(), nullable=True), + sa.Column("endpoint_category", sa.String(), nullable=True), + sa.Column("request_size_bytes", sa.Integer(), nullable=True), + sa.Column("response_size_bytes", sa.Integer(), nullable=True), + sa.Column("is_error", sa.Boolean(), nullable=True), + sa.Column("error_type", sa.String(), nullable=True), + sa.Column("client_type", sa.String(), nullable=True), + sa.Column("country", sa.String(), nullable=True), + sa.Column("referrer", sa.String(), nullable=True), + sa.Column("request_id", sa.String(), nullable=True), + sa.Column("parent_request_id", sa.String(), nullable=True), + sa.Column("tags", sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_audit_logs_action"), "audit_logs", ["action"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_api_version"), "audit_logs", ["api_version"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_client_type"), "audit_logs", ["client_type"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_country"), "audit_logs", ["country"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_duration_ms"), "audit_logs", ["duration_ms"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_endpoint_category"), + "audit_logs", + ["endpoint_category"], + unique=False, + ) + op.create_index( + op.f("ix_audit_logs_error_type"), "audit_logs", ["error_type"], unique=False + ) + op.create_index(op.f("ix_audit_logs_id"), "audit_logs", ["id"], unique=False) + op.create_index( + op.f("ix_audit_logs_ip_address"), "audit_logs", ["ip_address"], 
unique=False + ) + op.create_index( + op.f("ix_audit_logs_is_error"), "audit_logs", ["is_error"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_method"), "audit_logs", ["method"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_parent_request_id"), + "audit_logs", + ["parent_request_id"], + unique=False, + ) + op.create_index( + op.f("ix_audit_logs_request_id"), "audit_logs", ["request_id"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_resource"), "audit_logs", ["resource"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_status_code"), "audit_logs", ["status_code"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_timestamp"), "audit_logs", ["timestamp"], unique=False + ) + op.create_index( + op.f("ix_audit_logs_user_id"), "audit_logs", ["user_id"], unique=False + ) # ### end Alembic commands ### def downgrade() -> None: """Downgrade schema.""" # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f('ix_audit_logs_user_id'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_timestamp'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_status_code'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_resource'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_request_id'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_parent_request_id'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_method'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_is_error'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_ip_address'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_id'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_error_type'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_endpoint_category'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_duration_ms'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_country'), 
table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_client_type'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_api_version'), table_name='audit_logs') - op.drop_index(op.f('ix_audit_logs_action'), table_name='audit_logs') - op.drop_table('audit_logs') + op.drop_index(op.f("ix_audit_logs_user_id"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_timestamp"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_status_code"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_resource"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_request_id"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_parent_request_id"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_method"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_is_error"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_ip_address"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_id"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_error_type"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_endpoint_category"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_duration_ms"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_country"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_client_type"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_api_version"), table_name="audit_logs") + op.drop_index(op.f("ix_audit_logs_action"), table_name="audit_logs") + op.drop_table("audit_logs") # ### end Alembic commands ### diff --git a/windwatts-api/app/config/model_config.py b/windwatts-api/app/config/model_config.py index 31bfbe73..6996bab6 100644 --- a/windwatts-api/app/config/model_config.py +++ b/windwatts-api/app/config/model_config.py @@ -12,59 +12,64 @@ "default_source": "athena", "period_type": { "windspeed": ["all", "annual"], - "production": ["all", "summary", "annual", "full"] - }, - "years": { - 
"full": list(range(2013, 2024)), - "sample": [2020, 2021, 2022, 2023] + "production": ["all", "summary", "annual", "full"], }, + "years": {"full": list(range(2013, 2024)), "sample": [2020, 2021, 2022, 2023]}, "heights": [30, 40, 50, 60, 80, 100], - "grid_info": { "min_lat":23.402, "min_long":-137.725, - "max_lat":51.403, "max_long":-44.224, - "spatial_resolution" : "31 km", "temporal_resolution": "1 hour"}, - "links" : ["https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5"], + "grid_info": { + "min_lat": 23.402, + "min_long": -137.725, + "max_lat": 51.403, + "max_long": -44.224, + "spatial_resolution": "31 km", + "temporal_resolution": "1 hour", + }, + "links": ["https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5"], "references": [ - 'Phillips, C., L. M. Sheridan, P. Conry, D. K. Fytanidis, D. Duplyakin, S. Zisman, N. Duboc, M. Nelson, R. Kotamarthi, R. Linn, M. Broersma, T. Spijkerboer, and H. Tinnesand. 2022. "Evaluation of Obstacle Modelling Approaches for Resource Assessment and Small Wind Turbine Siting: Case Study in the Northern Netherlands." Wind Energy Science 7: 1153-1169. https://doi.org/10.5194/wes-7-1153-2022' - ] + 'Phillips, C., L. M. Sheridan, P. Conry, D. K. Fytanidis, D. Duplyakin, S. Zisman, N. Duboc, M. Nelson, R. Kotamarthi, R. Linn, M. Broersma, T. Spijkerboer, and H. Tinnesand. 2022. "Evaluation of Obstacle Modelling Approaches for Resource Assessment and Small Wind Turbine Siting: Case Study in the Northern Netherlands." Wind Energy Science 7: 1153-1169. 
https://doi.org/10.5194/wes-7-1153-2022' + ], }, "wtk": { "sources": ["athena", "s3"], "default_source": "athena", "period_type": { "windspeed": ["all", "annual", "monthly", "hourly"], - "production": ["all", "summary", "annual", "monthly", "full"] - }, - "years": { - "full": list(range(2000, 2021)), - "sample": [2018, 2019, 2020] + "production": ["all", "summary", "annual", "monthly", "full"], }, + "years": {"full": list(range(2000, 2021)), "sample": [2018, 2019, 2020]}, "heights": [40, 60, 80, 100, 120, 140, 160, 200], - "grid_info": { "min_lat":7.75129, "min_long":-179.99918, - "max_lat":78.392685, "max_long":180.0, - "spatial_resolution" : "2 km", "temporal_resolution": "1 hour"}, - "links" : ["https://www.nrel.gov/grid/wind-toolkit"], + "grid_info": { + "min_lat": 7.75129, + "min_long": -179.99918, + "max_lat": 78.392685, + "max_long": 180.0, + "spatial_resolution": "2 km", + "temporal_resolution": "1 hour", + }, + "links": ["https://www.nrel.gov/grid/wind-toolkit"], "references": [ - 'Draxl, C., B.M. Hodge, A. Clifton, and J. McCaa. 2015. Overview and Meteorological Validation of the Wind Integration National Dataset Toolkit (Technical Report, NREL/TP-5000-61740). Golden, CO: National Laboratory of the Rockies', + "Draxl, C., B.M. Hodge, A. Clifton, and J. McCaa. 2015. Overview and Meteorological Validation of the Wind Integration National Dataset Toolkit (Technical Report, NREL/TP-5000-61740). Golden, CO: National Laboratory of the Rockies", 'Draxl, C., B.M. Hodge, A. Clifton, and J. McCaa. 2015. "The Wind Integration National Dataset (WIND) Toolkit." Applied Energy 151: 355366', - 'King, J., A. Clifton, and B.M. Hodge. 2014. Validation of Power Output for the WIND Toolkit (Technical Report, NREL/TP-5D00-61714). Golden, CO: National Laboratory of the Rockies' - ] + "King, J., A. Clifton, and B.M. Hodge. 2014. Validation of Power Output for the WIND Toolkit (Technical Report, NREL/TP-5D00-61714). 
Golden, CO: National Laboratory of the Rockies", + ], }, "ensemble": { "sources": ["athena"], "default_source": "athena", - "period_type": { - "windspeed": ["all"], - "production": ["all"] - }, - "years": { - "full": list(range(2013, 2024)), - "sample": [] - }, + "period_type": {"windspeed": ["all"], "production": ["all"]}, + "years": {"full": list(range(2013, 2024)), "sample": []}, "heights": [30, 40, 50, 60, 80, 100], - "grid_info": { "min_lat":23.402, "min_long":-137.725, - "max_lat":51.403, "max_long":-44.224, - "spatial_resolution" : "31 km", "temporal_resolution": "1 hour"}, - "links" : [], - "references" : ["Kevin Menear, Sameer Shaik, Lindsay Sheridan, Dmitry Duplyakin, and Caleb Phillips. Methods for High-Accuracy Wind Resource Assessment to Support Distributed Wind Turbine Siting. Under Review."] - } + "grid_info": { + "min_lat": 23.402, + "min_long": -137.725, + "max_lat": 51.403, + "max_long": -44.224, + "spatial_resolution": "31 km", + "temporal_resolution": "1 hour", + }, + "links": [], + "references": [ + "Kevin Menear, Sameer Shaik, Lindsay Sheridan, Dmitry Duplyakin, and Caleb Phillips. Methods for High-Accuracy Wind Resource Assessment to Support Distributed Wind Turbine Siting. Under Review." + ], + }, } diff --git a/windwatts-api/app/config_manager.py b/windwatts-api/app/config_manager.py index 9440a159..613c0159 100644 --- a/windwatts-api/app/config_manager.py +++ b/windwatts-api/app/config_manager.py @@ -3,8 +3,14 @@ import boto3 import tempfile + class ConfigManager: - def __init__(self, secret_arn_env_var: str, local_config_path: str = None, region_name="us-west-2"): + def __init__( + self, + secret_arn_env_var: str, + local_config_path: str = None, + region_name="us-west-2", + ): """ Initialize the ConfigManager with the ARN of the secret. 
@@ -13,7 +19,7 @@ def __init__(self, secret_arn_env_var: str, local_config_path: str = None, regio """ self.secret_arn = os.getenv(secret_arn_env_var) self.local_config_path = local_config_path - self.client = boto3.client('secretsmanager', region_name=region_name) + self.client = boto3.client("secretsmanager", region_name=region_name) def get_config(self) -> str: """ @@ -24,13 +30,13 @@ def get_config(self) -> str: if self.secret_arn: try: response = self.client.get_secret_value(SecretId=self.secret_arn) - secret = response['SecretString'] + secret = response["SecretString"] config_data = json.loads(secret) - + # Save the secret to a temporary file temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") temp_file.close() - with open(temp_file.name, 'w') as f: + with open(temp_file.name, "w") as f: json.dump(config_data, f) return temp_file.name except self.client.exceptions.ClientError as e: @@ -41,7 +47,7 @@ def get_config(self) -> str: if env_config: temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") temp_file.close() - with open(temp_file.name, 'w') as f: + with open(temp_file.name, "w") as f: json.dump(env_config, f) print("Config loaded from environment variables.") return temp_file.name @@ -51,7 +57,9 @@ def get_config(self) -> str: print("Local configuration file found.") return self.local_config_path else: - raise FileNotFoundError("Local configuration file not found and unable to retrieve secret from AWS Secrets Manager or environment variables.") + raise FileNotFoundError( + "Local configuration file not found and unable to retrieve secret from AWS Secrets Manager or environment variables." + ) def _get_config_from_env(self): """ @@ -59,47 +67,58 @@ def _get_config_from_env(self): Dynamically scan env vars with SOURCES__FIELD_NAME pattern for `sources` configuration. 
""" # Top-level keys - region_name = os.getenv('REGION_NAME') - output_location = os.getenv('OUTPUT_LOCATION') - output_bucket = os.getenv('OUTPUT_BUCKET') - database = os.getenv('DATABASE') - athena_workgroup = os.getenv('ATHENA_WORKGROUP') + region_name = os.getenv("REGION_NAME") + output_location = os.getenv("OUTPUT_LOCATION") + output_bucket = os.getenv("OUTPUT_BUCKET") + database = os.getenv("DATABASE") + athena_workgroup = os.getenv("ATHENA_WORKGROUP") # Scan for all SOURCES__FIELD_NAME env vars sources = {} - prefix = 'SOURCES_' - suffixes = ['_BUCKET_NAME', '_ATHENA_TABLE_NAME', '_ALT_ATHENA_TABLE_NAME'] + prefix = "SOURCES_" + suffixes = ["_BUCKET_NAME", "_ATHENA_TABLE_NAME", "_ALT_ATHENA_TABLE_NAME"] env = os.environ source_fields = {} for key, value in env.items(): if key.startswith(prefix): - rest = key[len(prefix):] + rest = key[len(prefix) :] for suffix in suffixes: if rest.endswith(suffix): - source = rest[:-len(suffix)].lower() + source = rest[: -len(suffix)].lower() field = suffix[1:].lower() # e.g. 
'bucket_name' if source not in source_fields: source_fields[source] = {} source_fields[source][field] = value # Package the sources with required fields into `sources` for source, fields in source_fields.items(): - if 'bucket_name' in fields and 'athena_table_name' in fields: - if 'alt_athena_table_name' not in fields: - fields['alt_athena_table_name'] = '' + if "bucket_name" in fields and "athena_table_name" in fields: + if "alt_athena_table_name" not in fields: + fields["alt_athena_table_name"] = "" sources[source] = { - 'bucket_name': fields['bucket_name'], - 'athena_table_name': fields['athena_table_name'], - 'alt_athena_table_name': fields['alt_athena_table_name'] + "bucket_name": fields["bucket_name"], + "athena_table_name": fields["athena_table_name"], + "alt_athena_table_name": fields["alt_athena_table_name"], } # check if all keys have been set - if all([region_name, output_location, output_bucket, database, athena_workgroup]) and sources: + if ( + all( + [ + region_name, + output_location, + output_bucket, + database, + athena_workgroup, + ] + ) + and sources + ): return { - 'region_name': region_name, - 'output_location': output_location, - 'output_bucket': output_bucket, - 'database': database, - 'athena_workgroup': athena_workgroup, - 'sources': sources + "region_name": region_name, + "output_location": output_location, + "output_bucket": output_bucket, + "database": database, + "athena_workgroup": athena_workgroup, + "sources": sources, } else: - return None \ No newline at end of file + return None diff --git a/windwatts-api/app/controllers/era5_data_controller.py b/windwatts-api/app/controllers/era5_data_controller.py index cca1f660..b4350dca 100644 --- a/windwatts-api/app/controllers/era5_data_controller.py +++ b/windwatts-api/app/controllers/era5_data_controller.py @@ -4,15 +4,17 @@ import zipfile import tempfile import re -import time import os import io + # commented out the data functions until I can get local athena_config working from 
app.config_manager import ConfigManager from app.data_fetchers.s3_data_fetcher import S3DataFetcher from app.data_fetchers.athena_data_fetcher import AthenaDataFetcher + # from app.data_fetchers.database_data_fetcher import DatabaseDataFetcher from app.data_fetchers.data_fetcher_router import DataFetcherRouter + # from app.database_manager import DatabaseManager from app.utils.data_fetcher_utils import format_coordinate, chunker @@ -21,8 +23,7 @@ WindSpeedResponse, AvailablePowerCurvesResponse, EnergyProductionResponse, - GridLocation, - NearestLocationsResponse + NearestLocationsResponse, ) router = APIRouter() @@ -34,14 +35,24 @@ # Initialize ConfigManager config_manager = ConfigManager( secret_arn_env_var="WINDWATTS_DATA_CONFIG_SECRET_ARN", - local_config_path="./app/config/windwatts_data_config.json") # replace with YOUR local config path + local_config_path="./app/config/windwatts_data_config.json", + ) # replace with YOUR local config path athena_config = config_manager.get_config() # Initialize DataFetchers # s3_data_fetcher = S3DataFetcher("WINDWATTS_S3_BUCKET_NAME") - athena_data_fetcher_era5 = AthenaDataFetcher(athena_config=athena_config, source_key='era5') - athena_data_fetcher_ensemble = AthenaDataFetcher(athena_config=athena_config, source_key='ensemble') - s3_data_fetcher_era5 = S3DataFetcher(bucket_name="windwatts-era5", prefix="era5_timeseries", grid="era5", s3_key_template="era5") + athena_data_fetcher_era5 = AthenaDataFetcher( + athena_config=athena_config, source_key="era5" + ) + athena_data_fetcher_ensemble = AthenaDataFetcher( + athena_config=athena_config, source_key="ensemble" + ) + s3_data_fetcher_era5 = S3DataFetcher( + bucket_name="windwatts-era5", + prefix="era5_timeseries", + grid="era5", + s3_key_template="era5", + ) # db_manager = DatabaseManager() # db_data_fetcher = DatabaseDataFetcher(db_manager=db_manager) @@ -50,7 +61,9 @@ # data_fetcher_router.register_fetcher("database", db_data_fetcher) 
data_fetcher_router.register_fetcher("s3_era5", s3_data_fetcher_era5) data_fetcher_router.register_fetcher("athena_era5", athena_data_fetcher_era5) - data_fetcher_router.register_fetcher("athena_ensemble", athena_data_fetcher_ensemble) + data_fetcher_router.register_fetcher( + "athena_ensemble", athena_data_fetcher_ensemble + ) # Centralized valid avg types dictionary VALID_AVG_TYPES = { @@ -64,83 +77,100 @@ }, } # YEARS list for the sample data download feature -SAMPLE_YEARS = { - "s3_era5" : [2020, 2021, 2022, 2023] -} +SAMPLE_YEARS = {"s3_era5": [2020, 2021, 2022, 2023]} # YEARS for which we have era5 data in the S3 -ALL_YEARS = { - "s3_era5" : list(range(2013,2024)) -} +ALL_YEARS = {"s3_era5": list(range(2013, 2024))} # data_type='era5' # data_source = "athena_era5" VALID_SOURCES = {"athena_era5", "athena_ensemble", "s3_era5"} # <-- new DEFAULT_SOURCE = "athena_era5" + # Helper validation functions def validate_lat(lat: float) -> float: if not (-90 <= lat <= 90): - raise HTTPException(status_code=400, detail="Latitude must be between -90 and 90.") + raise HTTPException( + status_code=400, detail="Latitude must be between -90 and 90." + ) return lat + def validate_lng(lng: float) -> float: if not (-180 <= lng <= 180): - raise HTTPException(status_code=400, detail="Longitude must be between -180 and 180.") + raise HTTPException( + status_code=400, detail="Longitude must be between -180 and 180." + ) return lng + def validate_height(height: int) -> int: if not (0 < height <= 300): - raise HTTPException(status_code=400, detail="Height must be between 1 and 300 meters.") + raise HTTPException( + status_code=400, detail="Height must be between 1 and 300 meters." + ) return height + def validate_avg_type(avg_type: str, source: str) -> str: allowed = VALID_AVG_TYPES[source]["wind_speed"] if avg_type not in allowed: raise HTTPException( status_code=400, - detail=f"Invalid avg_type. Must be one of: {allowed} for {source}." + detail=f"Invalid avg_type. 
Must be one of: {allowed} for {source}.", ) return avg_type + def validate_production_avg_type(avg_type: str, source: str) -> str: allowed = VALID_AVG_TYPES[source]["production"] if avg_type not in allowed: raise HTTPException( status_code=400, - detail=f"Invalid time_period. Must be one of: {allowed} for {source}." + detail=f"Invalid time_period. Must be one of: {allowed} for {source}.", ) return avg_type + def validate_selected_powercurve(selected_powercurve: str) -> str: # Only allow alphanumeric, dash, underscore, dot - if not re.match(r'^[\w\-.]+$', selected_powercurve): + if not re.match(r"^[\w\-.]+$", selected_powercurve): raise HTTPException(status_code=400, detail="Invalid selected_powercurve name.") if selected_powercurve not in power_curve_manager.power_curves: raise HTTPException(status_code=400, detail="Selected power curve not found.") return selected_powercurve + def validate_source(source: str) -> str: if source not in VALID_SOURCES: - raise HTTPException(status_code=400, detail=f"Invalid source for ERA5 data. Must be one of: {sorted(VALID_SOURCES)}.") + raise HTTPException( + status_code=400, + detail=f"Invalid source for ERA5 data. Must be one of: {sorted(VALID_SOURCES)}.", + ) return source + def validate_year(year: int, source: str) -> int: if year not in ALL_YEARS[source]: - raise HTTPException(status_code=400, detail=f"Invalid year for ERA5 data. Currently supporting years 2013-2023") + raise HTTPException( + status_code=400, + detail="Invalid year for ERA5 data. Currently supporting years 2013-2023", + ) return year + def validate_n_neighbor(n_neighbor: int) -> int: - if not 1<=n_neighbor<=4: # have to change the limit if needed later on - raise HTTPException(status_code=400, detail=f"Invalid number of neighbors. Currently supporting upto 4 nearest neighbors") + if not 1 <= n_neighbor <= 4: # have to change the limit if needed later on + raise HTTPException( + status_code=400, + detail="Invalid number of neighbors. 
Currently supporting upto 4 nearest neighbors", + ) return n_neighbor + def _get_windspeed_core( - lat: float, - lng: float, - height: int, - avg_type: str, - source: str + lat: float, lng: float, height: int, avg_type: str, source: str ): """ Core function to retrieve wind speed data from the source database. @@ -158,17 +188,13 @@ def _get_windspeed_core( avg_type = validate_avg_type(avg_type, source) # Legacy conversion: avg_type -> period for new API - params = { - "lat": lat, - "lng": lng, - "height": height, - "period": avg_type - } + params = {"lat": lat, "lng": lng, "height": height, "period": avg_type} data = data_fetcher_router.fetch_data(params, key=source) if data is None: raise HTTPException(status_code=404, detail="Data not found") return data + @router.get( "/windspeed/{avg_type}", summary="Retrieve wind speed with avg type - era5 data", @@ -176,27 +202,32 @@ def _get_windspeed_core( responses={ 200: { "description": "Wind speed data retrieved successfully", - "model": WindSpeedResponse + "model": WindSpeedResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def get_windspeed_with_avg_type( avg_type: str = Path(..., description="Type of average to retrieve."), lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), - ensemble: bool = Query(False, description="If true, use ensemble model (athena_ensemble)."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + ensemble: bool = Query( + False, description="If true, use ensemble model (athena_ensemble)." 
+ ), + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: if ensemble: - return _get_windspeed_core(lat, lng, height, avg_type, source="athena_ensemble") + return _get_windspeed_core( + lat, lng, height, avg_type, source="athena_ensemble" + ) else: return _get_windspeed_core(lat, lng, height, avg_type, source) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( "/windspeed", summary="Retrieve wind speed with default global avg - era5 data", @@ -204,52 +235,58 @@ def get_windspeed_with_avg_type( responses={ 200: { "description": "Wind speed data retrieved successfully", - "model": WindSpeedResponse + "model": WindSpeedResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def get_windspeed( lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), - ensemble: bool = Query(False, description="If true, use ensemble model (athena_ensemble)."), + ensemble: bool = Query( + False, description="If true, use ensemble model (athena_ensemble)." 
+ ), period: str = Query("all", description="Time period for wind speed calculation."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: if ensemble: - return _get_windspeed_core(lat, lng, height, period, source="athena_ensemble") + return _get_windspeed_core( + lat, lng, height, period, source="athena_ensemble" + ) else: return _get_windspeed_core(lat, lng, height, period, source) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( - "/powercurves", - summary="Fetch all available power curves", - response_model=AvailablePowerCurvesResponse, - responses={ + "/powercurves", + summary="Fetch all available power curves", + response_model=AvailablePowerCurvesResponse, + responses={ 200: { "description": "Available power curves retrieved successfully", - "model": AvailablePowerCurvesResponse + "model": AvailablePowerCurvesResponse, }, 500: {"description": "Internal server error"}, - }) + }, +) def fetch_available_powercurves(): - ''' + """ returns available power curves - ''' + """ try: all_curves = list(power_curve_manager.power_curves.keys()) - prefix = 'nlr-reference-' + prefix = "nlr-reference-" def extract_kw(curve_name: str): # Extracts the kw value from curves, "2.5kW" -> 2.5 match = re.search(rf"{prefix}([0-9.]+)kW", curve_name) if match: return float(match.group(1)) - return float('inf') + return float("inf") curves = [c for c in all_curves if c.startswith(prefix)] other_curves = [c for c in all_curves if not c.startswith(prefix)] @@ -258,17 +295,13 @@ def extract_kw(curve_name: str): other_curves_sorted = sorted(other_curves) ordered_curves = curves_sorted + other_curves_sorted - return {'available_power_curves': ordered_curves} + return {"available_power_curves": ordered_curves} except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + def _get_energy_production_core( - 
lat: float, - lng: float, - height: int, - powercurve: str, - period: str, - source: str + lat: float, lng: float, height: int, powercurve: str, period: str, source: str ): """ Fetches the global, yearly and monthly energy production and average windspeed for a given location, height, and power curve. @@ -288,65 +321,95 @@ def _get_energy_production_core( selected_powercurve = validate_selected_powercurve(powercurve) source = validate_source(source) period = validate_production_avg_type(period, source) - params = { - "lat": lat, - "lng": lng, - "height": height - } + params = {"lat": lat, "lng": lng, "height": height} df = data_fetcher_router.fetch_raw(params, key=source) if df is None: raise HTTPException(status_code=404, detail="Data not found") - - if period == 'all': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) - return {"energy_production": summary_avg_energy_production['Average year']['kWh produced']} - - elif period == 'summary': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) + + if period == "all": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) + return { + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ] + } + + elif period == "summary": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) return {"summary_avg_energy_production": summary_avg_energy_production} - - elif period == 'annual': - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, selected_powercurve) + + elif period == "annual": + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, selected_powercurve + ) + ) return 
{"yearly_avg_energy_production": yearly_avg_energy_production} - - elif period == 'full': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, selected_powercurve) + + elif period == "full": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, selected_powercurve + ) + ) return { - "energy_production": summary_avg_energy_production['Average year']['kWh produced'], + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ], "summary_avg_energy_production": summary_avg_energy_production, - "yearly_avg_energy_production": yearly_avg_energy_production + "yearly_avg_energy_production": yearly_avg_energy_production, } + @router.get( - "/production/{period}", - summary="Get yearly and monthly energy production estimate and average windspeed for a location at a height with a selected power curve", - response_model=EnergyProductionResponse, - responses={ - 200: { - "description": "Energy production data retrieved successfully", - "model": EnergyProductionResponse - }, - 500: {"description": "Internal server error"}, - } - ) + "/production/{period}", + summary="Get yearly and monthly energy production estimate and average windspeed for a location at a height with a selected power curve", + response_model=EnergyProductionResponse, + responses={ + 200: { + "description": "Energy production data retrieved successfully", + "model": EnergyProductionResponse, + }, + 500: {"description": "Internal server error"}, + }, +) def energy_production_with_period( period: str = Path(..., description="Time period for production estimate."), lat: float = Query(..., description="Latitude of the location."), 
lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), powercurve: str = Query(..., description="Selected power curve name."), - ensemble: bool = Query(False, description="If true, use ensemble model (athena_ensemble)."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + ensemble: bool = Query( + False, description="If true, use ensemble model (athena_ensemble)." + ), + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: if ensemble: - return _get_energy_production_core(lat, lng, height, powercurve, period, source="athena_ensemble") + return _get_energy_production_core( + lat, lng, height, powercurve, period, source="athena_ensemble" + ) else: - return _get_energy_production_core(lat, lng, height, powercurve, period, source) + return _get_energy_production_core( + lat, lng, height, powercurve, period, source + ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( "/production", summary="Get global energy production estimate for a location at a height with a selected power curve", @@ -354,10 +417,10 @@ def energy_production_with_period( responses={ 200: { "description": "Energy production data retrieved successfully", - "model": EnergyProductionResponse + "model": EnergyProductionResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def energy_production( lat: float = Query(..., description="Latitude of the location."), @@ -365,76 +428,86 @@ def energy_production( height: int = Query(..., description="Height in meters."), powercurve: str = Query(..., description="Selected power curve name."), period: str = Query("all", description="Time period for production estimate."), - ensemble: bool = Query(False, description="If true, use ensemble model (athena_ensemble)."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + ensemble: bool = Query( + False, 
description="If true, use ensemble model (athena_ensemble)." + ), + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: if ensemble: - return _get_energy_production_core(lat, lng, height, powercurve, period, source="athena_ensemble") + return _get_energy_production_core( + lat, lng, height, powercurve, period, source="athena_ensemble" + ) else: - return _get_energy_production_core(lat, lng, height, powercurve, period, source) + return _get_energy_production_core( + lat, lng, height, powercurve, period, source + ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") -def _download_csv_core( - gridIndices: List[str], - years: List[int], - source: str -): + +def _download_csv_core(gridIndices: List[str], years: List[int], source: str): source = validate_source(source) - years= [validate_year(year,source) for year in years] - - params = { - "gridIndices": gridIndices, - "years": years - } + years = [validate_year(year, source) for year in years] + + params = {"gridIndices": gridIndices, "years": years} df = data_fetcher_router.fetch_data(params, key=source) if df is None or df.empty: - raise HTTPException(status_code=404, detail="No data found for the specified parameters") - + raise HTTPException( + status_code=404, detail="No data found for the specified parameters" + ) + return df - + @router.get( "/timeseries", - summary="Download csv file for windspeed timeseries for a specific location for certain year(s) with 1 neighbor" + summary="Download csv file for windspeed timeseries for a specific location for certain year(s) with 1 neighbor", ) def download_timeseries_csv( - gridIndex: str = Query(..., description="Grid index with respect to user selected coordinate"), - years: List[int] = Query(SAMPLE_YEARS["s3_era5"], description="years of which the data to download"), - source: str = Query("s3_era5", description="Source of the data.") + gridIndex: str = Query( + ..., description="Grid index with respect 
to user selected coordinate" + ), + years: List[int] = Query( + SAMPLE_YEARS["s3_era5"], description="years of which the data to download" + ), + source: str = Query("s3_era5", description="Source of the data."), ): try: # Getting DataFrame from core function df = _download_csv_core([gridIndex], years, source) - + # Converting DataFrame to CSV csv_io = io.StringIO() df.to_csv(csv_io, index=False) csv_io.seek(0) - + return StreamingResponse( - iter([csv_io.getvalue()]), - media_type="text/csv; charset=utf-8" + iter([csv_io.getvalue()]), media_type="text/csv; charset=utf-8" ) - + except Exception as e: raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @router.post( "/timeseries/batch", summary="Download multiple CSVs (one per neighbor) as a streamed ZIP", ) def download_timeseries_csv_batch( payload: NearestLocationsResponse, - years: List[int] = Query(SAMPLE_YEARS["s3_era5"], description="years of which the data to download"), + years: List[int] = Query( + SAMPLE_YEARS["s3_era5"], description="years of which the data to download" + ), source: str = Query("s3_era5", description="Source of the data."), ): try: # Spooled file: stays in memory until threshold, then spills to disk automatically - spooled = tempfile.SpooledTemporaryFile(max_size=30 * 1024 * 1024, mode="w+b") # 30MB threshold (Each decompressed file is around 5.3 MB) + spooled = tempfile.SpooledTemporaryFile( + max_size=30 * 1024 * 1024, mode="w+b" + ) # 30MB threshold (Each decompressed file is around 5.3 MB) with zipfile.ZipFile(spooled, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: for loc in payload.locations: @@ -451,11 +524,14 @@ def download_timeseries_csv_batch( "Content-Disposition": f'attachment; filename="wind_data_{len(payload.locations)}_points.zip"' } - return StreamingResponse(chunker(spooled), media_type="application/zip", headers=headers) + return StreamingResponse( + chunker(spooled), media_type="application/zip", headers=headers + ) except Exception 
as e: raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @router.get( "/grid-points", summary="Find nearest grid points", @@ -470,7 +546,7 @@ def grid_points( lat: float = Query(..., description="Latitude of the target location."), lng: float = Query(..., description="Longitude of the target location."), limit: int = Query(1, description="Number of nearest grid points."), - source: str = Query(DEFAULT_SOURCE, description=f"Source of the data"), + source: str = Query(DEFAULT_SOURCE, description="Source of the data"), ): try: lat = validate_lat(lat) @@ -481,26 +557,22 @@ def grid_points( grid_lookup_map = { "athena_era5": athena_data_fetcher_era5, } - + fetcher = grid_lookup_map.get(source) if not fetcher: raise HTTPException( - status_code=400, - detail=f"Nearest locations lookup not available for source='{source}'" + status_code=400, + detail=f"Nearest locations lookup not available for source='{source}'", ) # Call find_nearest_locations on the Athena fetcher result = fetcher.find_nearest_locations(lat=lat, lng=lng, n_neighbors=limit) - + locations = [ - { - "index": str(i), - "latitude": float(a), - "longitude": float(o) - } + {"index": str(i), "latitude": float(a), "longitude": float(o)} for i, a, o in result ] return {"locations": locations} except Exception as e: - raise HTTPException(status_code=500, detail=f"Internal server error: {e}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Internal server error: {e}") diff --git a/windwatts-api/app/controllers/random_controller.py b/windwatts-api/app/controllers/random_controller.py index be215d03..5abb2cdb 100644 --- a/windwatts-api/app/controllers/random_controller.py +++ b/windwatts-api/app/controllers/random_controller.py @@ -1,6 +1,6 @@ -''' +""" This still just exists as an example...we should remove this before we go live -''' +""" import requests from fastapi import APIRouter, HTTPException, Query @@ -24,9 +24,10 @@ "religion", "science", "sport", - 
"travel" + "travel", ] + @router.get("/", summary="Retrieve a random message") def read_random_message(): try: @@ -35,6 +36,7 @@ def read_random_message(): except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get("/chuck", summary="Get a random Chuck Norris joke") def get_chuck_norris_joke(): try: @@ -43,20 +45,34 @@ def get_chuck_norris_joke(): joke = response.json().get("value") return {"joke": joke} except requests.RequestException: - raise HTTPException(status_code=502, detail="Failed to fetch joke from external service.") + raise HTTPException( + status_code=502, detail="Failed to fetch joke from external service." + ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") -@router.get("/chuck/{category}", summary="Get a random Chuck Norris joke from a list of categories") -def get_chuck_norris_joke_by_category(category: str = Query(..., description="Joke category")): + +@router.get( + "/chuck/{category}", + summary="Get a random Chuck Norris joke from a list of categories", +) +def get_chuck_norris_joke_by_category( + category: str = Query(..., description="Joke category"), +): if category not in categories: - raise HTTPException(status_code=400, detail=f"Invalid category. Must be one of: {categories}") + raise HTTPException( + status_code=400, detail=f"Invalid category. Must be one of: {categories}" + ) try: - response = requests.get(f'https://api.chucknorris.io/jokes/random?category={category}', timeout=5) + response = requests.get( + f"https://api.chucknorris.io/jokes/random?category={category}", timeout=5 + ) response.raise_for_status() joke = response.json().get("value") return {"joke": joke} except requests.RequestException: - raise HTTPException(status_code=502, detail="Failed to fetch joke from external service.") + raise HTTPException( + status_code=502, detail="Failed to fetch joke from external service." 
+ ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") diff --git a/windwatts-api/app/controllers/wind_data_controller.py b/windwatts-api/app/controllers/wind_data_controller.py index 5550d84c..bce114a9 100644 --- a/windwatts-api/app/controllers/wind_data_controller.py +++ b/windwatts-api/app/controllers/wind_data_controller.py @@ -5,7 +5,6 @@ import tempfile import re import os -import io from app.config_manager import ConfigManager from app.config.model_config import MODEL_CONFIG @@ -14,7 +13,11 @@ from app.data_fetchers.data_fetcher_router import DataFetcherRouter from app.utils.data_fetcher_utils import format_coordinate, chunker from app.utils.validation import validate_model, validate_limit -from app.utils.wind_data_core import get_windspeed_core, get_production_core, get_timeseries_core +from app.utils.wind_data_core import ( + get_windspeed_core, + get_production_core, + get_timeseries_core, +) from app.power_curve.global_power_curve_manager import power_curve_manager from app.schemas import ( @@ -23,7 +26,7 @@ EnergyProductionResponse, NearestLocationsResponse, TimeseriesBatchRequest, - ModelInfoResponse + ModelInfoResponse, ) router = APIRouter() @@ -40,36 +43,43 @@ # Initialize ConfigManager config_manager = ConfigManager( secret_arn_env_var="WINDWATTS_DATA_CONFIG_SECRET_ARN", - local_config_path="./app/config/windwatts_data_config.json" + local_config_path="./app/config/windwatts_data_config.json", ) athena_config = config_manager.get_config() # Initialize Athena data fetchers - athena_data_fetchers["era5"] = AthenaDataFetcher(athena_config=athena_config, source_key='era5') - athena_data_fetchers["ensemble"] = AthenaDataFetcher(athena_config=athena_config, source_key='ensemble') - athena_data_fetchers["wtk"] = AthenaDataFetcher(athena_config=athena_config, source_key='wtk') + athena_data_fetchers["era5"] = AthenaDataFetcher( + athena_config=athena_config, source_key="era5" + ) + athena_data_fetchers["ensemble"] = 
AthenaDataFetcher( + athena_config=athena_config, source_key="ensemble" + ) + athena_data_fetchers["wtk"] = AthenaDataFetcher( + athena_config=athena_config, source_key="wtk" + ) # Initialize S3 data fetchers s3_data_fetchers["era5"] = S3DataFetcher( bucket_name="windwatts-era5", prefix="era5_timeseries", grid="era5", - s3_key_template="era5" + s3_key_template="era5", ) s3_data_fetchers["wtk"] = S3DataFetcher( - bucket_name="wtk-led", - prefix="1224", - grid="wtk", - s3_key_template="wtk" + bucket_name="wtk-led", prefix="1224", grid="wtk", s3_key_template="wtk" ) # Register fetchers with DataFetcherRouter # Register with simple names: athena, s3 (not athena_era5, s3_era5) for model_key in ["era5", "ensemble", "wtk"]: if model_key in athena_data_fetchers: - data_fetcher_router.register_fetcher(f"athena_{model_key}", athena_data_fetchers[model_key]) + data_fetcher_router.register_fetcher( + f"athena_{model_key}", athena_data_fetchers[model_key] + ) if model_key in s3_data_fetchers: - data_fetcher_router.register_fetcher(f"s3_{model_key}", s3_data_fetchers[model_key]) + data_fetcher_router.register_fetcher( + f"s3_{model_key}", s3_data_fetchers[model_key] + ) # API Endpoints @@ -80,24 +90,29 @@ responses={ 200: { "description": "Wind speed data retrieved successfully", - "model": WindSpeedResponse + "model": WindSpeedResponse, }, 400: {"description": "Bad request - invalid parameters"}, 404: {"description": "Data not found"}, 500: {"description": "Internal server error"}, - } + }, ) def get_windspeed( model: str = Path(..., description="Data model: era5, wtk, or ensemble"), lat: float = Query(..., description="Latitude of the location"), lng: float = Query(..., description="Longitude of the location"), height: int = Query(..., description="Height in meters"), - period: str = Query("all", description="Time period: all, annual, monthly, hourly (varies by model)"), - source: Optional[str] = Query(None, description="Data source: athena or s3. 
Defaults to model's default source (athena).") + period: str = Query( + "all", description="Time period: all, annual, monthly, hourly (varies by model)" + ), + source: Optional[str] = Query( + None, + description="Data source: athena or s3. Defaults to model's default source (athena).", + ), ): """ Retrieve wind speed data for a specific location and height. - + - **model**: Data model (era5, wtk, ensemble) - **lat**: Latitude (varies by model, refer info endpoint for coordinate bounds) - **lng**: Longitude (varies by model, refer info endpoint for coordinate bounds) @@ -109,8 +124,10 @@ def get_windspeed( # Use default source if not provided if source is None: source = MODEL_CONFIG.get(model, {}).get("default_source", "athena") - - return get_windspeed_core(model, lat, lng, height, period, source, data_fetcher_router) + + return get_windspeed_core( + model, lat, lng, height, period, source, data_fetcher_router + ) except HTTPException: raise except Exception as e: @@ -124,25 +141,33 @@ def get_windspeed( responses={ 200: { "description": "Energy production data retrieved successfully", - "model": EnergyProductionResponse + "model": EnergyProductionResponse, }, 400: {"description": "Bad request - invalid parameters"}, 404: {"description": "Data not found"}, 500: {"description": "Internal server error"}, - } + }, ) def get_production( model: str = Path(..., description="Data model: era5, wtk, or ensemble"), lat: float = Query(..., description="Latitude of the location"), lng: float = Query(..., description="Longitude of the location"), height: int = Query(..., description="Height in meters"), - powercurve: str = Query(..., description="Power curve identifier (e.g., nrel-reference-100kW)"), - period: str = Query("all", description="Time period: all, summary, annual, monthly (varies by model)"), - source: Optional[str] = Query(None, description="Data source: athena or s3. 
Defaults to model's default source (athena).") + powercurve: str = Query( + ..., description="Power curve identifier (e.g., nrel-reference-100kW)" + ), + period: str = Query( + "all", + description="Time period: all, summary, annual, monthly (varies by model)", + ), + source: Optional[str] = Query( + None, + description="Data source: athena or s3. Defaults to model's default source (athena).", + ), ): """ Retrieve energy production estimates for a specific location, height, and power curve. - + - **model**: Data model (era5, wtk, ensemble) - **lat**: Latitude (varies by model, refer info endpoint for coordinate bounds) - **lng**: Longitude (varies by model, refer info endpoint for coordinate bounds) @@ -155,8 +180,10 @@ def get_production( # Use default source if not provided if source is None: source = MODEL_CONFIG.get(model, {}).get("default_source", "athena") - - return get_production_core(model, lat, lng, height, powercurve, period, source, data_fetcher_router) + + return get_production_core( + model, lat, lng, height, powercurve, period, source, data_fetcher_router + ) except HTTPException: raise except Exception as e: @@ -170,15 +197,15 @@ def get_production( responses={ 200: { "description": "Available power curves retrieved successfully", - "model": AvailablePowerCurvesResponse + "model": AvailablePowerCurvesResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def get_powercurves(): """ Retrieve a list of all available power curves. - + Power curves are model-agnostic and can be used with any dataset (era5, wtk, ensemble). 
""" try: @@ -189,7 +216,7 @@ def extract_kw(curve_name: str): match = re.search(r"nrel-reference-([0-9.]+)kW", curve_name) if match: return float(match.group(1)) - return float('inf') + return float("inf") nrel_curves = [c for c in all_curves if c.startswith("nrel-reference-")] other_curves = [c for c in all_curves if not c.startswith("nrel-reference-")] @@ -198,7 +225,7 @@ def extract_kw(curve_name: str): other_curves_sorted = sorted(other_curves) ordered_curves = nrel_curves_sorted + other_curves_sorted - return {'available_power_curves': ordered_curves} + return {"available_power_curves": ordered_curves} except Exception as e: raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") @@ -218,13 +245,15 @@ def get_grid_points( lat: float = Query(..., description="Latitude of the target location"), lng: float = Query(..., description="Longitude of the target location"), limit: int = Query(1, description="Number of nearest grid points to return (1-4)"), - source: Optional[str] = Query(None, description="Data source. Defaults to model's default source.") + source: Optional[str] = Query( + None, description="Data source. Defaults to model's default source." + ), ): """ Find the nearest grid points to a given coordinate. - + Returns grid indices and their coordinates for the closest data points in the model's grid. 
- + - **model**: Data model (era5, wtk, ensemble) - **lat**: (varies by model, refer info endpoint for coordinate bounds) - **lng**: (varies by model, refer info endpoint for coordinate bounds) @@ -233,27 +262,23 @@ def get_grid_points( """ try: model = validate_model(model) - + # Grid lookup only available via athena # Use athena fetcher for the specified model fetcher = athena_data_fetchers.get(model) - - if not fetcher or not hasattr(fetcher, 'find_nearest_locations'): + + if not fetcher or not hasattr(fetcher, "find_nearest_locations"): raise HTTPException( status_code=400, - detail=f"Grid point lookup not available for model '{model}'" + detail=f"Grid point lookup not available for model '{model}'", ) # Call find_nearest_locations on the fetcher limit = validate_limit(limit) result = fetcher.find_nearest_locations(lat=lat, lng=lng, n_neighbors=limit) - + locations = [ - { - "index": str(i), - "latitude": float(a), - "longitude": float(o) - } + {"index": str(i), "latitude": float(a), "longitude": float(o)} for i, a, o in result ] @@ -272,37 +297,37 @@ def get_grid_points( 200: {"description": "Model information retrieved successfully"}, 400: {"description": "Invalid model"}, 500: {"description": "Internal server error"}, - } + }, ) def get_model_info( - model: str = Path(..., description="Data model: era5, wtk, or ensemble") + model: str = Path(..., description="Data model: era5, wtk, or ensemble"), ): """ Retrieve metadata and configuration information about a specific data model. 
- + Returns information about: - Supported time periods - Available years for timeseries downloads - Available heights - Grid information (model’s geographic coverage and spatial-temporal resolution) - Links & References - + - **model**: Data model (era5, wtk, ensemble) """ try: model = validate_model(model) config = MODEL_CONFIG[model] - + return { "model": model, # "available_sources": config["sources"], # "default_source": config["default_source"], "supported_periods": config["period_type"], - "available_years": config.get("years", {}).get("full",[]), + "available_years": config.get("years", {}).get("full", []), "available_heights": config.get("heights", []), "grid_info": config.get("grid_info", {}), "links": config.get("links", []), - "references": config.get("references",[]) + "references": config.get("references", []), } except HTTPException: raise @@ -318,19 +343,24 @@ def get_model_info( 400: {"description": "Bad request - invalid parameters"}, 404: {"description": "Data not found"}, 500: {"description": "Internal server error"}, - } + }, ) def download_timeseries( model: str = Path(..., description="Data model: era5 or wtk"), gridIndex: str = Query(..., description="Grid index identifier"), - years: Optional[List[int]] = Query(None, description="Years to download (defaults to sample years)"), - source: str = Query("s3", description="Data source: athena or s3 (typically s3 for timeseries downloads)") + years: Optional[List[int]] = Query( + None, description="Years to download (defaults to sample years)" + ), + source: str = Query( + "s3", + description="Data source: athena or s3 (typically s3 for timeseries downloads)", + ), ): """ Download timeseries data as CSV for a specific grid point. - + Returns raw timeseries data for the specified grid index and years. 
- + - **model**: Data model (era5, wtk) - **gridIndex**: Grid index from grid-points endpoint - **years**: List of years to include (optional) @@ -338,14 +368,18 @@ def download_timeseries( """ try: # Get CSV content from core function - csv_content = get_timeseries_core(model, [gridIndex], years, source, data_fetcher_router) - + csv_content = get_timeseries_core( + model, [gridIndex], years, source, data_fetcher_router + ) + return StreamingResponse( iter([csv_content]), media_type="text/csv; charset=utf-8", - headers={"Content-Disposition": f'attachment; filename="wind_data_{gridIndex}.csv"'} + headers={ + "Content-Disposition": f'attachment; filename="wind_data_{gridIndex}.csv"' + }, ) - + except HTTPException: raise except Exception as e: @@ -360,18 +394,18 @@ def download_timeseries( 400: {"description": "Bad request - invalid parameters"}, 404: {"description": "Data not found"}, 500: {"description": "Internal server error"}, - } + }, ) def download_timeseries_batch( payload: TimeseriesBatchRequest, - model: str = Path(..., description="Data model: era5 or wtk") + model: str = Path(..., description="Data model: era5 or wtk"), ): """ Download timeseries data for multiple grid points as a ZIP archive. - + Accepts a request body with grid locations, optional years, and data source. Returns a ZIP file containing CSV files for each location. 
- + - **model**: Data model (era5, wtk) - **payload**: Request body containing: - **locations**: List of grid locations with indices (use grid-points endpoint) @@ -384,7 +418,13 @@ def download_timeseries_batch( with zipfile.ZipFile(spooled, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: for loc in payload.locations: - csv_content = get_timeseries_core(model, [loc.index], payload.years, payload.source, data_fetcher_router) + csv_content = get_timeseries_core( + model, + [loc.index], + payload.years, + payload.source, + data_fetcher_router, + ) file_name = f"wind_data_{format_coordinate(loc.latitude)}_{format_coordinate(loc.longitude)}.csv" zf.writestr(file_name, csv_content) @@ -394,7 +434,9 @@ def download_timeseries_batch( "Content-Disposition": f'attachment; filename="wind_data_{model}_{len(payload.locations)}_points.zip"' } - return StreamingResponse(chunker(spooled), media_type="application/zip", headers=headers) + return StreamingResponse( + chunker(spooled), media_type="application/zip", headers=headers + ) except HTTPException: raise diff --git a/windwatts-api/app/controllers/wtk_data_controller.py b/windwatts-api/app/controllers/wtk_data_controller.py index 90901526..8c599353 100644 --- a/windwatts-api/app/controllers/wtk_data_controller.py +++ b/windwatts-api/app/controllers/wtk_data_controller.py @@ -1,4 +1,3 @@ -from typing import Optional from fastapi import APIRouter, HTTPException, Path, Query import re import os @@ -7,12 +6,15 @@ from fastapi.responses import StreamingResponse import zipfile import tempfile + # commented out the data functions until I can get local athena_config working from app.config_manager import ConfigManager from app.data_fetchers.s3_data_fetcher import S3DataFetcher from app.data_fetchers.athena_data_fetcher import AthenaDataFetcher + # from app.data_fetchers.database_data_fetcher import DatabaseDataFetcher from app.data_fetchers.data_fetcher_router import DataFetcherRouter + # from app.database_manager import 
DatabaseManager from app.utils.data_fetcher_utils import format_coordinate, chunker @@ -21,8 +23,7 @@ WindSpeedResponse, AvailablePowerCurvesResponse, EnergyProductionResponse, - GridLocation, - NearestLocationsResponse + NearestLocationsResponse, ) router = APIRouter() @@ -34,12 +35,17 @@ # Initialize ConfigManager config_manager = ConfigManager( secret_arn_env_var="WINDWATTS_DATA_CONFIG_SECRET_ARN", - local_config_path="./app/config/windwatts_data_config.json") # replace with YOUR local config path + local_config_path="./app/config/windwatts_data_config.json", + ) # replace with YOUR local config path athena_config = config_manager.get_config() # Initialize DataFetchers -s3_data_fetcher_wtk = S3DataFetcher(bucket_name="wtk-led", prefix="1224", grid="wtk", s3_key_template="wtk") -athena_data_fetcher_wtk = AthenaDataFetcher(athena_config=athena_config, source_key='wtk') +s3_data_fetcher_wtk = S3DataFetcher( + bucket_name="wtk-led", prefix="1224", grid="wtk", s3_key_template="wtk" +) +athena_data_fetcher_wtk = AthenaDataFetcher( + athena_config=athena_config, source_key="wtk" +) # db_manager = DatabaseManager() # db_data_fetcher = DatabaseDataFetcher(db_manager=db_manager) @@ -56,80 +62,97 @@ } # YEARS list for the sample data download feature -SAMPLE_YEARS = { - "s3_wtk" : [2018, 2019, 2020] -} +SAMPLE_YEARS = {"s3_wtk": [2018, 2019, 2020]} # YEARS for which we have wtk data in the S3 -ALL_YEARS = { - "s3_wtk" : list(range(2000,2021)) -} +ALL_YEARS = {"s3_wtk": list(range(2000, 2021))} # data_type = "wtk" VALID_SOURCES = {"athena_wtk", "s3_wtk"} # <-- new DEFAULT_SOURCE = "athena_wtk" + # Helper validation functions def validate_lat(lat: float) -> float: if not (-90 <= lat <= 90): - raise HTTPException(status_code=400, detail="Latitude must be between -90 and 90.") + raise HTTPException( + status_code=400, detail="Latitude must be between -90 and 90." 
+ ) return lat + def validate_lng(lng: float) -> float: if not (-180 <= lng <= 180): - raise HTTPException(status_code=400, detail="Longitude must be between -180 and 180.") + raise HTTPException( + status_code=400, detail="Longitude must be between -180 and 180." + ) return lng + def validate_height(height: int) -> int: if not (0 < height <= 300): - raise HTTPException(status_code=400, detail="Height must be between 1 and 300 meters.") + raise HTTPException( + status_code=400, detail="Height must be between 1 and 300 meters." + ) return height + def validate_avg_type(avg_type: str, source: str) -> str: allowed = VALID_AVG_TYPES[source]["wind_speed"] if avg_type not in allowed: raise HTTPException( status_code=400, - detail=f"Invalid avg_type. Must be one of: {allowed} for {source}." + detail=f"Invalid avg_type. Must be one of: {allowed} for {source}.", ) return avg_type + def validate_production_avg_type(avg_type: str, source: str) -> str: allowed = VALID_AVG_TYPES[source]["production"] if avg_type not in allowed: raise HTTPException( status_code=400, - detail=f"Invalid time_period. Must be one of: {allowed} for {source}." + detail=f"Invalid time_period. Must be one of: {allowed} for {source}.", ) return avg_type + def validate_selected_powercurve(selected_powercurve: str) -> str: - if not re.match(r'^[\w\-.]+$', selected_powercurve): + if not re.match(r"^[\w\-.]+$", selected_powercurve): raise HTTPException(status_code=400, detail="Invalid selected_powercurve name.") if selected_powercurve not in power_curve_manager.power_curves: raise HTTPException(status_code=400, detail="Selected power curve not found.") return selected_powercurve + def validate_source(source: str) -> str: if source not in VALID_SOURCES: - raise HTTPException(status_code=400, detail=f"Invalid source for WTK data. Must be one of: {sorted(VALID_SOURCES)}.") + raise HTTPException( + status_code=400, + detail=f"Invalid source for WTK data. 
Must be one of: {sorted(VALID_SOURCES)}.", + ) return source + def validate_year(year: int, source: str) -> int: if year not in ALL_YEARS[source]: - raise HTTPException(status_code=400, detail=f"Invalid year for WTK data. Currently supporting years 2000-2022") + raise HTTPException( + status_code=400, + detail="Invalid year for WTK data. Currently supporting years 2000-2022", + ) return year + def validate_n_neighbor(n_neighbor: int) -> int: - if not 1<=n_neighbor<=4: # have to change the limit if needed later on - raise HTTPException(status_code=400, detail=f"Invalid number of neighbors. Currently supporting upto 4 nearest neighbors") + if not 1 <= n_neighbor <= 4: # have to change the limit if needed later on + raise HTTPException( + status_code=400, + detail="Invalid number of neighbors. Currently supporting upto 4 nearest neighbors", + ) return n_neighbor + def _get_windspeed_core( - lat: float, - lng: float, - height: int, - avg_type: str, - source: str + lat: float, lng: float, height: int, avg_type: str, source: str ): """ Core function to retrieve wind speed data from the source database. 
@@ -146,17 +169,13 @@ def _get_windspeed_core( source = validate_source(source) avg_type = validate_avg_type(avg_type, source) - params = { - "lat": lat, - "lng": lng, - "height": height, - "period": avg_type - } + params = {"lat": lat, "lng": lng, "height": height, "period": avg_type} data = data_fetcher_router.fetch_data(params, key=source) if data is None: raise HTTPException(status_code=404, detail="Data not found") return data + @router.get( "/windspeed/{avg_type}", summary="Retrieve wind speed with avg type - wtk data", @@ -164,23 +183,24 @@ def _get_windspeed_core( responses={ 200: { "description": "Wind speed data retrieved successfully", - "model": WindSpeedResponse + "model": WindSpeedResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def get_windspeed_with_avg_type( avg_type: str = Path(..., description="Type of average to retrieve."), lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: return _get_windspeed_core(lat, lng, height, avg_type, source) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( "/windspeed", summary="Retrieve wind speed with default global avg - wtk data", @@ -188,63 +208,66 @@ def get_windspeed_with_avg_type( responses={ 200: { "description": "Wind speed data retrieved successfully", - "model": WindSpeedResponse + "model": WindSpeedResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def get_windspeed( lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), - source: str = Query(DEFAULT_SOURCE, description="Source of the 
data.") + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: return _get_windspeed_core(lat, lng, height, "global", source) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( - "/available-powercurves", - summary="Fetch all available power curves", - response_model=AvailablePowerCurvesResponse, - responses={ - 200: { - "description": "Available power curves retrieved successfully", - "model": AvailablePowerCurvesResponse - }, - 500: {"description": "Internal server error"}, - } + "/available-powercurves", + summary="Fetch all available power curves", + response_model=AvailablePowerCurvesResponse, + responses={ + 200: { + "description": "Available power curves retrieved successfully", + "model": AvailablePowerCurvesResponse, + }, + 500: {"description": "Internal server error"}, + }, ) def fetch_available_powercurves(): try: all_curves = list(power_curve_manager.power_curves.keys()) - prefix = 'nlr-reference-' - + prefix = "nlr-reference-" + def extract_kw(curve_name: str): import re + match = re.search(rf"{prefix}([0-9.]+)kW", curve_name) if match: return float(match.group(1)) - return float('inf') + return float("inf") + curves = [c for c in all_curves if c.startswith(prefix)] other_curves = [c for c in all_curves if not c.startswith(prefix)] curves_sorted = sorted(curves, key=extract_kw) other_curves_sorted = sorted(other_curves) ordered_curves = curves_sorted + other_curves_sorted - return {'available_power_curves': ordered_curves} + return {"available_power_curves": ordered_curves} except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + def _get_energy_production_core( lat: float, lng: float, height: int, selected_powercurve: str, time_period: str, - source: str + source: str, ): - """ Fetches the global, yearly and monthly energy production and average windspeed for a given location, height, and power curve. 
Args: @@ -263,60 +286,87 @@ def _get_energy_production_core( source = validate_source(source) time_period = validate_production_avg_type(time_period, source) - params = { - "lat": lat, - "lng": lng, - "height": height - } + params = {"lat": lat, "lng": lng, "height": height} df = data_fetcher_router.fetch_raw(params, key=source) if df is None: raise HTTPException(status_code=404, detail="Data not found") - if time_period == 'all': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) - return {"energy_production": summary_avg_energy_production['Average year']['kWh produced']} - - elif time_period == 'summary': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) + if time_period == "all": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) + return { + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ] + } + + elif time_period == "summary": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) return {"summary_avg_energy_production": summary_avg_energy_production} - - elif time_period == 'yearly': - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, selected_powercurve) + + elif time_period == "yearly": + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, selected_powercurve + ) + ) return {"yearly_avg_energy_production": yearly_avg_energy_production} - - elif time_period == 'full': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, selected_powercurve) - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, selected_powercurve) + + 
elif time_period == "full": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, selected_powercurve + ) + ) + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, selected_powercurve + ) + ) return { - "energy_production": summary_avg_energy_production['Average year']['kWh produced'], + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ], "summary_avg_energy_production": summary_avg_energy_production, - "yearly_avg_energy_production": yearly_avg_energy_production + "yearly_avg_energy_production": yearly_avg_energy_production, } + @router.get( - "/energy-production/{time_period}", - summary="Get yearly and monthly energy production estimate and average windspeed for a location at a height with a selected power curve", - response_model=EnergyProductionResponse, - responses={ - 200: { - "description": "Energy production data retrieved successfully", - "model": EnergyProductionResponse - }, - 500: {"description": "Internal server error"}, - } + "/energy-production/{time_period}", + summary="Get yearly and monthly energy production estimate and average windspeed for a location at a height with a selected power curve", + response_model=EnergyProductionResponse, + responses={ + 200: { + "description": "Energy production data retrieved successfully", + "model": EnergyProductionResponse, + }, + 500: {"description": "Internal server error"}, + }, ) def energy_production_with_period( - time_period: str = Path(..., description="Time period for production estimate."),lat: float = Query(..., description="Latitude of the location."), + time_period: str = Path(..., description="Time period for production estimate."), + lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), selected_powercurve: str = 
Query(..., description="Selected power curve name."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: - return _get_energy_production_core(lat, lng, height, selected_powercurve, time_period, source) + return _get_energy_production_core( + lat, lng, height, selected_powercurve, time_period, source + ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") + @router.get( "/energy-production", summary="Get global energy production estimate for a location at a height with a selected power curve", @@ -324,82 +374,88 @@ def energy_production_with_period( responses={ 200: { "description": "Energy production data retrieved successfully", - "model": EnergyProductionResponse + "model": EnergyProductionResponse, }, 500: {"description": "Internal server error"}, - } + }, ) def energy_production( lat: float = Query(..., description="Latitude of the location."), lng: float = Query(..., description="Longitude of the location."), height: int = Query(..., description="Height in meters."), selected_powercurve: str = Query(..., description="Selected power curve name."), - source: str = Query(DEFAULT_SOURCE, description="Source of the data.") + source: str = Query(DEFAULT_SOURCE, description="Source of the data."), ): try: - return _get_energy_production_core(lat, lng, height, selected_powercurve, "all", source) + return _get_energy_production_core( + lat, lng, height, selected_powercurve, "all", source + ) except Exception: raise HTTPException(status_code=500, detail="Internal server error.") -def _download_csv_core( - gridIndices: List[str], - years: List[int], - source: str -): + +def _download_csv_core(gridIndices: List[str], years: List[int], source: str): source = validate_source(source) - years= [validate_year(year,source) for year in years] - - params = { - "gridIndices": gridIndices, - "years": years - } + years = [validate_year(year, 
source) for year in years] + + params = {"gridIndices": gridIndices, "years": years} df = data_fetcher_router.fetch_data(params, key=source) if df is None or df.empty: - raise HTTPException(status_code=404, detail="No data found for the specified parameters") - + raise HTTPException( + status_code=404, detail="No data found for the specified parameters" + ) + return df - + @router.get( "/download-csv", - summary="Download csv file for windspeed for a specific location for certain year(s) with 1 neighbor" + summary="Download csv file for windspeed for a specific location for certain year(s) with 1 neighbor", ) def download_csv( - gridIndex: str = Query(..., description="Grid index with respect to user selected coordinate"), - years: List[int] = Query(SAMPLE_YEARS["s3_wtk"], description="years of which the data to download"), - source: str = Query("s3_wtk", description="Source of the data.") + gridIndex: str = Query( + ..., description="Grid index with respect to user selected coordinate" + ), + years: List[int] = Query( + SAMPLE_YEARS["s3_wtk"], description="years of which the data to download" + ), + source: str = Query("s3_wtk", description="Source of the data."), ): try: # Getting DataFrame from core function df = _download_csv_core([gridIndex], years, source) - + # Converting DataFrame to CSV csv_io = io.StringIO() df.to_csv(csv_io, index=False) csv_io.seek(0) - + return StreamingResponse( - iter([csv_io.getvalue()]), - media_type="text/csv; charset=utf-8" + iter([csv_io.getvalue()]), media_type="text/csv; charset=utf-8" ) - + except Exception as e: raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @router.post( "/download-csv-batch", summary="Download multiple CSVs (one per neighbor) as a streamed ZIP", ) def download_csv_batch( payload: NearestLocationsResponse, - years: List[int] = Query(SAMPLE_YEARS["s3_wtk"], description="years of which the data to download"), + years: List[int] = Query( + SAMPLE_YEARS["s3_wtk"], 
description="years of which the data to download" + ), source: str = Query("s3_wtk", description="Source of the data."), ): try: # Spooled file: stays in memory until threshold, then spills to disk automatically - spooled = tempfile.SpooledTemporaryFile(max_size=30 * 1024 * 1024, mode="w+b") # 30MB threshold + spooled = tempfile.SpooledTemporaryFile( + max_size=30 * 1024 * 1024, mode="w+b" + ) # 30MB threshold with zipfile.ZipFile(spooled, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: for loc in payload.locations: @@ -416,11 +472,14 @@ def download_csv_batch( "Content-Disposition": f'attachment; filename="wind_data_{len(payload.locations)}_points.zip"' } - return StreamingResponse(chunker(spooled), media_type="application/zip", headers=headers) + return StreamingResponse( + chunker(spooled), media_type="application/zip", headers=headers + ) except Exception as e: raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + @router.get( "/nearest-locations", summary="Find nearest grid locations", @@ -434,8 +493,10 @@ def download_csv_batch( def nearest_locations( lat: float = Query(..., description="Latitude of the target location."), lng: float = Query(..., description="Longitude of the target location."), - n_neighbors: int = Query(1, description="Number of nearest grid points.", ge=1, le=4), - source: str = Query(DEFAULT_SOURCE, description=f"Source of the data"), + n_neighbors: int = Query( + 1, description="Number of nearest grid points.", ge=1, le=4 + ), + source: str = Query(DEFAULT_SOURCE, description="Source of the data"), ): try: lat = validate_lat(lat) @@ -446,26 +507,24 @@ def nearest_locations( grid_lookup_map = { "athena_wtk": athena_data_fetcher_wtk, } - + fetcher = grid_lookup_map.get(source) if not fetcher: raise HTTPException( - status_code=400, - detail=f"Grid lookup not available for source='{source}'" + status_code=400, + detail=f"Grid lookup not available for source='{source}'", ) # Call find_nearest_locations directly 
on the Athena fetcher - result = fetcher.find_nearest_locations(lat=lat, lng=lng, n_neighbors=n_neighbors) - + result = fetcher.find_nearest_locations( + lat=lat, lng=lng, n_neighbors=n_neighbors + ) + locations = [ - { - "index": str(i), - "latitude": float(a), - "longitude": float(o) - } + {"index": str(i), "latitude": float(a), "longitude": float(o)} for i, a, o in result ] return {"locations": locations} except Exception as e: - raise HTTPException(status_code=500, detail=f"Internal server error: {e}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Internal server error: {e}") diff --git a/windwatts-api/app/data_fetchers/abstract_data_fetcher.py b/windwatts-api/app/data_fetchers/abstract_data_fetcher.py index 2a5fd1eb..733e276d 100644 --- a/windwatts-api/app/data_fetchers/abstract_data_fetcher.py +++ b/windwatts-api/app/data_fetchers/abstract_data_fetcher.py @@ -1,10 +1,11 @@ from abc import ABC, abstractmethod -from typing import List + class AbstractDataFetcher(ABC): """ Abstract class for fetching data from the WTK API """ + def __init__(self): """ Add default settings, configurations, etc. @@ -15,7 +16,7 @@ def __init__(self): def fetch_data(self, lat: float, lng: float, height: int): """ Data fetching method specifications: - + Args: lat (float): Latitude of the location lng (float): Longitude of the location @@ -30,7 +31,7 @@ def fetch_data(self, lat: float, lng: float, height: int): def fetch_raw(self, lat: float, lng: float, height: int): """ Fetch raw data. 
- + Args: lat (float): Latitude of the location lng (float): Longitude of the location @@ -39,4 +40,4 @@ def fetch_raw(self, lat: float, lng: float, height: int): Returns: DataFrame: Raw data without aggregation """ - pass \ No newline at end of file + pass diff --git a/windwatts-api/app/data_fetchers/athena_data_fetcher.py b/windwatts-api/app/data_fetchers/athena_data_fetcher.py index e6dc472f..bd4cc19e 100644 --- a/windwatts-api/app/data_fetchers/athena_data_fetcher.py +++ b/windwatts-api/app/data_fetchers/athena_data_fetcher.py @@ -1,5 +1,10 @@ from .abstract_data_fetcher import AbstractDataFetcher -from windwatts_data import WindwattsWTKClient, WindwattsERA5Client, WindwattsEnsembleClient +from windwatts_data import ( + WindwattsWTKClient, + WindwattsERA5Client, + WindwattsEnsembleClient, +) + class AthenaDataFetcher(AbstractDataFetcher): def __init__(self, athena_config: str, source_key: str): @@ -13,21 +18,31 @@ def __init__(self, athena_config: str, source_key: str): """ # self.data_type = data_type.lower() self.source_key = source_key.lower() - self.base_type = self.source_key.split("_", 1)[0] # 'wtk' or 'era5' (from 'era5_bc' too) + self.base_type = self.source_key.split("_", 1)[ + 0 + ] # 'wtk' or 'era5' (from 'era5_bc' too) - if self.base_type == 'wtk': + if self.base_type == "wtk": print(f"Initializing WTK Client with Source Key: {self.source_key}") - self.client = WindwattsWTKClient(config_path=athena_config, source_key = self.source_key) # source_key "wtk" - elif self.base_type == 'era5': + self.client = WindwattsWTKClient( + config_path=athena_config, source_key=self.source_key + ) # source_key "wtk" + elif self.base_type == "era5": print(f"Initializing ERA5 Client with Source Key: {self.source_key}") - self.client = WindwattsERA5Client(config_path=athena_config, source_key = self.source_key) # source_key "era5" or "era5_bc" - elif self.base_type == 'ensemble': + self.client = WindwattsERA5Client( + config_path=athena_config, 
source_key=self.source_key + ) # source_key "era5" or "era5_bc" + elif self.base_type == "ensemble": print(f"Initializing Ensemble Client with Source Key: {self.source_key}") - self.client = WindwattsEnsembleClient(config_path=athena_config, source_key = self.source_key) # source_key "ensemble" + self.client = WindwattsEnsembleClient( + config_path=athena_config, source_key=self.source_key + ) # source_key "ensemble" else: raise ValueError(f"Unsupported base dataset: {self.base_type}") - def fetch_data(self, lat: float, lng: float, height: int, period: str = 'all') -> dict: + def fetch_data( + self, lat: float, lng: float, height: int, period: str = "all" + ) -> dict: """ Fetch aggregated wind data using the configured client. @@ -45,17 +60,25 @@ def fetch_data(self, lat: float, lng: float, height: int, period: str = 'all') - Raises: ValueError: If the period is not supported for the selected client. """ - if period == 'all': - return self.client.fetch_global_avg_at_height(lat=lat, long=lng, height=height) - elif period == 'annual': - return self.client.fetch_yearly_avg_at_height(lat=lat, long=lng, height=height) - elif period == 'monthly': - return self.client.fetch_monthly_avg_at_height(lat=lat, long=lng, height=height) - elif period == 'hourly': - return self.client.fetch_hourly_avg_at_height(lat=lat, long=lng, height=height) + if period == "all": + return self.client.fetch_global_avg_at_height( + lat=lat, long=lng, height=height + ) + elif period == "annual": + return self.client.fetch_yearly_avg_at_height( + lat=lat, long=lng, height=height + ) + elif period == "monthly": + return self.client.fetch_monthly_avg_at_height( + lat=lat, long=lng, height=height + ) + elif period == "hourly": + return self.client.fetch_hourly_avg_at_height( + lat=lat, long=lng, height=height + ) else: raise ValueError(f"Invalid period: {period}") - + def fetch_raw(self, lat: float, lng: float, height: int): """ Fetch raw, unaggregated wind data (DataFrame) using the configured 
client. @@ -69,7 +92,7 @@ def fetch_raw(self, lat: float, lng: float, height: int): DataFrame: Raw wind data without aggregation. """ return self.client.fetch_df(lat=lat, long=lng, height=height) - + def find_nearest_locations(self, lat: float, lng: float, n_neighbors: int = 1): """ Find one or more nearest grid locations (index, latitude, and longitude) to a given coordinate. @@ -81,13 +104,13 @@ def find_nearest_locations(self, lat: float, lng: float, n_neighbors: int = 1): :param n_neighbors: Number of nearest grid points to return. Defaults to 1. :type n_neighbors: int - :return: - - If n_neighbors == 1: a list of single tuple [(index, latitude, longitude)] for the nearest grid point. + :return: + - If n_neighbors == 1: a list of single tuple [(index, latitude, longitude)] for the nearest grid point. - If n_neighbors > 1: a list of tuples, each containing (index, latitude, longitude). - The list will have length n_neighbors. - :rtype: + :rtype: :rtype: list[tuple[str, float, float]] """ # A list of tuples where each tuple contains: (grid_index, latitude, longitude) tuples = self.client.find_n_nearest_locations(lat, lng, n_neighbors) - return tuples \ No newline at end of file + return tuples diff --git a/windwatts-api/app/data_fetchers/data_fetcher_router.py b/windwatts-api/app/data_fetchers/data_fetcher_router.py index fa9f91c9..8c395a1c 100644 --- a/windwatts-api/app/data_fetchers/data_fetcher_router.py +++ b/windwatts-api/app/data_fetchers/data_fetcher_router.py @@ -1,6 +1,7 @@ import json from .abstract_data_fetcher import AbstractDataFetcher + class DataFetcherRouter: def __init__(self): """ @@ -18,7 +19,7 @@ def register_fetcher(self, key: str, fetcher: AbstractDataFetcher): """ self.fetchers[key] = fetcher - def fetch_data(self, params: dict , key: str = "athena_wtk"): + def fetch_data(self, params: dict, key: str = "athena_wtk"): """ Fetch aggregated data using specified data fetcher. 
@@ -38,7 +39,7 @@ def fetch_data(self, params: dict , key: str = "athena_wtk"): return fetcher.fetch_data(**params) else: raise ValueError(f"No fetcher found for key={key}") - + def fetch_raw(self, params: dict, key: str = "athena_wtk"): """ Fetch raw, unaggregated data (DataFrame) using specified data fetcher. @@ -58,6 +59,7 @@ def fetch_raw(self, params: dict, key: str = "athena_wtk"): return fetcher.fetch_raw(**params) else: raise ValueError(f"No fetcher found for key={key}") + def fetch_data_routing(self, params: dict, source: str = "athena_wtk"): """ Fetch data using the appropriate data fetcher through routing logics. @@ -111,4 +113,4 @@ def is_complex_query(params: dict) -> bool: """ # Implement logic to determine if the query is complex # For example, if the query spans multiple files or requires aggregation - return True \ No newline at end of file + return True diff --git a/windwatts-api/app/data_fetchers/database_data_fetcher.py b/windwatts-api/app/data_fetchers/database_data_fetcher.py index 026736c9..5d51404a 100644 --- a/windwatts-api/app/data_fetchers/database_data_fetcher.py +++ b/windwatts-api/app/data_fetchers/database_data_fetcher.py @@ -1,13 +1,14 @@ -from typing import List import json from .abstract_data_fetcher import WTKDataFetcher from app.utils.data_fetcher_utils import generate_key + class DatabaseDataFetcher(WTKDataFetcher): """ Class for fetching data from the database. TODO: Refactor this to handle pre-computed/aggregated results in the future instead of caching. """ + def __init__(self, db_manager): """ Initializes the DatabaseDataFetcher with the given DatabaseManager. 
diff --git a/windwatts-api/app/data_fetchers/s3_data_fetcher.py b/windwatts-api/app/data_fetchers/s3_data_fetcher.py index b7c6210f..8d9ab0c4 100644 --- a/windwatts-api/app/data_fetchers/s3_data_fetcher.py +++ b/windwatts-api/app/data_fetchers/s3_data_fetcher.py @@ -9,10 +9,11 @@ MIN_POOL_WORKERS = 1 s3_key_templates = { - "era5" : "{prefix}/year={year}/index={index}/{year}_{index}.csv.gz", - "wtk" : "{prefix}/year={year}/varset=all/index={index}/{index}_{year}_all.csv.gz" + "era5": "{prefix}/year={year}/index={index}/{year}_{index}.csv.gz", + "wtk": "{prefix}/year={year}/varset=all/index={index}/{index}_{year}_all.csv.gz", } + class S3DataFetcher(AbstractDataFetcher): def __init__(self, bucket_name: str, prefix: str, s3_key_template: str, grid: str): """ @@ -23,7 +24,9 @@ def __init__(self, bucket_name: str, prefix: str, s3_key_template: str, grid: st s3_key_template(str): The s3 key template to download files. grid (str): The grid of the data. "era5" or "wtk" """ - print(f"Initializing S3 Data Fetcher: bucket: {bucket_name} prefix: {prefix} grid: {grid} ...") + print( + f"Initializing S3 Data Fetcher: bucket: {bucket_name} prefix: {prefix} grid: {grid} ..." + ) self.s3_client = boto3.client( "s3", config=Config( @@ -35,9 +38,11 @@ def __init__(self, bucket_name: str, prefix: str, s3_key_template: str, grid: st self.prefix = prefix self.grid = grid if s3_key_template not in s3_key_templates: - raise ValueError(f"Unknown s3_key_template '{s3_key_template}', expected one of {list(s3_key_templates)}") + raise ValueError( + f"Unknown s3_key_template '{s3_key_template}', expected one of {list(s3_key_templates)}" + ) self.s3_key_template = s3_key_templates[s3_key_template] - + def generate_s3_keys(self, grid_Indices: List[str], years: List[int]) -> List[str]: """ Build S3 object keys for the given years and nearest grid indices. 
@@ -51,11 +56,13 @@ def generate_s3_keys(self, grid_Indices: List[str], years: List[int]) -> List[st for year in years: for index in grid_Indices: # uri specific to era5 timeseries, might change for ensemble timeseries as it might not have year. - key = self.s3_key_template.format(prefix=self.prefix, year=year, index=index) + key = self.s3_key_template.format( + prefix=self.prefix, year=year, index=index + ) keys.append(key) - + return keys - + def fetch_raw(self, key: str, cols: Optional[List[str]]): """ Download + parse a single gzip CSV from S3. @@ -112,4 +119,4 @@ def fetch_data( return pd.DataFrame() out = pd.concat(frames, ignore_index=True) - return out \ No newline at end of file + return out diff --git a/windwatts-api/app/database/__init__.py b/windwatts-api/app/database/__init__.py index 073f4a82..cc1e3aba 100644 --- a/windwatts-api/app/database/__init__.py +++ b/windwatts-api/app/database/__init__.py @@ -1,4 +1,4 @@ from .connection import engine, SessionLocal, get_db from .models import AuditLog -__all__ = ['engine', 'SessionLocal', 'get_db', 'AuditLog'] \ No newline at end of file +__all__ = ["engine", "SessionLocal", "get_db", "AuditLog"] diff --git a/windwatts-api/app/database/connection.py b/windwatts-api/app/database/connection.py index ef430f28..051a2b26 100644 --- a/windwatts-api/app/database/connection.py +++ b/windwatts-api/app/database/connection.py @@ -6,7 +6,9 @@ load_dotenv() # Get database URL from environment variable -DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/windwatts") +DATABASE_URL = os.getenv( + "DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/windwatts" +) # Create SQLAlchemy engine engine = create_engine(DATABASE_URL) @@ -17,10 +19,11 @@ # Create base class for models Base = declarative_base() + # Dependency to get DB session def get_db(): db = SessionLocal() try: yield db finally: - db.close() \ No newline at end of file + db.close() diff --git 
a/windwatts-api/app/database/models.py b/windwatts-api/app/database/models.py index d4aca8d3..0f0ad3d0 100644 --- a/windwatts-api/app/database/models.py +++ b/windwatts-api/app/database/models.py @@ -1,7 +1,8 @@ -from sqlalchemy import Column, Integer, String, DateTime, JSON, Text, Float, Boolean +from sqlalchemy import Column, Integer, String, DateTime, JSON, Text from sqlalchemy.sql import func from .connection import Base + class AuditLog(Base): __tablename__ = "audit_logs" @@ -18,4 +19,4 @@ class AuditLog(Base): duration_ms = Column(Integer, nullable=True, index=True) request_size_bytes = Column(Integer, nullable=True) response_size_bytes = Column(Integer, nullable=True) - log_metadata = Column(JSON, nullable=True) \ No newline at end of file + log_metadata = Column(JSON, nullable=True) diff --git a/windwatts-api/app/database_manager.py b/windwatts-api/app/database_manager.py index 56bd449e..1575c484 100644 --- a/windwatts-api/app/database_manager.py +++ b/windwatts-api/app/database_manager.py @@ -1,12 +1,13 @@ import os import sqlite3 + class DatabaseManager: def __init__(self, db_path=None, timeout=5): """ Initializes the DatabaseManager with the given database path/name and timeout. """ - self.db_path = db_path or os.getenv('DB_PATH', 'db/wtk_data.db') + self.db_path = db_path or os.getenv("DB_PATH", "db/wtk_data.db") os.makedirs(os.path.dirname(self.db_path), exist_ok=True) self.conn = sqlite3.connect(self.db_path, timeout=timeout) self.create_table() @@ -16,12 +17,12 @@ def create_table(self): Creates the cached_data table if it does not already exist. """ with self.conn: - self.conn.execute(''' + self.conn.execute(""" CREATE TABLE IF NOT EXISTS cached_data ( key TEXT PRIMARY KEY, data TEXT ) - ''') + """) def get_data(self, key: str) -> str: """ @@ -31,7 +32,7 @@ def get_data(self, key: str) -> str: str: The data associated with the key, or None if the key does not exist. 
""" with self.conn.cursor() as cursor: - cursor.execute('SELECT data FROM cached_data WHERE key = ?', (key,)) + cursor.execute("SELECT data FROM cached_data WHERE key = ?", (key,)) row = cursor.fetchone() return row[0] if row else None @@ -43,6 +44,9 @@ def store_data(self, key: str, data): key (str): The key associated with the data. data (str): The data to be stored. """ - + with self.conn: - self.conn.execute('INSERT OR REPLACE INTO cached_data (key, data) VALUES (?, ?)', (key, data)) \ No newline at end of file + self.conn.execute( + "INSERT OR REPLACE INTO cached_data (key, data) VALUES (?, ?)", + (key, data), + ) diff --git a/windwatts-api/app/exception_handlers.py b/windwatts-api/app/exception_handlers.py index cadf451e..c6d1a54f 100644 --- a/windwatts-api/app/exception_handlers.py +++ b/windwatts-api/app/exception_handlers.py @@ -4,21 +4,19 @@ import traceback from app.logging_setup import logger + async def log_unhandled_exceptions(request: Request, exc: Exception): logger.error(f"❌ ERROR: {request.method} {request.url}\n{traceback.format_exc()}") - return JSONResponse( - status_code=500, - content={"detail": "Internal server error"} - ) + return JSONResponse(status_code=500, content={"detail": "Internal server error"}) + async def log_validation_errors(request: Request, exc: RequestValidationError): - logger.warning(f"⚠️ VALIDATION ERROR: {request.method} {request.url} | {exc.errors()}") + logger.warning( + f"⚠️ VALIDATION ERROR: {request.method} {request.url} | {exc.errors()}" + ) # Remove 'input' field from all error details filtered_errors = [] for err in exc.errors(): filtered = {k: v for k, v in err.items() if k != "input"} filtered_errors.append(filtered) - return JSONResponse( - status_code=422, - content={"detail": filtered_errors} - ) + return JSONResponse(status_code=422, content={"detail": filtered_errors}) diff --git a/windwatts-api/app/logging_setup.py b/windwatts-api/app/logging_setup.py index f2c04270..4c321ae3 100644 --- 
a/windwatts-api/app/logging_setup.py +++ b/windwatts-api/app/logging_setup.py @@ -1,7 +1,5 @@ import logging -logging.basicConfig( - level=logging.DEBUG -) +logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) diff --git a/windwatts-api/app/main.py b/windwatts-api/app/main.py index 124a34ea..0bdee27a 100644 --- a/windwatts-api/app/main.py +++ b/windwatts-api/app/main.py @@ -3,7 +3,6 @@ from app.schemas import HealthCheckResponse from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse from mangum import Mangum from app.controllers.wtk_data_controller import router as wtk_data_router from app.controllers.era5_data_controller import router as era5_data_router @@ -11,7 +10,6 @@ from app.middleware import AuditMiddleware, LoggingMiddleware from app.exception_handlers import log_unhandled_exceptions, log_validation_errors from textwrap import dedent -from app.schemas import HealthCheckResponse app = FastAPI( title="WindWatts API", @@ -43,7 +41,7 @@ ) app.add_middleware(LoggingMiddleware) # Logging middleware first -app.add_middleware(AuditMiddleware) # Audit middleware second +app.add_middleware(AuditMiddleware) # Audit middleware second app.add_exception_handler(Exception, log_unhandled_exceptions) app.add_exception_handler(RequestValidationError, log_validation_errors) @@ -53,7 +51,7 @@ "https://windwatts2-dev.stratus.nrel.gov", "https://windwatts2-stage.stratus.nrel.gov", "https://windwatts2-prod.stratus.nrel.gov", - "https://windwatts.nrel.gov" + "https://windwatts.nrel.gov", ] app.add_middleware( CORSMiddleware, @@ -69,25 +67,22 @@ # Legacy routes - Deprecated # TODO: Remove these routes app.include_router( - wtk_data_router, - prefix="/wtk", - tags=["wtk-data (deprecated)"], - deprecated=True + wtk_data_router, prefix="/wtk", tags=["wtk-data (deprecated)"], deprecated=True ) app.include_router( - era5_data_router, - prefix="/era5", - 
tags=["era5-data (deprecated)"], - deprecated=True + era5_data_router, prefix="/era5", tags=["era5-data (deprecated)"], deprecated=True ) + @app.get("/healthcheck", response_model=HealthCheckResponse) def healthcheck(): return JSONResponse({"status": "up"}, status_code=200) + # Serve generated OpenAPI JSON if present @app.get("/openapi.json", include_in_schema=False, response_model=None) def serve_openapi_json(): return app.openapi() -handler = Mangum(app) \ No newline at end of file + +handler = Mangum(app) diff --git a/windwatts-api/app/middleware/__init__.py b/windwatts-api/app/middleware/__init__.py index 2e7e7522..8d341d57 100644 --- a/windwatts-api/app/middleware/__init__.py +++ b/windwatts-api/app/middleware/__init__.py @@ -1,4 +1,4 @@ from .audit import AuditMiddleware from .logger import LoggingMiddleware -__all__ = ['AuditMiddleware', 'LoggingMiddleware'] \ No newline at end of file +__all__ = ["AuditMiddleware", "LoggingMiddleware"] diff --git a/windwatts-api/app/middleware/audit.py b/windwatts-api/app/middleware/audit.py index 40ad6f39..fff141f7 100644 --- a/windwatts-api/app/middleware/audit.py +++ b/windwatts-api/app/middleware/audit.py @@ -1,7 +1,5 @@ from fastapi import Request, Response from starlette.middleware.base import BaseHTTPMiddleware -from sqlalchemy.orm import Session -from ..database import SessionLocal, AuditLog import json from typing import Dict, Any import logging @@ -10,28 +8,29 @@ logger = logging.getLogger(__name__) + class AuditMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next): start_time = time.time() request_id = str(uuid.uuid4()) - + # Get request details request_data = await self._get_request_data(request) - request_size = len(json.dumps(request_data).encode('utf-8')) - + request_size = len(json.dumps(request_data).encode("utf-8")) + try: # Process the request response = await call_next(request) - + # Calculate duration duration_ms = int((time.time() - start_time) * 1000) - + # Get response 
size response_body = b"" async for chunk in response.body_iterator: response_body += chunk response_size = len(response_body) - + # Only log successful requests to audit log if response.status_code < 400: self._log_request( @@ -40,18 +39,18 @@ async def dispatch(self, request: Request, call_next): duration_ms=duration_ms, request_size=request_size, response_size=response_size, - request_id=request_id + request_id=request_id, ) - + # Reconstruct response return Response( content=response_body, status_code=response.status_code, headers=dict(response.headers), - media_type=response.media_type + media_type=response.media_type, ) - - except Exception as e: + + except Exception: raise async def _get_request_data(self, request: Request) -> Dict[str, Any]: @@ -85,7 +84,7 @@ def _log_request( duration_ms: int, request_size: int, response_size: int, - request_id: str + request_id: str, ): """Create audit log entry for successful requests only""" print("Logging request") @@ -123,11 +122,11 @@ def _log_request( # "path_params": request.path_params # } # ) - + # db.add(audit_log) # db.commit() # except Exception as e: # logger.error(f"Failed to create audit log: {str(e)}") # db.rollback() # finally: - # db.close() \ No newline at end of file + # db.close() diff --git a/windwatts-api/app/middleware/logger.py b/windwatts-api/app/middleware/logger.py index 77ea394f..4155e559 100644 --- a/windwatts-api/app/middleware/logger.py +++ b/windwatts-api/app/middleware/logger.py @@ -2,36 +2,36 @@ from fastapi import Request, Response from starlette.middleware.base import BaseHTTPMiddleware import logging -import json import uuid logger = logging.getLogger(__name__) + class LoggingMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next): start_time = time.time() request_id = str(uuid.uuid4()) - + # Log request start logger.info( f"Request started: {request.method} {request.url.path} | " f"ID: {request_id} | " f"Client: {request.client.host if request.client else 
'unknown'}" ) - + try: # Process the request response = await call_next(request) - + # Calculate duration duration_ms = int((time.time() - start_time) * 1000) - + # Get response size response_body = b"" async for chunk in response.body_iterator: response_body += chunk response_size = len(response_body) - + # Log request completion logger.info( f"Request completed: {request.method} {request.url.path} | " @@ -39,21 +39,21 @@ async def dispatch(self, request: Request, call_next): f"Duration: {duration_ms}ms | " f"Size: {response_size} bytes" ) - + # Reconstruct response return Response( content=response_body, status_code=response.status_code, headers=dict(response.headers), - media_type=response.media_type + media_type=response.media_type, ) - + except Exception as e: # Log error logger.error( f"Request failed: {request.method} {request.url.path} | " f"Error: {str(e)} | " f"Duration: {int((time.time() - start_time) * 1000)}ms", - exc_info=True + exc_info=True, ) - raise \ No newline at end of file + raise diff --git a/windwatts-api/app/power_curve/global_power_curve_manager.py b/windwatts-api/app/power_curve/global_power_curve_manager.py index f19f04cd..ef05b4b3 100644 --- a/windwatts-api/app/power_curve/global_power_curve_manager.py +++ b/windwatts-api/app/power_curve/global_power_curve_manager.py @@ -3,4 +3,4 @@ # Get the directory of this file and construct path to powercurves _current_dir = Path(__file__).parent -power_curve_manager = PowerCurveManager(str(_current_dir / "powercurves")) \ No newline at end of file +power_curve_manager = PowerCurveManager(str(_current_dir / "powercurves")) diff --git a/windwatts-api/app/power_curve/power_curve_manager.py b/windwatts-api/app/power_curve/power_curve_manager.py index 22fdbee8..380765e9 100644 --- a/windwatts-api/app/power_curve/power_curve_manager.py +++ b/windwatts-api/app/power_curve/power_curve_manager.py @@ -7,43 +7,50 @@ from enum import Enum from typing import Optional + class DatasetSchema(Enum): - TIMESERIES = 
"timeseries" # Any raw time-series data (year/month/hour based, magnitudes not quantiles) - wtk - QUANTILES_WITH_YEAR = "quantiles_with_year" # Quantile distributions, separated by year - era5 - QUANTILES_GLOBAL = "quantiles_global" # Quantile distribution without year (global) - ensemble data + TIMESERIES = "timeseries" # Any raw time-series data (year/month/hour based, magnitudes not quantiles) - wtk + QUANTILES_WITH_YEAR = ( + "quantiles_with_year" # Quantile distributions, separated by year - era5 + ) + QUANTILES_GLOBAL = "quantiles_global" # Quantile distribution without year (global) - ensemble data + class PowerCurveManager: """ Manages multiple power curves stored in a directory. """ - def __init__(self, - power_curve_dir: str, - use_swi_default: bool = False, - schema_swi_prefs: Optional[dict] = None): + + def __init__( + self, + power_curve_dir: str, + use_swi_default: bool = False, + schema_swi_prefs: Optional[dict] = None, + ): """ Initialize PowerCurveManager to load multiple power curves. :param power_curve_dir: Directory containing power curve files. :param use_swi_default: Fallback if a schema isn't in schema_swi_prefs. - :param schema_swi_prefs: Optional dict overriding per-schema SWI behavior. + :param schema_swi_prefs: Optional dict overriding per-schema SWI behavior. 
""" self.power_curves = {} self.load_power_curves(power_curve_dir) self.use_swi_default = use_swi_default self.schema_swi_prefs = { - DatasetSchema.TIMESERIES: False, # not used for midpoints; defined for completeness - DatasetSchema.QUANTILES_WITH_YEAR: True, # SWI ON - DatasetSchema.QUANTILES_GLOBAL: False # SWI OFF + DatasetSchema.TIMESERIES: False, # not used for midpoints; defined for completeness + DatasetSchema.QUANTILES_WITH_YEAR: True, # SWI ON + DatasetSchema.QUANTILES_GLOBAL: False, # SWI OFF } - + if schema_swi_prefs: self.schema_swi_prefs.update(schema_swi_prefs) - + def _use_swi_for(self, schema: DatasetSchema) -> bool: """Resolve SWI strictly by schema (falls back to class default if absent).""" return self.schema_swi_prefs.get(schema, self.use_swi_default) - + def set_schema_swi_pref(self, schema: DatasetSchema, enabled: bool) -> None: """Change SWI default for a specific schema.""" self.schema_swi_prefs[schema] = bool(enabled) @@ -92,7 +99,7 @@ def _classify_schema(self, df: pd.DataFrame) -> DatasetSchema: return DatasetSchema.TIMESERIES # Fall back: if neither, assume WTK-like (time-series) but raise if critical cols are missing later return DatasetSchema.TIMESERIES - + def load_power_curves(self, directory: str): """ Load power curves from the specified directory. 
@@ -100,7 +107,9 @@ def load_power_curves(self, directory: str): for file in os.listdir(directory): if file.endswith(".csv") or file.endswith(".xlsx"): curve_name = os.path.splitext(file)[0] - self.power_curves[curve_name] = PowerCurve(os.path.join(directory, file)) + self.power_curves[curve_name] = PowerCurve( + os.path.join(directory, file) + ) def get_curve(self, curve_name: str) -> PowerCurve: """ @@ -115,8 +124,10 @@ def get_curve(self, curve_name: str) -> PowerCurve: if curve_name not in self.power_curves: raise KeyError(f"Power curve '{curve_name}' not found.") return self.power_curves[curve_name] - - def find_inverse(self, x_smooth: np.ndarray, y_smooth: np.ndarray, y_hat: np.ndarray) -> np.ndarray: + + def find_inverse( + self, x_smooth: np.ndarray, y_smooth: np.ndarray, y_hat: np.ndarray + ) -> np.ndarray: """ Vectorized inverse mapping: finds the x values corresponding to the closest y values to each y_hat. @@ -134,12 +145,12 @@ def find_inverse(self, x_smooth: np.ndarray, y_smooth: np.ndarray, y_hat: np.nda diff = np.abs(y_smooth[:, None] - y_hat[None, :]) closest_indices = np.argmin(diff, axis=0) return x_smooth[closest_indices] - + def _jitter_nonincreasing(self, q: np.ndarray, eps: float = 1e-5): """ Ensure q is strictly increasing by adding a tiny epsilon to any element that is <= its predecessor. Long flat runs become a tiny staircase. - + :param q: Input array of quantile values that may include equal or decreasing entries. :type q: numpy.ndarray :param eps: Minimum increment applied to enforce strict monotonicity. Defaults to 1e-5. 
@@ -153,12 +164,12 @@ def _jitter_nonincreasing(self, q: np.ndarray, eps: float = 1e-5): # array([3.02, 3.02001, 3.02002, 3.05]) q = np.asarray(q, dtype=np.float64).copy() for i in range(1, q.size): - if not np.isfinite(q[i]) or not np.isfinite(q[i-1]): + if not np.isfinite(q[i]) or not np.isfinite(q[i - 1]): continue - if q[i] <= q[i-1]: - q[i] = q[i-1] + eps + if q[i] <= q[i - 1]: + q[i] = q[i - 1] + eps return q - + def run_cubic(self, x, y, probs_new, M1): """ Internal helper: fit cubic spline F(q)=P(X≤q) and invert to Q(p). @@ -185,22 +196,22 @@ def run_cubic(self, x, y, probs_new, M1): # === Create the cubic spline with clamped boundary conditions === spline = CubicSpline(x, y, bc_type=((1, dy_start), (1, dy_end))) - - #=== High-resolution discretization (interp_point_count is large) === + + # === High-resolution discretization (interp_point_count is large) === x_smooth = np.linspace(x[0], x[-1], M1, dtype=np.float64) y_smooth = spline(x_smooth) # Invert F(q) -> Q(p) q_new = self.find_inverse(x_smooth, y_smooth, probs_new) - return q_new,probs_new - + return q_new, probs_new + def estimation_quantiles_SWI(self, quantiles, probs, M1=1000, M2=501): """ Estimate a smoother quantile function using the Spline With Inversion (SWI) method, with a safe fallback.. - This method constructs a cubic spline interpolation of the empirical CDF (defined by the - provided `quantiles` and corresponding `probs`), and then performs an inversion to generate + This method constructs a cubic spline interpolation of the empirical CDF (defined by the + provided `quantiles` and corresponding `probs`), and then performs an inversion to generate a smooth estimate of quantiles over a high-resolution, uniformly spaced probability range. 
If the cubic spline fails due to equal or non-increasing quantile values @@ -229,12 +240,12 @@ def estimation_quantiles_SWI(self, quantiles, probs, M1=1000, M2=501): """ q = np.asarray(quantiles, dtype=np.float64) # quantiles - p = np.asarray(probs, dtype=np.float64) # probabilities + p = np.asarray(probs, dtype=np.float64) # probabilities # Predefine defaults in case both attempts fail probs_new = np.linspace(0, 1, M2, dtype=np.float64) quantiles_new_default = np.zeros_like(probs_new, dtype=np.float64) - + try: return self.run_cubic(q, p, probs_new, M1) except (ValueError, ZeroDivisionError) as e1: @@ -242,20 +253,20 @@ def estimation_quantiles_SWI(self, quantiles, probs, M1=1000, M2=501): except Exception as e2: print(f"Cubic Spline failed due to: {e2}.") return quantiles_new_default, probs_new - + try: q_fix = self._jitter_nonincreasing(q, eps=1e-5) return self.run_cubic(q_fix, p, probs_new, M1) except Exception as e3: print(f"Warning: CubicSpline failed even after jittering — {e3}") return quantiles_new_default, probs_new - + def _quantiles_to_kw_midpoints( self, df_sorted: pd.DataFrame, ws_col: str, power_curve: PowerCurve, - use_swi: bool + use_swi: bool, ) -> pd.DataFrame: """ Takes a dataframe with columns [probability, ws_col] sorted by probability @@ -266,7 +277,7 @@ def _quantiles_to_kw_midpoints( quants = df_sorted[ws_col].to_numpy(dtype=float) if use_swi: - q_est, _ = self.estimation_quantiles_SWI(quantiles=quants,probs=probs) + q_est, _ = self.estimation_quantiles_SWI(quantiles=quants, probs=probs) else: q_est = quants @@ -279,7 +290,13 @@ def _quantiles_to_kw_midpoints( mid_df[f"{ws_col}_kw"] = power_curve.windspeed_to_kw(mid_df, ws_col) return mid_df - def compute_energy_production_df(self, df: pd.DataFrame, height: int, selected_power_curve: str, relevant_columns_only: bool = True) -> pd.DataFrame: + def compute_energy_production_df( + self, + df: pd.DataFrame, + height: int, + selected_power_curve: str, + relevant_columns_only: bool = True, + ) -> 
pd.DataFrame: """ Computes energy production dataframe using the selected power curve. @@ -295,10 +312,10 @@ def compute_energy_production_df(self, df: pd.DataFrame, height: int, selected_p - Quantiles-with-year: ["year", ws_col, f"{ws_col}_kw"] for midpoint bins - Global-quantiles: ["year"(absent), ws_col, f"{ws_col}_kw"] for midpoint bins """ - ws_col = f'windspeed_{height}m' + ws_col = f"windspeed_{height}m" if ws_col not in df.columns: raise KeyError(f"Expected column '{ws_col}' in input dataframe.") - + schema = self._classify_schema(df) power_curve = self.get_curve(selected_power_curve) @@ -307,7 +324,9 @@ def compute_energy_production_df(self, df: pd.DataFrame, height: int, selected_p work = df.copy() if "month" not in work.columns or "hour" not in work.columns: if "mohr" not in work.columns: - raise KeyError("WTK-like input requires 'mohr' or explicit 'month' and 'hour' columns.") + raise KeyError( + "WTK-like input requires 'mohr' or explicit 'month' and 'hour' columns." + ) work["month"], work["hour"] = work["mohr"] // 100, work["mohr"] % 100 work[f"{ws_col}_kw"] = power_curve.windspeed_to_kw(work, ws_col) @@ -315,26 +334,40 @@ def compute_energy_production_df(self, df: pd.DataFrame, height: int, selected_p cols = ["year", "mohr", "month", "hour", ws_col, f"{ws_col}_kw"] return work[cols] return work - + elif schema == DatasetSchema.QUANTILES_WITH_YEAR: use_swi_eff = self._use_swi_for(schema) records = [] for year, group in df.groupby("year"): # sorting by probability is important since the records might be shuffled by "groupby" and we are using midpoint method. 
group = group.sort_values("probability").reset_index(drop=True) - mid_df = self._quantiles_to_kw_midpoints(group, ws_col, power_curve, use_swi=use_swi_eff) + mid_df = self._quantiles_to_kw_midpoints( + group, ws_col, power_curve, use_swi=use_swi_eff + ) mid_df.insert(0, "year", year) records.append(mid_df) - out = pd.concat(records, ignore_index=True) if records else pd.DataFrame(columns=["year", ws_col, f"{ws_col}_kw"]) - return out if not relevant_columns_only else out[["year", ws_col, f"{ws_col}_kw"]] - + out = ( + pd.concat(records, ignore_index=True) + if records + else pd.DataFrame(columns=["year", ws_col, f"{ws_col}_kw"]) + ) + return ( + out + if not relevant_columns_only + else out[["year", ws_col, f"{ws_col}_kw"]] + ) + else: # DatasetSchema.QUANTILES_GLOBAL use_swi_eff = self._use_swi_for(schema) group = df.sort_values("probability").reset_index(drop=True) - out = self._quantiles_to_kw_midpoints(group, ws_col, power_curve, use_swi=use_swi_eff) + out = self._quantiles_to_kw_midpoints( + group, ws_col, power_curve, use_swi=use_swi_eff + ) return out if not relevant_columns_only else out[[ws_col, f"{ws_col}_kw"]] - - def prepare_yearly_production_df(self, df: pd.DataFrame, height: int, selected_power_curve: str) -> pd.DataFrame: + + def prepare_yearly_production_df( + self, df: pd.DataFrame, height: int, selected_power_curve: str + ) -> pd.DataFrame: """ Prepares yearly average energy production and windspeed dataframe for dependent methods. 
@@ -349,27 +382,32 @@ def prepare_yearly_production_df(self, df: pd.DataFrame, height: int, selected_p pd.Dataframe """ prod_df = self.compute_energy_production_df(df, height, selected_power_curve) - ws_column = f'windspeed_{height}m' - kw_column = f'windspeed_{height}m_kw' + ws_column = f"windspeed_{height}m" + kw_column = f"windspeed_{height}m_kw" schema = self._classify_schema(df) - + res_list = [] if schema == DatasetSchema.TIMESERIES: work = prod_df.copy() # If wind direction columns slipped through, drop them - work = work.drop(columns=[c for c in work.columns if "winddirection" in c], errors="ignore") + work = work.drop( + columns=[c for c in work.columns if "winddirection" in c], + errors="ignore", + ) for year, group in work.groupby("year"): avg_ws = group[ws_column].mean() # Original approximation used in your code: # sum of instantaneous power over typical month × 30 days kwh = group[kw_column].sum() * 30 - res_list.append({ - "year": year, - "Average wind speed (m/s)": avg_ws, - "kWh produced": kwh - }) + res_list.append( + { + "year": year, + "Average wind speed (m/s)": avg_ws, + "kWh produced": kwh, + } + ) elif schema == DatasetSchema.QUANTILES_WITH_YEAR: # Midpoints are equal-probability bins → average power × hours/year @@ -377,30 +415,34 @@ def prepare_yearly_production_df(self, df: pd.DataFrame, height: int, selected_p avg_ws = group[ws_column].mean() avg_power_kw = group[kw_column].mean() kwh = avg_power_kw * 8760.0 - res_list.append({ - "year": year, - "Average wind speed (m/s)": avg_ws, - "kWh produced": kwh - }) + res_list.append( + { + "year": year, + "Average wind speed (m/s)": avg_ws, + "kWh produced": kwh, + } + ) else: # QUANTILES_GLOBAL if len(prod_df) == 0: - return pd.DataFrame(columns=["year", "Average wind speed (m/s)", "kWh produced"]) + return pd.DataFrame( + columns=["year", "Average wind speed (m/s)", "kWh produced"] + ) avg_ws = prod_df[ws_column].mean() avg_power_kw = prod_df[kw_column].mean() kwh = avg_power_kw * 8760.0 - 
res_list.append({ - "year": None, - "Average wind speed (m/s)": avg_ws, - "kWh produced": kwh - }) + res_list.append( + {"year": None, "Average wind speed (m/s)": avg_ws, "kWh produced": kwh} + ) res = pd.DataFrame(res_list) res.sort_values("Average wind speed (m/s)", inplace=True, ignore_index=True) return res - - def calculate_yearly_energy_production(self, df: pd.DataFrame, height: int, selected_power_curve: str) -> dict: + + def calculate_yearly_energy_production( + self, df: pd.DataFrame, height: int, selected_power_curve: str + ) -> dict: """ Computes yearly average energy production and windspeed. @@ -411,7 +453,7 @@ def calculate_yearly_energy_production(self, df: pd.DataFrame, height: int, sele Returns: dict - + Example: { "2001": {"Average wind speed (m/s)": "5.65", "kWh produced": 250117}, @@ -419,20 +461,24 @@ def calculate_yearly_energy_production(self, df: pd.DataFrame, height: int, sele ... } """ - yearly_prod_df = self.prepare_yearly_production_df(df,height,selected_power_curve) - + yearly_prod_df = self.prepare_yearly_production_df( + df, height, selected_power_curve + ) + result = {} for _, row in yearly_prod_df.iterrows(): # Use "Global" if year is missing (for quantiles without year) year_key = "Global" if pd.isna(row["year"]) else str(int(row["year"])) result[year_key] = { "Average wind speed (m/s)": f"{float(row['Average wind speed (m/s)']):.2f}", - "kWh produced": int(round(float(row["kWh produced"]))) + "kWh produced": int(round(float(row["kWh produced"]))), } return result - - def calculate_energy_production_summary(self, df: pd.DataFrame, height: int, selected_power_curve: str) -> dict: + + def calculate_energy_production_summary( + self, df: pd.DataFrame, height: int, selected_power_curve: str + ) -> dict: """ Computes yearly average energy production and windspeed summary. 
@@ -443,7 +489,7 @@ def calculate_energy_production_summary(self, df: pd.DataFrame, height: int, sel Returns: dict - + Example: { "Lowest year": {"year": 2015, "Average wind speed (m/s)": "5.36", "kWh produced": 202791}, @@ -451,32 +497,39 @@ def calculate_energy_production_summary(self, df: pd.DataFrame, height: int, sel "Highest year": {"year": 2014, "Average wind speed (m/s)": "6.32", "kWh produced": 326354} } """ - yearly_prod_df = self.prepare_yearly_production_df(df,height,selected_power_curve) + yearly_prod_df = self.prepare_yearly_production_df( + df, height, selected_power_curve + ) if yearly_prod_df.empty: return {} - res_avg = pd.DataFrame(yearly_prod_df.drop(columns=['year']).mean()).T + res_avg = pd.DataFrame(yearly_prod_df.drop(columns=["year"]).mean()).T res_avg.index = ["Average year"] # Final formatting - res_summary = pd.concat([yearly_prod_df.iloc[[0]], res_avg, yearly_prod_df.iloc[[-1]]], ignore_index=False) + res_summary = pd.concat( + [yearly_prod_df.iloc[[0]], res_avg, yearly_prod_df.iloc[[-1]]], + ignore_index=False, + ) def fmt_year(v): return None if pd.isna(v) else int(v) # Handle None year for average row - convert to proper None instead of pandas NA res_summary["year"] = res_summary["year"].map(fmt_year) - res_summary["kWh produced"] = res_summary["kWh produced"].astype(float).round().astype(int) - res_summary["Average wind speed (m/s)"] = res_summary["Average wind speed (m/s)"].astype(float).map('{:,.2f}'.format) - - res_summary.index = [ - "Lowest year", - "Average year", - "Highest year" - ] + res_summary["kWh produced"] = ( + res_summary["kWh produced"].astype(float).round().astype(int) + ) + res_summary["Average wind speed (m/s)"] = ( + res_summary["Average wind speed (m/s)"].astype(float).map("{:,.2f}".format) + ) + + res_summary.index = ["Lowest year", "Average year", "Highest year"] res_summary = res_summary.replace({np.nan: None}) return res_summary.to_dict(orient="index") - def calculate_monthly_energy_production(self, df: 
pd.DataFrame, height: int, selected_power_curve: str) -> dict: + def calculate_monthly_energy_production( + self, df: pd.DataFrame, height: int, selected_power_curve: str + ) -> dict: """ Computes monthly average energy production. @@ -489,27 +542,42 @@ def calculate_monthly_energy_production(self, df: pd.DataFrame, height: int, sel dict: dict summarizing monthly energy production and windspeed. Example: - {'Jan': {'Average wind speed, m/s': '3.80', 'kWh produced': '5,934'}, - 'Feb': {'Average wind speed, m/s': '3.92', 'kWh produced': '6,357'}, + {'Jan': {'Average wind speed, m/s': '3.80', 'kWh produced': '5,934'}, + 'Feb': {'Average wind speed, m/s': '3.92', 'kWh produced': '6,357'}, 'Mar': {'Average wind speed, m/s': '4.17', 'kWh produced': '7,689'}....} """ schema = self._classify_schema(df) if schema != DatasetSchema.TIMESERIES: - raise ValueError("Monthly averages are only supported for time-series (TIMESERIES) inputs.") + raise ValueError( + "Monthly averages are only supported for time-series (TIMESERIES) inputs." 
+ ) prod_df = self.compute_energy_production_df(df, height, selected_power_curve) - - ws_column = f'windspeed_{height}m' - kw_column = f'windspeed_{height}m_kw' - - work = prod_df.drop(columns=["mohr", "year", "hour"] + [col for col in prod_df.columns if "winddirection" in col],errors="ignore") - - res = work.groupby("month").agg(avg_ws=(ws_column, "mean"), kwh_total=(kw_column, "sum")) - res["kwh_total"] *= 30 / 20.0 # Approximation: 30 days per month, averaged over 20 years - res.rename(columns={"avg_ws": "Average wind speed (m/s)", "kwh_total": "kWh produced"}, inplace=True) + ws_column = f"windspeed_{height}m" + kw_column = f"windspeed_{height}m_kw" + + work = prod_df.drop( + columns=["mohr", "year", "hour"] + + [col for col in prod_df.columns if "winddirection" in col], + errors="ignore", + ) + + res = work.groupby("month").agg( + avg_ws=(ws_column, "mean"), kwh_total=(kw_column, "sum") + ) + res["kwh_total"] *= ( + 30 / 20.0 + ) # Approximation: 30 days per month, averaged over 20 years + + res.rename( + columns={"avg_ws": "Average wind speed (m/s)", "kwh_total": "kWh produced"}, + inplace=True, + ) res.index = pd.Series(res.index).apply(lambda x: calendar.month_abbr[int(x)]) res["kWh produced"] = res["kWh produced"].round().astype(int) - res["Average wind speed (m/s)"] = res["Average wind speed (m/s)"].astype(float).map('{:,.2f}'.format) + res["Average wind speed (m/s)"] = ( + res["Average wind speed (m/s)"].astype(float).map("{:,.2f}".format) + ) - return res.to_dict(orient="index") \ No newline at end of file + return res.to_dict(orient="index") diff --git a/windwatts-api/app/power_curve/powercurve.py b/windwatts-api/app/power_curve/powercurve.py index 6671f67c..342aa856 100644 --- a/windwatts-api/app/power_curve/powercurve.py +++ b/windwatts-api/app/power_curve/powercurve.py @@ -2,21 +2,26 @@ import numpy as np from scipy import interpolate -class PowerCurve(object): +class PowerCurve(object): def __init__(self, power_curve_path): - # Load data and 
minimal preprocessing if ".xslx" in power_curve_path: self.raw_data = pd.read_excel(power_curve_path) - self.raw_data.rename(columns={"Wind Speed (m/s)": "ws", "Turbine Output": "kw"}, inplace=True) + self.raw_data.rename( + columns={"Wind Speed (m/s)": "ws", "Turbine Output": "kw"}, inplace=True + ) elif ".csv" in power_curve_path: self.raw_data = pd.read_csv(power_curve_path) - self.raw_data.rename(columns={"Wind Speed (m/s)": "ws", "Turbine Output": "kw"}, inplace=True) - #print(self.raw_data.columns) + self.raw_data.rename( + columns={"Wind Speed (m/s)": "ws", "Turbine Output": "kw"}, inplace=True + ) + # print(self.raw_data.columns) else: - raise ValueError("Unsupported powercurve file format (should be .xslx or .csv).") + raise ValueError( + "Unsupported powercurve file format (should be .xslx or .csv)." + ) # Add (0,0) if not there already if self.raw_data["ws"].min() > 0: @@ -29,9 +34,11 @@ def __init__(self, power_curve_path): self.interp_y = self.raw_data.kw # Cubic interpolation - #self.powercurve_intrp = interp1d(self.interp_x, self.interp_y, kind='cubic') + # self.powercurve_intrp = interp1d(self.interp_x, self.interp_y, kind='cubic') # Switched back to linear to avoid bad interpolation with negative values - self.powercurve_intrp = interpolate.interp1d(self.interp_x, self.interp_y, kind='linear') + self.powercurve_intrp = interpolate.interp1d( + self.interp_x, self.interp_y, kind="linear" + ) # Saving a list of instances where windspeeds are higher/lower than what is in the curve self.above_curve = [] @@ -41,23 +48,33 @@ def __init__(self, power_curve_path): self.reset_counters() - def windspeed_to_kw(self, df, ws_column="ws-adjusted", dt_column="datetime", trim=True): - """ Converts wind speed to kw """ - + def windspeed_to_kw( + self, df, ws_column="ws-adjusted", dt_column="datetime", trim=True + ): + """Converts wind speed to kw""" + # by default round down/up values below or under the range of the curve if trim: - ws = df[ws_column].apply(lambda 
x: 0 if x < 0 else x).apply(lambda x: self.max_ws if x > self.max_ws else x) + ws = ( + df[ws_column] + .apply(lambda x: 0 if x < 0 else x) + .apply(lambda x: self.max_ws if x > self.max_ws else x) + ) else: ws = df[ws_column] - + kw = self.powercurve_intrp(ws) below_curve = df[kw < 0] above_curve = df[kw > self.max_ws] if dt_column in df.columns: - self.below_curve.extend(zip(below_curve[dt_column].tolist(), below_curve[ws_column].tolist())) - self.above_curve.extend(zip(above_curve[dt_column].tolist(), above_curve[ws_column].tolist())) + self.below_curve.extend( + zip(below_curve[dt_column].tolist(), below_curve[ws_column].tolist()) + ) + self.above_curve.extend( + zip(above_curve[dt_column].tolist(), above_curve[ws_column].tolist()) + ) return kw @@ -65,17 +82,20 @@ def reset_counters(self): self.above_curve = [] self.below_curve = [] - def plot(self): - fig = px.line(y=self.powercurve_intrp(self.interp_x), x=self.interp_x, - labels={"x":"Windspeed (m/s)","y":"Power (kW)"}) - fig.add_trace(go.Scatter(y=self.interp_y, x=self.interp_x, - mode='markers', - name='Data')) - fig.show() + # def plot(self): + # fig = px.line( + # y=self.powercurve_intrp(self.interp_x), + # x=self.interp_x, + # labels={"x": "Windspeed (m/s)", "y": "Power (kW)"}, + # ) + # fig.add_trace( + # go.Scatter(y=self.interp_y, x=self.interp_x, mode="markers", name="Data") + # ) + # fig.show() def kw_to_windspeed(self, df, kw_column="output_power_mean"): # Sampling a hundred points from the interpolated function # allows us to invert with an approximate accuracy of 12/100 or 0.1 ws2 = np.linspace(0, 12, num=100) pc2 = self.powercurve_intrp(ws2) - return df[kw_column].map(lambda x: ws2[np.abs(pc2 - x).argmin()] ) + return df[kw_column].map(lambda x: ws2[np.abs(pc2 - x).argmin()]) diff --git a/windwatts-api/app/schemas.py b/windwatts-api/app/schemas.py index e58a9bb1..aa6f4b49 100644 --- a/windwatts-api/app/schemas.py +++ b/windwatts-api/app/schemas.py @@ -9,15 +9,13 @@ ValueMapAlphaNumericNone = 
Dict[str, AlphaNumericNone] ValueMapNumericList = List[ValueMapNumeric] + # Wind speed response models for different avg_types class GlobalWindSpeedResponse(BaseModel): global_avg: float - model_config = { - "json_schema_extra": { - "example": {"global_avg": 2.1} - } - } + model_config = {"json_schema_extra": {"example": {"global_avg": 2.1}}} + class YearlyWindSpeedResponse(BaseModel): yearly_avg: ValueMapNumericList @@ -27,12 +25,13 @@ class YearlyWindSpeedResponse(BaseModel): "example": { "yearly_avg": [ {"year": 2020, "windspeed_100m": 5.23}, - {"year": 2021, "windspeed_100m": 5.34} + {"year": 2021, "windspeed_100m": 5.34}, ] } } } + class MonthlyWindSpeedResponse(BaseModel): monthly_avg: ValueMapNumericList @@ -42,12 +41,13 @@ class MonthlyWindSpeedResponse(BaseModel): "monthly_avg": [ {"month": 1, "windspeed_100m": 5.12}, {"month": 2, "windspeed_100m": 5.45}, - {"month": 12, "windspeed_100m": 6.10} + {"month": 12, "windspeed_100m": 6.10}, ] } } } + class HourlyWindSpeedResponse(BaseModel): hourly_avg: ValueMapNumericList @@ -57,14 +57,21 @@ class HourlyWindSpeedResponse(BaseModel): "hourly_avg": [ {"hour": 0, "windspeed_100m": 5.12}, {"hour": 2, "windspeed_100m": 5.45}, - {"hour": 10, "windspeed_100m": 6.10} + {"hour": 10, "windspeed_100m": 6.10}, ] } } } + # Union type for wind speed responses - FastAPI will show all examples -WindSpeedResponse = Union[GlobalWindSpeedResponse, YearlyWindSpeedResponse, MonthlyWindSpeedResponse, HourlyWindSpeedResponse] +WindSpeedResponse = Union[ + GlobalWindSpeedResponse, + YearlyWindSpeedResponse, + MonthlyWindSpeedResponse, + HourlyWindSpeedResponse, +] + class AvailablePowerCurvesResponse(BaseModel): available_power_curves: List[str] @@ -80,15 +87,13 @@ class AvailablePowerCurvesResponse(BaseModel): } } + # Energy production response models for different time_periods class AllEnergyProductionResponse(BaseModel): energy_production: Numeric = Field(description="global-averaged kWh produced") - model_config = { - 
"json_schema_extra": { - "example": {"energy_production": 12345.67} - } - } + model_config = {"json_schema_extra": {"example": {"energy_production": 12345.67}}} + class SummaryEnergyProductionResponse(BaseModel): summary_avg_energy_production: Dict[str, ValueMapAlphaNumericNone] @@ -97,14 +102,27 @@ class SummaryEnergyProductionResponse(BaseModel): "json_schema_extra": { "example": { "summary_avg_energy_production": { - "Lowest year": {"year": 2015, "Average wind speed (m/s)": "5.36", "kWh produced": 202791}, - "Average year": {"year": None, "Average wind speed (m/s)": "5.86", "kWh produced": 267712}, - "Highest year": {"year": 2014, "Average wind speed (m/s)": "6.32", "kWh produced": 326354} + "Lowest year": { + "year": 2015, + "Average wind speed (m/s)": "5.36", + "kWh produced": 202791, + }, + "Average year": { + "year": None, + "Average wind speed (m/s)": "5.86", + "kWh produced": 267712, + }, + "Highest year": { + "year": 2014, + "Average wind speed (m/s)": "6.32", + "kWh produced": 326354, + }, } } } } + class YearlyEnergyProductionResponse(BaseModel): yearly_avg_energy_production: Dict[str, ValueMapAlphaNumeric] @@ -112,13 +130,20 @@ class YearlyEnergyProductionResponse(BaseModel): "json_schema_extra": { "example": { "yearly_avg_energy_production": { - "2001": {"Average wind speed (m/s)": "5.65", "kWh produced": 250117}, - "2002": {"Average wind speed (m/s)": "5.72", "kWh produced": 264044} + "2001": { + "Average wind speed (m/s)": "5.65", + "kWh produced": 250117, + }, + "2002": { + "Average wind speed (m/s)": "5.72", + "kWh produced": 264044, + }, } } } } + class FullEnergyProductionResponse(BaseModel): energy_production: Numeric = Field(description="global-averaged kWh produced") summary_avg_energy_production: Dict[str, ValueMapAlphaNumericNone] @@ -129,17 +154,30 @@ class FullEnergyProductionResponse(BaseModel): "example": { "energy_production": 500, "summary_avg_energy_production": { - "Lowest year": {"year": 2015, "Average wind speed (m/s)": "5.36", 
"kWh produced": 202791}, - "Average year": {"year": None, "Average wind speed (m/s)": "5.86", "kWh produced": 267712}, - "Highest year": {"year": 2014, "Average wind speed (m/s)": "6.32", "kWh produced": 326354} + "Lowest year": { + "year": 2015, + "Average wind speed (m/s)": "5.36", + "kWh produced": 202791, + }, + "Average year": { + "year": None, + "Average wind speed (m/s)": "5.86", + "kWh produced": 267712, + }, + "Highest year": { + "year": 2014, + "Average wind speed (m/s)": "6.32", + "kWh produced": 326354, + }, }, "yearly_avg_energy_production": { "2001": {"Average wind speed (m/s)": "5.65", "kWh produced": 250117} - } + }, } } } + class MonthlyEnergyProductionResponse(BaseModel): monthly_avg_energy_production: Dict[str, ValueMapAlphaNumeric] @@ -148,23 +186,28 @@ class MonthlyEnergyProductionResponse(BaseModel): "example": { "monthly_avg_energy_production": { "Jan": {"Average wind speed, m/s": "3.80", "kWh produced": "5,934"}, - "Feb": {"Average wind speed, m/s": "3.92", "kWh produced": "6,357"} + "Feb": {"Average wind speed, m/s": "3.92", "kWh produced": "6,357"}, } } } } + # Union type for energy production responses -EnergyProductionResponse = Union[AllEnergyProductionResponse, SummaryEnergyProductionResponse, YearlyEnergyProductionResponse, FullEnergyProductionResponse, MonthlyEnergyProductionResponse] +EnergyProductionResponse = Union[ + AllEnergyProductionResponse, + SummaryEnergyProductionResponse, + YearlyEnergyProductionResponse, + FullEnergyProductionResponse, + MonthlyEnergyProductionResponse, +] + class HealthCheckResponse(BaseModel): status: Literal["up"] = "up" - model_config = { - "json_schema_extra": { - "example": {"status": "up"} - } - } + model_config = {"json_schema_extra": {"example": {"status": "up"}}} + class GridLocation(BaseModel): index: str = Field(..., description="Grid point identifier/index") @@ -173,82 +216,100 @@ class GridLocation(BaseModel): model_config = { "json_schema_extra": { - "example": {"index": "031233", 
"latitude": 43.653, "longitude": -79.47437700534891} + "example": { + "index": "031233", + "latitude": 43.653, + "longitude": -79.47437700534891, + } } } + class NearestLocationsResponse(BaseModel): locations: List[GridLocation] = Field( - ..., - min_length=1, - max_length=4, - description="List of nearest grid locations (1-4 points)" + ..., + min_length=1, + max_length=4, + description="List of nearest grid locations (1-4 points)", ) model_config = { "json_schema_extra": { "example": { "locations": [ - {"index": "031233", "latitude": 43.653, "longitude": -79.47437700534891}, - {"index": "031234", "latitude": 43.653, "longitude": -79.22437433155213} + { + "index": "031233", + "latitude": 43.653, + "longitude": -79.47437700534891, + }, + { + "index": "031234", + "latitude": 43.653, + "longitude": -79.22437433155213, + }, ] } } } + class TimeseriesBatchRequest(BaseModel): locations: List[GridLocation] = Field( - ..., + ..., min_length=1, - description="List of grid locations to download timeseries data for" + description="List of grid locations to download timeseries data for", ) years: Optional[List[int]] = Field( - None, - description="Years to download (defaults to sample years if not provided)" + None, description="Years to download (defaults to sample years if not provided)" ) source: str = Field( "s3", - description="Data source: athena or s3 (typically s3 for timeseries downloads)" + description="Data source: athena or s3 (typically s3 for timeseries downloads)", ) model_config = { "json_schema_extra": { "example": { "locations": [ - {"index": "031233", "latitude": 43.653, "longitude": -79.47437700534891}, - {"index": "031234", "latitude": 43.653, "longitude": -79.22437433155213} + { + "index": "031233", + "latitude": 43.653, + "longitude": -79.47437700534891, + }, + { + "index": "031234", + "latitude": 43.653, + "longitude": -79.22437433155213, + }, ], "years": [2020, 2021, 2022], - "source": "s3" + "source": "s3", } } } + class ModelInfoResponse(BaseModel): 
model: str = Field(..., description="Data model name") supported_periods: Dict[str, List[str]] = Field( - default_factory=dict, - description="Supported aggregation periods for windspeed/ production" + default_factory=dict, + description="Supported aggregation periods for windspeed/ production", ) available_years: List[int] = Field( - ..., - description="Available years for timeseries data" + ..., description="Available years for timeseries data" ) available_heights: List[int] = Field( - ..., - description="Supported hub heights (in meters)" + ..., description="Supported hub heights (in meters)" ) grid_info: Dict[str, AlphaNumeric] = Field( default_factory=dict, - description="Metadata about the model grid (bounds, resolution, etc.)" + description="Metadata about the model grid (bounds, resolution, etc.)", ) references: List[str] = Field( - ..., - description="References of relevant publications or documents" + ..., description="References of relevant publications or documents" ) links: List[str] = Field( - ..., - description="Links to data sources or relevant resources" + ..., description="Links to data sources or relevant resources" ) model_config = { "json_schema_extra": { @@ -256,9 +317,21 @@ class ModelInfoResponse(BaseModel): "model": "era5", "supported_periods": { "windspeed": ["all", "annual"], - "production": ["all", "summary", "annual", "full"] + "production": ["all", "summary", "annual", "full"], }, - "available_years":[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023], + "available_years": [ + 2013, + 2014, + 2015, + 2016, + 2017, + 2018, + 2019, + 2020, + 2021, + 2022, + 2023, + ], "available_heights": [30, 40, 50, 60, 80, 100], "grid_info": { "min_lat": 23.402, @@ -266,14 +339,14 @@ class ModelInfoResponse(BaseModel): "max_lat": 51.403, "max_long": -44.224, "spatial_resolution": "31 km", - "temporal_resolution": "1 hour" + "temporal_resolution": "1 hour", }, "links": [ "https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5" ], 
"references": [ 'Phillips, C., L. M. Sheridan, P. Conry, D. K. Fytanidis, D. Duplyakin, S. Zisman, N. Duboc, M. Nelson, R. Kotamarthi, R. Linn, M. Broersma, T. Spijkerboer, and H. Tinnesand. 2022. "Evaluation of Obstacle Modelling Approaches for Resource Assessment and Small Wind Turbine Siting: Case Study in the Northern Netherlands." Wind Energy Science 7: 1153-1169. https://doi.org/10.5194/wes-7-1153-2022' - ] + ], } } - } \ No newline at end of file + } diff --git a/windwatts-api/app/utils/data_fetcher_utils.py b/windwatts-api/app/utils/data_fetcher_utils.py index 09fdec7d..2372a719 100644 --- a/windwatts-api/app/utils/data_fetcher_utils.py +++ b/windwatts-api/app/utils/data_fetcher_utils.py @@ -1,5 +1,6 @@ from typing import List + def generate_key(lat: float, lon: float, height: List[int], yearly: bool) -> str: """ Generate a unique key for the database based on the parameters. @@ -13,17 +14,19 @@ def generate_key(lat: float, lon: float, height: List[int], yearly: bool) -> str Returns: str: A unique key for the database. """ - height_str = '_'.join(map(str, height)) + height_str = "_".join(map(str, height)) return f"{lat}_{lon}_{height_str}_{'yearly' if yearly else 'latest'}" + def format_coordinate(coordinate: float) -> str: """Format a coordinate to 3 decimal places, matching JS .toFixed(3).""" return f"{coordinate:.3f}" + def chunker(file_obj, chunk_size: int = 1024 * 1024): """Stream the spooled file in chunks for constant memory usage""" while True: data = file_obj.read(chunk_size) if not data: break - yield data \ No newline at end of file + yield data diff --git a/windwatts-api/app/utils/random_message.py b/windwatts-api/app/utils/random_message.py index 93dfeff8..9d52bc2a 100644 --- a/windwatts-api/app/utils/random_message.py +++ b/windwatts-api/app/utils/random_message.py @@ -5,8 +5,9 @@ "Goodbye, World!", "I am a message!", "This is a message!", - "Random message!" 
+ "Random message!", ] + def random_message(): return random.choice(messages) diff --git a/windwatts-api/app/utils/validation.py b/windwatts-api/app/utils/validation.py index 1020175f..41b1039b 100644 --- a/windwatts-api/app/utils/validation.py +++ b/windwatts-api/app/utils/validation.py @@ -14,7 +14,7 @@ def validate_model(model: str) -> str: if model not in MODEL_CONFIG: raise HTTPException( status_code=400, - detail=f"Invalid model. Must be one of: {list(MODEL_CONFIG.keys())}" + detail=f"Invalid model. Must be one of: {list(MODEL_CONFIG.keys())}", ) return model @@ -25,7 +25,7 @@ def validate_source(model: str, source: str) -> str: if source not in valid_sources: raise HTTPException( status_code=400, - detail=f"Invalid source for {model}. Must be one of: {valid_sources}" + detail=f"Invalid source for {model}. Must be one of: {valid_sources}", ) return source @@ -36,7 +36,7 @@ def validate_period_type(model: str, period_type: str, data_type: str) -> str: if period_type not in valid_periods: raise HTTPException( status_code=400, - detail=f"Invalid period for {model}. Must be one of: {valid_periods}" + detail=f"Invalid period for {model}. 
Must be one of: {valid_periods}", ) return period_type @@ -46,7 +46,10 @@ def validate_lat(model: str, lat: float) -> float: min_lat = MODEL_CONFIG[model]["grid_info"].get("min_lat") max_lat = MODEL_CONFIG[model]["grid_info"].get("max_lat") if not (min_lat <= lat <= max_lat): - raise HTTPException(status_code=400, detail=f"Latitude for {model} must be between {min_lat} and {max_lat}.") + raise HTTPException( + status_code=400, + detail=f"Latitude for {model} must be between {min_lat} and {max_lat}.", + ) return lat @@ -55,7 +58,10 @@ def validate_lng(model: str, lng: float) -> float: min_lng = MODEL_CONFIG[model]["grid_info"].get("min_long") max_lng = MODEL_CONFIG[model]["grid_info"].get("max_long") if not (min_lng <= lng <= max_lng): - raise HTTPException(status_code=400, detail=f"Longitude for {model} must be between {min_lng} and {max_lng}.") + raise HTTPException( + status_code=400, + detail=f"Longitude for {model} must be between {min_lng} and {max_lng}.", + ) return lng @@ -65,14 +71,14 @@ def validate_height(model: str, height: int) -> int: if valid_heights and height not in valid_heights: raise HTTPException( status_code=400, - detail=f"Invalid height for {model}. Must be one of: {valid_heights}" + detail=f"Invalid height for {model}. Must be one of: {valid_heights}", ) return height def validate_powercurve(powercurve: str) -> str: """Validate power curve name""" - if not re.match(r'^[\w\-.]+$', powercurve): + if not re.match(r"^[\w\-.]+$", powercurve): raise HTTPException(status_code=400, detail="Invalid power curve name.") if powercurve not in power_curve_manager.power_curves: raise HTTPException(status_code=400, detail="Power curve not found.") @@ -86,7 +92,7 @@ def validate_year(year: int, model: str) -> int: year_range = f"{min(valid_years)}-{max(valid_years)}" raise HTTPException( status_code=400, - detail=f"Invalid year for {model}. Currently supporting years {year_range}" + detail=f"Invalid year for {model}. 
Currently supporting years {year_range}", ) return year @@ -96,6 +102,6 @@ def validate_limit(limit: int) -> int: if not 1 <= limit <= 4: raise HTTPException( status_code=400, - detail="Invalid limit. Currently supporting up to 4 nearest grid points" + detail="Invalid limit. Currently supporting up to 4 nearest grid points", ) return limit diff --git a/windwatts-api/app/utils/wind_data_core.py b/windwatts-api/app/utils/wind_data_core.py index 6b113637..85ac0186 100644 --- a/windwatts-api/app/utils/wind_data_core.py +++ b/windwatts-api/app/utils/wind_data_core.py @@ -17,7 +17,7 @@ validate_source, validate_period_type, validate_powercurve, - validate_year + validate_year, ) from app.power_curve.global_power_curve_manager import power_curve_manager @@ -29,11 +29,11 @@ def get_windspeed_core( height: int, period: str, source: str, - data_fetcher_router: DataFetcherRouter + data_fetcher_router: DataFetcherRouter, ): """ Core function to retrieve wind speed data from the source database. - + Args: model (str): Data model (era5, wtk, ensemble). lat (float): Latitude of the location. @@ -53,13 +53,8 @@ def get_windspeed_core( source = validate_source(model, source) period = validate_period_type(model, period, "windspeed") - params = { - "lat": lat, - "lng": lng, - "height": height, - "period": period - } - + params = {"lat": lat, "lng": lng, "height": height, "period": period} + key = f"{source}_{model}" data = data_fetcher_router.fetch_data(params, key=key) if data is None: @@ -75,11 +70,11 @@ def get_production_core( powercurve: str, period: str, source: str, - data_fetcher_router: DataFetcherRouter + data_fetcher_router: DataFetcherRouter, ): """ Core function to retrieve energy production data. - + Args: model (str): Data model (era5, wtk, ensemble). lat (float): Latitude of the location. 
@@ -102,84 +97,110 @@ def get_production_core( period = validate_period_type(model, period, "production") # Always fetch raw data for production calculations - params = { - "lat": lat, - "lng": lng, - "height": height - } - + params = {"lat": lat, "lng": lng, "height": height} + key = f"{source}_{model}" df = data_fetcher_router.fetch_raw(params, key=key) if df is None: raise HTTPException(status_code=404, detail="Data not found") - - if period == 'all': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, powercurve) - return {"energy_production": summary_avg_energy_production['Average year']['kWh produced']} - - elif period == 'summary': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, powercurve) + + if period == "all": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, powercurve + ) + ) + return { + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ] + } + + elif period == "summary": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, powercurve + ) + ) return {"summary_avg_energy_production": summary_avg_energy_production} - - elif period == 'annual': - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, powercurve) + + elif period == "annual": + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, powercurve + ) + ) return {"yearly_avg_energy_production": yearly_avg_energy_production} - - elif period == 'monthly': - monthly_avg_energy_production = power_curve_manager.calculate_monthly_energy_production(df, height, powercurve) + + elif period == "monthly": + monthly_avg_energy_production = ( + power_curve_manager.calculate_monthly_energy_production( + df, height, powercurve + ) + ) return 
{"monthly_avg_energy_production": monthly_avg_energy_production} - - elif period == 'full': - summary_avg_energy_production = power_curve_manager.calculate_energy_production_summary(df, height, powercurve) - yearly_avg_energy_production = power_curve_manager.calculate_yearly_energy_production(df, height, powercurve) + + elif period == "full": + summary_avg_energy_production = ( + power_curve_manager.calculate_energy_production_summary( + df, height, powercurve + ) + ) + yearly_avg_energy_production = ( + power_curve_manager.calculate_yearly_energy_production( + df, height, powercurve + ) + ) return { - "energy_production": summary_avg_energy_production['Average year']['kWh produced'], + "energy_production": summary_avg_energy_production["Average year"][ + "kWh produced" + ], "summary_avg_energy_production": summary_avg_energy_production, - "yearly_avg_energy_production": yearly_avg_energy_production - } + "yearly_avg_energy_production": yearly_avg_energy_production, + } + def get_timeseries_core( model: str, gridIndices: List[str], years: List[int], source: str, - data_fetcher_router: DataFetcherRouter + data_fetcher_router: DataFetcherRouter, ): """ Core function to retrieve timeseries data for download. - + Args: model (str): Data model (era5, wtk, ensemble). gridIndices (List[str]): List of grid indices to retrieve. years (List[int]): List of years to retrieve, default to sample years. source (str): Source of the data. data_fetcher_router: Router instance for fetching data. - + Returns: str: CSV content as string. 
""" from io import StringIO from app.config.model_config import MODEL_CONFIG - + model = validate_model(model) source = validate_source(model, source) - + if years is None: years = MODEL_CONFIG[model]["years"].get("sample", []) - + years = [validate_year(year, model) for year in years] - - params = { - "gridIndices": gridIndices, - "years": years - } - + + params = {"gridIndices": gridIndices, "years": years} + key = f"{source}_{model}" df = data_fetcher_router.fetch_data(params, key=key) if df is None or df.empty: - raise HTTPException(status_code=404, detail="No data found for the specified parameters") - + raise HTTPException( + status_code=404, detail="No data found for the specified parameters" + ) + # Convert DataFrame to CSV string csv_io = StringIO() df.to_csv(csv_io, index=False) diff --git a/windwatts-api/cache_manager.py b/windwatts-api/cache_manager.py index 89f84d25..dee84ed2 100644 --- a/windwatts-api/cache_manager.py +++ b/windwatts-api/cache_manager.py @@ -1,8 +1,9 @@ from cachetools import cached, TTLCache + class CacheManager: def __init__(self, maxsize=100, ttl=300): self.cache = TTLCache(maxsize=maxsize, ttl=ttl) def cache_data(self, func): - return cached(self.cache)(func) \ No newline at end of file + return cached(self.cache)(func) diff --git a/windwatts-api/requirements-dev.txt b/windwatts-api/requirements-dev.txt index e079f8a6..71cc5395 100644 --- a/windwatts-api/requirements-dev.txt +++ b/windwatts-api/requirements-dev.txt @@ -1 +1,2 @@ pytest +ruff diff --git a/windwatts-api/scripts/generate_openapi.py b/windwatts-api/scripts/generate_openapi.py deleted file mode 100644 index 68bee1eb..00000000 --- a/windwatts-api/scripts/generate_openapi.py +++ /dev/null @@ -1,33 +0,0 @@ -import json -import os -import sys -from pathlib import Path - - -def main() -> None: - # Ensure heavy data initializers in controllers are skipped in local runs - os.environ.setdefault("SKIP_DATA_INIT", "1") - - # Add repo root to PYTHONPATH so `import app` works when 
executing directly - repo_root = Path(__file__).resolve().parents[1] - if str(repo_root) not in sys.path: - sys.path.insert(0, str(repo_root)) - - from app.main import app - - openapi_schema = app.openapi() - - # Write to a deterministic location inside the repo so it can be served or consumed - output_dir = Path(__file__).resolve().parent.parent / "app" / "static" / "docs" - output_dir.mkdir(parents=True, exist_ok=True) - output_path = output_dir / "openapi.json" - with output_path.open("w", encoding="utf-8") as fp: - json.dump(openapi_schema, fp, indent=2) - - print(f"OpenAPI schema written to {output_path}") - - -if __name__ == "__main__": - main() - - diff --git a/windwatts-api/tests/test_config_manager_env.py b/windwatts-api/tests/test_config_manager_env.py index 393a8492..c0dde694 100644 --- a/windwatts-api/tests/test_config_manager_env.py +++ b/windwatts-api/tests/test_config_manager_env.py @@ -2,22 +2,22 @@ from app.config_manager import ConfigManager # Set required top-level environment variables -os.environ['REGION_NAME'] = 'us-west-2' -os.environ['OUTPUT_LOCATION'] = 's3://test-bucket/' -os.environ['OUTPUT_BUCKET'] = 'test-bucket' -os.environ['DATABASE'] = 'test_database' -os.environ['ATHENA_WORKGROUP'] = 'test_workgroup' +os.environ["REGION_NAME"] = "us-west-2" +os.environ["OUTPUT_LOCATION"] = "s3://test-bucket/" +os.environ["OUTPUT_BUCKET"] = "test-bucket" +os.environ["DATABASE"] = "test_database" +os.environ["ATHENA_WORKGROUP"] = "test_workgroup" # Set environment variables for two sources: wtk and era5 -os.environ['SOURCES_WTK_BUCKET_NAME'] = 'wtk-bucket' -os.environ['SOURCES_WTK_ATHENA_TABLE_NAME'] = 'wtk_table' -os.environ['SOURCES_WTK_ALT_ATHENA_TABLE_NAME'] = 'wtk_alt_table' -os.environ['SOURCES_ERA5_BUCKET_NAME'] = 'era5-bucket' -os.environ['SOURCES_ERA5_ATHENA_TABLE_NAME'] = 'era5_table' +os.environ["SOURCES_WTK_BUCKET_NAME"] = "wtk-bucket" +os.environ["SOURCES_WTK_ATHENA_TABLE_NAME"] = "wtk_table" 
+os.environ["SOURCES_WTK_ALT_ATHENA_TABLE_NAME"] = "wtk_alt_table" +os.environ["SOURCES_ERA5_BUCKET_NAME"] = "era5-bucket" +os.environ["SOURCES_ERA5_ATHENA_TABLE_NAME"] = "era5_table" # alt_athena_table_name is optional # Instantiate ConfigManager (no secret ARN, no local file) -cm = ConfigManager(secret_arn_env_var='DUMMY_SECRET_ARN', local_config_path=None) +cm = ConfigManager(secret_arn_env_var="DUMMY_SECRET_ARN", local_config_path=None) # Get config path config_path = cm.get_config() @@ -25,4 +25,4 @@ # Print the contents for verification with open(config_path) as f: - print(f.read()) \ No newline at end of file + print(f.read()) diff --git a/windwatts-api/tests/test_random_controller.py b/windwatts-api/tests/test_random_controller.py index 162f2cad..6ddeb61f 100644 --- a/windwatts-api/tests/test_random_controller.py +++ b/windwatts-api/tests/test_random_controller.py @@ -4,24 +4,28 @@ client = TestClient(app) + def test_read_root(): response = client.get("/random") json = response.json() assert response.status_code == 200 assert "message" in json and json["message"] in messages + def test_read_chuck(): response = client.get("/random/chuck") json = response.json() assert response.status_code == 200 assert "joke" in json + def test_read_chuck_category(): response = client.get("/random/chuck/dev") json = response.json() assert response.status_code == 200 assert "joke" in json + def test_read_chuck_category_invalid(): response = client.get("/random/chuck/invalid") json = response.json() diff --git a/windwatts-api/tests/test_v1_api.py b/windwatts-api/tests/test_v1_api.py index 538ae78c..0a6463bd 100644 --- a/windwatts-api/tests/test_v1_api.py +++ b/windwatts-api/tests/test_v1_api.py @@ -2,31 +2,37 @@ Tests for V1 API endpoints. 
""" + from fastapi.testclient import TestClient from app.main import app client = TestClient(app) + class TestV1WindspeedEndpoints: """Test windspeed endpoints for all models.""" - + def test_era5_windspeed_default(self): """Test ERA5 windspeed with default period.""" response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40") assert response.status_code == 200 json = response.json() assert "global_avg" in json - + def test_era5_windspeed_all(self): """Test ERA5 windspeed with period=all.""" - response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=all") + response = client.get( + "/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=all" + ) assert response.status_code == 200 json = response.json() assert "global_avg" in json - + def test_era5_windspeed_annual(self): """Test ERA5 windspeed with period=annual.""" - response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=annual") + response = client.get( + "/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=annual" + ) assert response.status_code == 200 json = response.json() assert isinstance(json, dict) @@ -34,35 +40,39 @@ def test_era5_windspeed_annual(self): if json: first_key = list(json.keys())[0] assert first_key.isdigit() or first_key == "2013" # Year format - + def test_wtk_windspeed_default(self): """Test WTK windspeed with default period.""" response = client.get("/api/v1/wtk/windspeed?lat=40.0&lng=-100.0&height=80") assert response.status_code == 200 json = response.json() assert "global_avg" in json - + def test_ensemble_windspeed(self): """Test ensemble windspeed.""" response = client.get("/api/v1/ensemble/windspeed?lat=40.0&lng=-70.0&height=40") assert response.status_code == 200 json = response.json() assert "global_avg" in json - + def test_windspeed_invalid_model(self): """Test windspeed with invalid model.""" - response = client.get("/api/v1/invalid_model/windspeed?lat=40.0&lng=-70.0&height=40") + response = 
client.get( + "/api/v1/invalid_model/windspeed?lat=40.0&lng=-70.0&height=40" + ) assert response.status_code == 400 - + def test_windspeed_invalid_period(self): """Test windspeed with invalid period.""" - response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=invalid") + response = client.get( + "/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=40&period=invalid" + ) assert response.status_code == 400 class TestV1ProductionEndpoints: """Test production endpoints for all models.""" - + def test_era5_production_all(self): """Test ERA5 production with period=all.""" response = client.get( @@ -72,7 +82,7 @@ def test_era5_production_all(self): json = response.json() assert "energy_production" in json assert isinstance(json["energy_production"], (int, float)) - + def test_era5_production_summary(self): """Test ERA5 production with period=summary.""" response = client.get( @@ -81,7 +91,7 @@ def test_era5_production_summary(self): assert response.status_code == 200 json = response.json() assert "energy_production" in json - + def test_era5_production_annual(self): """Test ERA5 production with period=annual.""" response = client.get( @@ -90,7 +100,7 @@ def test_era5_production_annual(self): assert response.status_code == 200 json = response.json() assert "yearly_avg_energy_production" in json - + def test_era5_production_full(self): """Test ERA5 production with period=full.""" response = client.get( @@ -101,7 +111,7 @@ def test_era5_production_full(self): assert "energy_production" in json assert "summary_avg_energy_production" in json assert "yearly_avg_energy_production" in json - + def test_wtk_production(self): """Test WTK production.""" response = client.get( @@ -110,7 +120,7 @@ def test_wtk_production(self): assert response.status_code == 200 json = response.json() assert "energy_production" in json - + def test_ensemble_production(self): """Test ensemble production (only supports period=all).""" response = client.get( @@ -119,14 +129,14 @@ 
def test_ensemble_production(self): assert response.status_code == 200 json = response.json() assert "energy_production" in json - + def test_ensemble_production_full_fails(self): """Test ensemble production with period=full should fail.""" response = client.get( "/api/v1/ensemble/production?lat=40.0&lng=-70.0&height=40&powercurve=nlr-reference-100kW&period=full" ) assert response.status_code == 400 - + def test_production_invalid_powercurve(self): """Test production with invalid power curve.""" response = client.get( @@ -137,7 +147,7 @@ def test_production_invalid_powercurve(self): class TestV1PowerCurves: """Test power curves endpoint.""" - + def test_get_powercurves(self): """Test getting available power curves.""" response = client.get("/api/v1/powercurves") @@ -152,7 +162,7 @@ def test_get_powercurves(self): class TestV1GridPoints: """Test grid points endpoints.""" - + def test_era5_grid_points(self): """Test ERA5 grid points lookup.""" response = client.get("/api/v1/era5/grid-points?lat=40.0&lng=-70.0&limit=1") @@ -164,14 +174,14 @@ def test_era5_grid_points(self): assert "index" in json["locations"][0] assert "latitude" in json["locations"][0] assert "longitude" in json["locations"][0] - + def test_grid_points_multiple_neighbors(self): """Test grid points with multiple neighbors.""" response = client.get("/api/v1/era5/grid-points?lat=40.0&lng=-70.0&limit=4") assert response.status_code == 200 json = response.json() assert len(json["locations"]) == 4 - + def test_wtk_grid_points(self): """Test WTK grid points lookup.""" response = client.get("/api/v1/wtk/grid-points?lat=40.0&lng=-100.0&limit=1") @@ -182,22 +192,22 @@ def test_wtk_grid_points(self): class TestV1Validation: """Test parameter validation.""" - + def test_invalid_latitude(self): """Test with invalid latitude.""" response = client.get("/api/v1/era5/windspeed?lat=100.0&lng=-70.0&height=40") assert response.status_code == 400 - + def test_invalid_longitude(self): """Test with invalid longitude.""" 
response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-200.0&height=40") assert response.status_code == 400 - + def test_invalid_height(self): """Test with invalid height.""" response = client.get("/api/v1/era5/windspeed?lat=40.0&lng=-70.0&height=500") assert response.status_code == 400 - + def test_missing_required_params(self): """Test with missing required parameters.""" response = client.get("/api/v1/era5/windspeed") @@ -206,7 +216,7 @@ def test_missing_required_params(self): class TestV1Info: """Test info endpoint.""" - + def test_era5_info(self): """Test ERA5 info endpoint.""" response = client.get("/api/v1/era5/info") @@ -216,14 +226,14 @@ def test_era5_info(self): assert json["model"] == "era5" assert "sources" in json assert "heights" in json - + def test_wtk_info(self): """Test WTK info endpoint.""" response = client.get("/api/v1/wtk/info") assert response.status_code == 200 json = response.json() assert json["model"] == "wtk" - + def test_ensemble_info(self): """Test ensemble info endpoint.""" response = client.get("/api/v1/ensemble/info") @@ -234,4 +244,5 @@ def test_ensemble_info(self): if __name__ == "__main__": import pytest + pytest.main([__file__, "-v"]) diff --git a/windwatts-ui/README.md b/windwatts-ui/README.md index 9e6348ba..98d7140f 100644 --- a/windwatts-ui/README.md +++ b/windwatts-ui/README.md @@ -1,7 +1,8 @@ +# WindWatts UI (Frontend) -# WindWatts Frontend App +This directory contains the React frontend for WindWatts, built with Vite and Material UI. -This project is a React application built with Vite, using Yarn as the package manager and Material-UI for the UI components. +For development instructions, setup guides, and architecture details, please see the **[Frontend Documentation](../docs/04-frontend.md)**. ## Prerequisites @@ -17,12 +18,12 @@ To get a local copy up and running, follow these steps: 1. 
Clone the repository: ```bash - git clone https://github.com/NREL/dw-tap-api.git + git clone https://github.com/NREL/windwatts.git ``` 2. Navigate to the project directory: ```bash - cd frontend + cd windwatts-ui ``` 3. Install the dependencies: @@ -32,7 +33,7 @@ To get a local copy up and running, follow these steps: ### Configure Environment Files -Create two environment files, `.env.development` and `.env.production`, in the root of the project directory (./frontend/). These files will contain the necessary environment variables for the development and production environments. +Create two environment files, `.env.development` and `.env.production`, in the root of the project directory (./windwatts-ui/). These files will contain the necessary environment variables for the development and production environments. #### .env.development ```shell @@ -43,7 +44,7 @@ VITE_MAP_ID=YOUR_MAP_ID #### .env.production ```shell -VITE_API_BASE_URL=https://dw-tap.nrel.gov/ +VITE_API_BASE_URL=https://windwatts.nrel.gov/ VITE_MAP_API_KEY=YOUR_GOOGLE_MAPS_API_KEY VITE_MAP_ID=YOUR_MAP_ID ``` diff --git a/windwatts-ui/package.json b/windwatts-ui/package.json index 6d275692..51ad83bc 100644 --- a/windwatts-ui/package.json +++ b/windwatts-ui/package.json @@ -8,7 +8,8 @@ "build": "tsc -b && vite build", "lint": "eslint .", "test": "vitest", - "format": "prettier --check --ignore-path .gitignore \"**/*.+(js|jsx|ts|tsx|json)\"", + "format": "prettier --write --ignore-path .gitignore \"**/*.+(js|jsx|ts|tsx|json)\"", + "check-format": "prettier --check --ignore-path .gitignore \"**/*.+(js|jsx|ts|tsx|json)\"", "preview": "vite preview" }, "dependencies": { diff --git a/windwatts-ui/src/components/shared/OutOfBoundsWarning.tsx b/windwatts-ui/src/components/shared/OutOfBoundsWarning.tsx index 3ba6d3f8..05d13c28 100644 --- a/windwatts-ui/src/components/shared/OutOfBoundsWarning.tsx +++ b/windwatts-ui/src/components/shared/OutOfBoundsWarning.tsx @@ -33,7 +33,7 @@ export function 
OutOfBoundsWarning({ message }: { message: string }) { = { label: "ERA5", source_href: "https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5", - help_href: - "https://github.com/NREL/dw-tap-api/blob/main/docs/about/era5.md", + help_href: "https://github.com/NREL/windwatts/blob/main/docs/about/era5.md", description: "ERA5 (ECMWF Reanalysis v5) Dataset", year_range: "2020-2023", wind_speed_heights: ["10m", "30m", "40m", "50m", "60m", "80m", "100m"],