Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5a163ca
ruff changes
yawenzzzz Oct 17, 2024
7877e59
add api main
yawenzzzz Oct 17, 2024
c400985
change number of workers to 1
yawenzzzz Oct 17, 2024
8f05ca3
merge conflict
yawenzzzz Oct 17, 2024
7b53fdb
merge conflict
yawenzzzz Oct 17, 2024
2bfc38e
merge conflict
yawenzzzz Oct 17, 2024
19421b5
minor change
yawenzzzz Oct 16, 2024
02ab349
fastapi is working now
yawenzzzz Oct 17, 2024
8c2136b
ruff change
yawenzzzz Oct 17, 2024
f7081e4
ruff
yawenzzzz Oct 18, 2024
b64ab97
minor changes
yawenzzzz Oct 18, 2024
cbdeec9
update dockerfile
yawenzzzz Oct 18, 2024
9df535d
set use_initial_job = True
yawenzzzz Oct 18, 2024
a1b6d1e
move example to scripts folder
yawenzzzz Oct 19, 2024
8691f53
changes based on comments
yawenzzzz Oct 21, 2024
1d7d4d6
uncomment restore config
yawenzzzz Oct 21, 2024
164e895
Add test for Landsat prediction pipeline
favyen2 Oct 21, 2024
0babbbe
add aws credentials
favyen2 Oct 21, 2024
12d6a5b
fix
favyen2 Oct 21, 2024
67f29b5
makes path arguments as optional
yawenzzzz Oct 21, 2024
3f546bc
add gcs access
favyen2 Oct 21, 2024
2fd1847
make sure either scene_id or image_files are provided
yawenzzzz Oct 21, 2024
5a1149c
write crops only if specified path
yawenzzzz Oct 21, 2024
3a9cad1
Merge branch 'pbeukema/fastapi' of github.com:allenai/rslearn_project…
yawenzzzz Oct 21, 2024
ee16650
fix bug with b8_fname not defined and tmp_dir.cleanup not exist
favyen2 Oct 21, 2024
dd674b9
fix
uakfdotb Oct 22, 2024
ec987af
fix
uakfdotb Oct 22, 2024
d505f18
don't run slow test in CI
uakfdotb Oct 22, 2024
c0396a3
Merge remote-tracking branch 'origin/master' into pbeukema/fastapi
favyen2 Oct 22, 2024
e2790b7
fix lint issues
favyen2 Oct 22, 2024
eb0b5a5
minor change
yawenzzzz Oct 22, 2024
850429c
Merge branch 'pbeukema/fastapi' of github.com:allenai/rslearn_project…
yawenzzzz Oct 22, 2024
b1cea89
reduce batch_size to resolve the error: received 0 items of ancdata
yawenzzzz Oct 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ RUN pip install -r /opt/rslearn_projects/rslearn/extra_requirements.txt
COPY requirements.txt /opt/rslearn_projects/requirements.txt
RUN pip install -r /opt/rslearn_projects/requirements.txt

# We can use `pip install rslearn[extra]` now
# We need rslp to be pip installed as well

ENV PYTHONPATH="${PYTHONPATH}:/opt/rslearn_projects/rslearn:."
Expand Down
31 changes: 31 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,34 @@ version: "3.9"
services:
test:
build: .

# Define the base image
base-image:
build:
context: .
dockerfile: Dockerfile
image: base-image:latest # Tag it as "base-image"

# Define the landsat-vessels service
landsat-vessels:
build:
context: ./rslp/landsat_vessels
dockerfile: Dockerfile
shm_size: '10G' # This adds the shared memory size
depends_on:
- base-image
ports:
- "5555:5555"
environment:
- RSLP_BUCKET
- S3_ACCESS_KEY_ID
- S3_SECRET_ACCESS_KEY
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible
deploy:
resources:
reservations:
devices:
- capabilities: [gpu] # Ensure this service can access GPUs
runtime: nvidia # Use the NVIDIA runtime
8 changes: 6 additions & 2 deletions one_off_projects/minderoo/classify/add_train_fp_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
from rslearn.utils.vector_format import GeojsonVectorFormat

if __name__ == "__main__":
json_fname = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json"
big_crop_dir = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/"
json_fname = (
"/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json"
)
big_crop_dir = (
"/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/"
)
out_dir = "/multisat/datasets/dvim/rslearn_classify/"

# We are just using fake projection for training this model.
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
beaker-py
python-dotenv
pytest
uvicorn
fastapi
pydantic
typing-extensions
ruff
12 changes: 12 additions & 0 deletions rslp/landsat_vessels/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Base image
FROM base-image:latest

# Environment variables
ENV PYTHONPATH="/opt/rslearn_projects:${PYTHONPATH}"
ENV LANDSAT_PORT=5555

# Make port 5555 available to the world outside this container
EXPOSE $LANDSAT_PORT

# Run api_main.py when the container launches
CMD ["python3", "rslp/landsat_vessels/api_main.py"]
99 changes: 99 additions & 0 deletions rslp/landsat_vessels/api_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Landsat Vessel Detection Service."""

from __future__ import annotations

import logging
import multiprocessing
import os

import uvicorn
from fastapi import FastAPI, Response
from pydantic import BaseModel
from typing_extensions import TypedDict

from rslp.landsat_vessels import predict_pipeline

# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Set up the logger: INFO-level root configuration plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default bind address and port for the service. "0.0.0.0" listens on all
# network interfaces. These match the defaults used by the `__main__` entry
# point at the bottom of this module, where the LANDSAT_HOST / LANDSAT_PORT
# environment variables can override them.
LANDSAT_HOST = "0.0.0.0"
LANDSAT_PORT = 5555


class FormattedPrediction(TypedDict):
    """Formatted prediction for a single vessel detection.

    This is the element type of ``LandsatResponse.predictions``.
    """

    # Position of the detection (presumably decimal degrees -- confirm against
    # predict_pipeline).
    latitude: float
    longitude: float
    # Score attached to the detection (presumably model confidence -- confirm).
    score: float
    # File names associated with the detection (presumably the RGB and band-8
    # crops written by the pipeline -- confirm).
    rgb_fname: str
    b8_fname: str


class LandsatResponse(BaseModel):
    """Response object for vessel detections.

    This is the response model of the ``/detections`` endpoint.
    """

    # Outcome marker; the endpoint sets ["success"] or ["error"].
    status: list[str]
    # Detections produced by the pipeline; an empty list on error.
    predictions: list[FormattedPrediction]


class LandsatRequest(BaseModel):
"""Request object for vessel detections."""

scene_id: str | None = None
image_files: dict[str, str] | None = None
crop_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be nice for crop_path to be optional as well. If you don't make it optional then in the test I added, crop_path should be changed to be a subdirectory of the scratch_path, otherwise it writes the crops in the current directory.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Also leave it as None in the test, so that we can query API just with a scene_id.

scratch_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think scratch_path is fine to be required.

json_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

json_path should be optional since Skylight can read the JSON from the response body.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, made json_path optional now.



@app.on_event("startup")
async def rslp_init() -> None:
    """Configure multiprocessing when the service starts.

    Selects the "forkserver" start method (forcing it even if a start method
    was already set) and asks the fork server to import the rslp helper module
    up front.
    """
    logger.info("Initializing")
    multiprocessing.set_start_method("forkserver", force=True)
    # set_forkserver_preload expects *module* names. The previous entries
    # ("rslp.utils.rslearn.materialize_dataset" and
    # "rslp.utils.rslearn.run_model_predict") were dotted paths below the
    # module; they only worked because the failed import loads the parent
    # module first and the resulting ImportError is ignored by the fork
    # server. Naming the module itself preloads the same code without relying
    # on that.
    multiprocessing.set_forkserver_preload(["rslp.utils.rslearn"])


@app.get("/")
async def home() -> dict:
    """Serve a fixed message at the root path to show the service is running."""
    greeting = {"message": "Landsat Detections App"}
    return greeting


@app.post("/detections", response_model=LandsatResponse)
async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse:
    """Run the vessel prediction pipeline for a request and return its detections.

    Args:
        info: the request body; its fields are forwarded to ``predict_pipeline``.
        response: the response object injected by FastAPI. It is used only to
            set the HTTP status code when the pipeline fails.

    Returns:
        A ``LandsatResponse`` with status ``["success"]`` and the pipeline
        output, or status ``["error"]`` and no predictions if anything raised.
        In the error case the HTTP status code is set to 500 so that clients
        checking the status code (not just the body) can detect the failure.
    """
    # NOTE(review): predict_pipeline is called synchronously from an async
    # handler, so the event loop cannot serve other requests (including "/")
    # while a prediction is running -- confirm this is acceptable.
    try:
        logger.info(f"Received request with scene_id: {info.scene_id}")
        json_data = predict_pipeline(
            crop_path=info.crop_path,
            scene_id=info.scene_id,
            image_files=info.image_files,
            scratch_path=info.scratch_path,
            json_path=info.json_path,
        )
        return LandsatResponse(status=["success"], predictions=json_data)
    except ValueError as e:
        # logger.exception records the traceback in addition to the message.
        logger.exception("Value error during prediction pipeline: %s", e)
        response.status_code = 500
        return LandsatResponse(status=["error"], predictions=[])
    except Exception as e:
        # Top-level boundary of the request: log with traceback and report
        # the failure to the client rather than letting the exception escape.
        logger.exception("Unexpected error during prediction pipeline: %s", e)
        response.status_code = 500
        return LandsatResponse(status=["error"], predictions=[])


if __name__ == "__main__":
    # The LANDSAT_HOST / LANDSAT_PORT environment variables override the
    # module-level defaults. The port default is passed as a string because
    # os.getenv returns strings; int() then handles both cases uniformly.
    uvicorn.run(
        "api_main:app",
        host=os.getenv("LANDSAT_HOST", default=LANDSAT_HOST),
        port=int(os.getenv("LANDSAT_PORT", default=str(LANDSAT_PORT))),
        proxy_headers=True,
    )
41 changes: 41 additions & 0 deletions rslp/landsat_vessels/example_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Use this script to inference the API with locally stored data."""

import json
import os

import requests

# Port of the locally running service; the LANDSAT_PORT environment variable
# overrides the default.
PORT = os.getenv("LANDSAT_PORT", default=5555)
LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections"
# Passed as the `timeout` argument of requests.post, which is in seconds.
# NOTE(review): 60000 s is roughly 16.7 hours -- confirm this was not meant
# to be milliseconds.
TIMEOUT_SECONDS = 60000
# Landsat scene sent in the request body.
SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2"
# NOTE(review): the paths below point into one developer's home directory;
# adjust them before running this script elsewhere.
CROP_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_crops"
SCRATCH_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_scratch"
JSON_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/vessels.json"


def sample_request() -> None:
    """Post one detection request to the service and save the JSON reply.

    The request body is built from the module-level constants. When the
    response is OK, its JSON body is written to ``response.json`` in the same
    directory as this script; otherwise the status code and response text are
    printed.
    """
    payload = dict(
        scene_id=SCENE_ID,
        crop_path=CROP_PATH,
        scratch_path=SCRATCH_PATH,
        json_path=JSON_PATH,
        image_files=None,
    )
    response = requests.post(LANDSAT_ENDPOINT, json=payload, timeout=TIMEOUT_SECONDS)

    if not response.ok:
        print(f"Error: {response.status_code} - {response.text}")
        return

    script_dir = os.path.dirname(os.path.realpath(__file__))
    output_filename = os.path.join(script_dir, "response.json")
    with open(output_filename, "w") as outfile:
        json.dump(response.json(), outfile)


# Send the sample request only when this file is executed as a script.
if __name__ == "__main__":
    sample_request()
39 changes: 33 additions & 6 deletions rslp/landsat_vessels/predict_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Landsat vessel prediction pipeline."""

import json
import time
from datetime import datetime, timedelta
from typing import Any

import numpy as np
import rasterio
Expand Down Expand Up @@ -181,12 +183,12 @@ def run_classifier(


def predict_pipeline(
scratch_path: str,
json_path: str,
crop_path: str,
scratch_path: str | None = None,
json_path: str | None = None,
image_files: dict[str, str] | None = None,
scene_id: str | None = None,
):
) -> dict[str, Any]:
"""Run the Landsat vessel prediction pipeline.

This inputs a Landsat scene (consisting of per-band GeoTIFFs) and produces the
Expand All @@ -202,6 +204,20 @@ def predict_pipeline(
scene_id: Landsat scene ID. Exactly one of image_files or scene_id should be
specified.
"""
# if not scratch_path:
# tmp_dir = TemporaryDirectory()
# scratch_path = tmp_dir.name
# else:
# tmp_dir = None

print(f"scratch_path: {scratch_path}")
print(f"crop_path: {crop_path}")
print(f"json_path: {json_path}")
print(f"image_files: {image_files}")
print(f"scene_id: {scene_id}")

start_time = time.time() # Start the timer

ds_path = UPath(scratch_path)
ds_path.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -261,14 +277,16 @@ def predict_pipeline(
)

# Run pipeline.
print("get vessel detections")
detections = get_vessel_detections(
ds_path, projection, scene_bounds, time_range=time_range
)
print("run classifier")
detections = run_classifier(ds_path, detections, time_range=time_range)

# Write JSON and crops.
json_path = UPath(json_path)
crop_path = UPath(crop_path)
crop_path.mkdir(parents=True, exist_ok=True)

json_data = []
for idx, detection in enumerate(detections):
Expand Down Expand Up @@ -326,5 +344,14 @@ def predict_pipeline(
)
)

with json_path.open("w") as f:
json.dump(json_data, f)
# if tmp_dir:
# tmp_dir.cleanup()
elapsed_time = time.time() - start_time # Calculate elapsed time
print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds")

if json_path:
json_path = UPath(json_path)
with json_path.open("w") as f:
json.dump(json_data, f)

return json_data
2 changes: 2 additions & 0 deletions rslp/utils/rslearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int =
dataset,
workers=workers,
group=group,
use_initial_job=False,
)
apply_on_windows(
MaterializeHandler(),
dataset,
workers=workers,
group=group,
use_initial_job=False,
)


Expand Down
Loading