Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5a163ca
ruff changes
yawenzzzz Oct 17, 2024
7877e59
add api main
yawenzzzz Oct 17, 2024
c400985
change number of workers to 1
yawenzzzz Oct 17, 2024
8f05ca3
merge conflict
yawenzzzz Oct 17, 2024
7b53fdb
merge conflict
yawenzzzz Oct 17, 2024
2bfc38e
merge conflict
yawenzzzz Oct 17, 2024
19421b5
minor change
yawenzzzz Oct 16, 2024
02ab349
fastapi is working now
yawenzzzz Oct 17, 2024
8c2136b
ruff change
yawenzzzz Oct 17, 2024
f7081e4
ruff
yawenzzzz Oct 18, 2024
b64ab97
minor changes
yawenzzzz Oct 18, 2024
cbdeec9
update dockerfile
yawenzzzz Oct 18, 2024
9df535d
set use_initial_job = True
yawenzzzz Oct 18, 2024
a1b6d1e
move example to scripts folder
yawenzzzz Oct 19, 2024
8691f53
changes based on comments
yawenzzzz Oct 21, 2024
1d7d4d6
uncomment restore config
yawenzzzz Oct 21, 2024
164e895
Add test for Landsat prediction pipeline
favyen2 Oct 21, 2024
0babbbe
add aws credentials
favyen2 Oct 21, 2024
12d6a5b
fix
favyen2 Oct 21, 2024
67f29b5
makes path arguments as optional
yawenzzzz Oct 21, 2024
3f546bc
add gcs access
favyen2 Oct 21, 2024
2fd1847
make sure either scene_id or image_files are provided
yawenzzzz Oct 21, 2024
5a1149c
write crops only if specified path
yawenzzzz Oct 21, 2024
3a9cad1
Merge branch 'pbeukema/fastapi' of github.com:allenai/rslearn_project…
yawenzzzz Oct 21, 2024
ee16650
fix bug with b8_fname not defined and tmp_dir.cleanup not exist
favyen2 Oct 21, 2024
dd674b9
fix
uakfdotb Oct 22, 2024
ec987af
fix
uakfdotb Oct 22, 2024
d505f18
don't run slow test in CI
uakfdotb Oct 22, 2024
c0396a3
Merge remote-tracking branch 'origin/master' into pbeukema/fastapi
favyen2 Oct 22, 2024
e2790b7
fix lint issues
favyen2 Oct 22, 2024
eb0b5a5
minor change
yawenzzzz Oct 22, 2024
850429c
Merge branch 'pbeukema/fastapi' of github.com:allenai/rslearn_project…
yawenzzzz Oct 22, 2024
b1cea89
reduce batch_size to resolve the error: received 0 items of ancdata
yawenzzzz Oct 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
FROM pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime@sha256:58a28ab734f23561aa146fbaf777fb319a953ca1e188832863ed57d510c9f197

# TEMPORARY Until RSLEARN Is Public
ARG GIT_USERNAME
ARG GIT_TOKEN

RUN apt update
RUN apt install -y libpq-dev ffmpeg libsm6 libxext6 git
RUN git clone https://${GIT_USERNAME}:${GIT_TOKEN}@github.com/allenai/rslearn.git /opt/rslearn_projects/rslearn
RUN git clone https://github.com/allenai/rslearn.git /opt/rslearn_projects/rslearn
RUN pip install -r /opt/rslearn_projects/rslearn/requirements.txt
RUN pip install -r /opt/rslearn_projects/rslearn/extra_requirements.txt
COPY requirements.txt /opt/rslearn_projects/requirements.txt
Expand Down
2 changes: 2 additions & 0 deletions landsat/recheck_landsat_labels/phase123_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ data:
allow_invalid: true
skip_unknown_categories: true
prob_property: "prob"
positive_class: "correct"
positive_class_threshold: 0.85
input_mapping:
class:
label: "targets"
Expand Down
8 changes: 6 additions & 2 deletions one_off_projects/minderoo/classify/add_train_fp_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
from rslearn.utils.vector_format import GeojsonVectorFormat

if __name__ == "__main__":
json_fname = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json"
big_crop_dir = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/"
json_fname = (
"/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json"
)
big_crop_dir = (
"/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/"
)
out_dir = "/multisat/datasets/dvim/rslearn_classify/"

# We are just using fake projection for training this model.
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
beaker-py
python-dotenv
pytest
uvicorn
fastapi
pydantic
typing-extensions
ruff
12 changes: 12 additions & 0 deletions rslp/landsat_vessels/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image for the Landsat vessel detection API service.
# Base image: docker-compose.yaml builds it from the repository-root
# Dockerfile and tags it base-image:latest.
FROM base-image:latest

# Environment variables
# PYTHONPATH lets the rslp package under /opt/rslearn_projects be imported.
ENV PYTHONPATH="/opt/rslearn_projects:${PYTHONPATH}"
# api_main.py reads LANDSAT_PORT to choose the port it listens on.
ENV LANDSAT_PORT=5555

# Make the service port (LANDSAT_PORT, 5555) available outside this container
EXPOSE $LANDSAT_PORT

# Start the FastAPI service (api_main.py) when the container launches
CMD ["python3", "rslp/landsat_vessels/api_main.py"]
99 changes: 99 additions & 0 deletions rslp/landsat_vessels/api_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Landsat Vessel Detection Service."""

from __future__ import annotations

import logging
import multiprocessing
import os

import uvicorn
from fastapi import FastAPI, Response
from pydantic import BaseModel
from typing_extensions import TypedDict

from rslp.landsat_vessels import predict_pipeline

# FastAPI application object; the startup hook and route handlers defined
# below register themselves on it via decorators.
app = FastAPI()

# Set up the logger
# INFO level so per-request and startup messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default bind address (all interfaces) and port for the service. The
# LANDSAT_HOST / LANDSAT_PORT environment variables take precedence when set
# (see the __main__ entry point at the bottom of this module).
LANDSAT_HOST = "0.0.0.0"
LANDSAT_PORT = 5555


class FormattedPrediction(TypedDict):
    """Dictionary shape of one vessel detection in the API response.

    Keys:
        latitude: latitude of the detection.
        longitude: longitude of the detection.
        score: score attached to the detection.
        rgb_fname: file name associated with the detection's RGB output
            (presumably the RGB crop image -- confirm against
            predict_pipeline).
        b8_fname: file name associated with the detection's B8 output
            (presumably the band-8 crop image -- confirm against
            predict_pipeline).
    """

    # Position of the detection.
    latitude: float
    longitude: float

    # Score reported for the detection.
    score: float

    # File names reported alongside the detection.
    rgb_fname: str
    b8_fname: str


class LandsatResponse(BaseModel):
    """Body returned by the /detections endpoint.

    Attributes:
        status: list of status strings; the endpoint sends ["success"] when
            the pipeline finished and ["error"] when it raised.
        predictions: one FormattedPrediction per detection; empty when the
            pipeline raised.
    """

    # Outcome marker(s) for the request.
    status: list[str]

    # Detections produced by the prediction pipeline.
    predictions: list[FormattedPrediction]


class LandsatRequest(BaseModel):
"""Request object for vessel detections."""

scene_id: str | None = None
image_files: dict[str, str] | None = None
crop_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be nice for crop_path to be optional as well. If you don't make it optional then in the test I added, crop_path should be changed to be a subdirectory of the scratch_path, otherwise it writes the crops in the current directory.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. I also left it as None in the test, so that we can query the API with just a scene_id.

scratch_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think scratch_path is fine to be required.

json_path: str
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

json_path should be optional since Skylight can read the JSON from the response body.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, made json_path optional now.



@app.on_event("startup")
async def rslp_init() -> None:
    """Configure multiprocessing when the service starts.

    Forces the "forkserver" start method and asks the fork server to import
    the rslp helper module up front, so worker processes spawned later do not
    each pay the import cost.
    """
    logger.info("Initializing")
    multiprocessing.set_start_method("forkserver", force=True)
    # set_forkserver_preload() expects importable *module* names and silently
    # ignores entries that fail to import. materialize_dataset and
    # run_model_predict are functions inside the rslp.utils.rslearn module, so
    # dotted paths ending in those names are never imported; preload the
    # module that defines them instead.
    multiprocessing.set_forkserver_preload(["rslp.utils.rslearn"])


@app.get("/")
async def home() -> dict:
    """Return a fixed message showing that the service is up."""
    greeting = {"message": "Landsat Detections App"}
    return greeting


@app.post("/detections", response_model=LandsatResponse)
async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse:
    """Run the vessel prediction pipeline for one request.

    Args:
        info: request body; its scene_id, image_files, crop_path, scratch_path
            and json_path fields are forwarded to predict_pipeline.
        response: FastAPI response object. It is not modified here, so the
            HTTP status stays at its default even when the pipeline fails;
            callers must inspect the ``status`` field of the body.

    Returns:
        A LandsatResponse with status ["success"] and the pipeline output, or
        status ["error"] and an empty prediction list if anything raised.
    """
    try:
        logger.info("Received request with scene_id: %s", info.scene_id)
        json_data = predict_pipeline(
            crop_path=info.crop_path,
            scene_id=info.scene_id,
            image_files=info.image_files,
            scratch_path=info.scratch_path,
            json_path=info.json_path,
        )
        return LandsatResponse(status=["success"], predictions=json_data)
    except ValueError as e:
        # logger.exception keeps the same message but also records the
        # traceback, which is otherwise lost because the error is swallowed.
        logger.exception("Value error during prediction pipeline: %s", e)
        return LandsatResponse(status=["error"], predictions=[])
    except Exception as e:
        # Top-level boundary: report the failure to the client in the body
        # rather than letting the request crash, but keep the traceback.
        logger.exception("Unexpected error during prediction pipeline: %s", e)
        return LandsatResponse(status=["error"], predictions=[])


if __name__ == "__main__":
    # Serve the app with uvicorn. The LANDSAT_HOST / LANDSAT_PORT environment
    # variables override the module-level defaults of the same name; the port
    # default is passed as a string because environment values are strings
    # and the result is converted with int() either way.
    uvicorn.run(
        "api_main:app",
        host=os.getenv("LANDSAT_HOST", default=LANDSAT_HOST),
        port=int(os.getenv("LANDSAT_PORT", default=str(LANDSAT_PORT))),
        proxy_headers=True,
    )
33 changes: 33 additions & 0 deletions rslp/landsat_vessels/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
version: "3.9"

services:
# Define the base image
base-image:
build:
context: ../..
dockerfile: Dockerfile
image: base-image:latest # Tag it as "base-image"

# Define the landsat-vessels service
landsat-vessels:
build:
context: .
dockerfile: Dockerfile
shm_size: '10G' # This adds the shared memory size
depends_on:
- base-image
ports:
- "5555:5555"
environment:
- RSLP_BUCKET
- S3_ACCESS_KEY_ID
- S3_SECRET_ACCESS_KEY
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible
deploy:
resources:
reservations:
devices:
- capabilities: [gpu] # Ensure this service can access GPUs
runtime: nvidia # Use the NVIDIA runtime
40 changes: 34 additions & 6 deletions rslp/landsat_vessels/predict_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Landsat vessel prediction pipeline."""

import json
import time
from datetime import datetime, timedelta
from typing import Any

import numpy as np
import rasterio
Expand Down Expand Up @@ -181,12 +183,12 @@ def run_classifier(


def predict_pipeline(
scratch_path: str,
json_path: str,
crop_path: str,
scratch_path: str | None = None,
json_path: str | None = None,
image_files: dict[str, str] | None = None,
scene_id: str | None = None,
):
) -> dict[str, Any]:
"""Run the Landsat vessel prediction pipeline.

This inputs a Landsat scene (consisting of per-band GeoTIFFs) and produces the
Expand All @@ -202,6 +204,9 @@ def predict_pipeline(
scene_id: Landsat scene ID. Exactly one of image_files or scene_id should be
specified.
"""
start_time = time.time() # Start the timer
time_profile = {}

ds_path = UPath(scratch_path)
ds_path.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -260,15 +265,25 @@ def predict_pipeline(
dst_geom.time_range[1] + timedelta(minutes=30),
)

time_profile["setup"] = time.time() - start_time

# Run pipeline.
step_start_time = time.time()
print("run detector")
detections = get_vessel_detections(
ds_path, projection, scene_bounds, time_range=time_range
)
time_profile["get_vessel_detections"] = time.time() - step_start_time

step_start_time = time.time()
print("run classifier")
detections = run_classifier(ds_path, detections, time_range=time_range)
time_profile["run_classifier"] = time.time() - step_start_time

# Write JSON and crops.
json_path = UPath(json_path)
step_start_time = time.time()
crop_path = UPath(crop_path)
crop_path.mkdir(parents=True, exist_ok=True)

json_data = []
for idx, detection in enumerate(detections):
Expand Down Expand Up @@ -326,5 +341,18 @@ def predict_pipeline(
)
)

with json_path.open("w") as f:
json.dump(json_data, f)
time_profile["write_json_and_crops"] = time.time() - step_start_time

elapsed_time = time.time() - start_time # Calculate elapsed time
time_profile["total"] = elapsed_time

if json_path:
json_path = UPath(json_path)
with json_path.open("w") as f:
json.dump(json_data, f)

print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds")
for step, duration in time_profile.items():
print(f"{step} took {duration:.2f} seconds")

return json_data
2 changes: 2 additions & 0 deletions rslp/utils/rslearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int =
dataset,
workers=workers,
group=group,
use_initial_job=False,
)
apply_on_windows(
MaterializeHandler(),
dataset,
workers=workers,
group=group,
use_initial_job=False,
)


Expand Down
26 changes: 26 additions & 0 deletions tests/integration/landsat_vessels/test_fastapi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import tempfile

from fastapi.testclient import TestClient

from rslp.landsat_vessels.api_main import app

client = TestClient(app)


def test_singapore_dense_scene():
    """Query /detections for a vessel-dense scene and expect many detections."""
    # LC08_L1TP_125059_20240913_20240920_02_T1 is a scene that includes southeast coast
    # of Singapore where there are hundreds of vessels.
    with tempfile.TemporaryDirectory() as tmp_dir:
        response = client.post(
            "/detections",
            json={
                "scene_id": "LC08_L1TP_125059_20240913_20240920_02_T1",
                "scratch_path": tmp_dir,
                # Empty json_path: the pipeline skips writing a JSON file and
                # the detections are read from the response body instead.
                "json_path": "",
                # Keep crops inside the temporary directory. An empty
                # crop_path resolves to the current working directory, so the
                # test would leave crop files behind there.
                "crop_path": f"{tmp_dir}/crops",
            },
        )
        assert response.status_code == 200
        predictions = response.json()["predictions"]
        # There are many correct vessels in this scene.
        assert len(predictions) >= 100
Loading