From 5a163cac13a6e2f4f7443c254683706775380ec0 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:01:36 +0000 Subject: [PATCH 01/30] ruff changes --- landsat/example_request.py | 39 +++++++++++++++++++ .../classify/add_train_fp_examples.py | 8 +++- requirements.txt | 5 +++ rslp/landsat_vessels/predict_pipeline.py | 17 +++++--- 4 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 landsat/example_request.py diff --git a/landsat/example_request.py b/landsat/example_request.py new file mode 100644 index 00000000..54ccd8c3 --- /dev/null +++ b/landsat/example_request.py @@ -0,0 +1,39 @@ +"""Use this script to inference the API with locally stored data.""" + +import json +import os + +import requests + +PORT = os.getenv("Landsat_PORT", default=5555) +LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" +TIMEOUT_SECONDS = 200000 +SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" +CROP_PATH = "/home/yawenz/rslearn_projects/landsat/temp_crops" +SCRATCH_PATH = "/home/yawenz/rslearn_projects/landsat/temp_scratch" +JSON_PATH = "/home/yawenz/rslearn_projects/landsat/vessels.json" + + +def sample_request() -> None: + """Sample request for files stored locally.""" + REQUEST_BODY = { + "scene_id": SCENE_ID, + "crop_path": CROP_PATH, + "scratch_path": SCRATCH_PATH, + "json_path": JSON_PATH, + "image_files": None, + } + + response = requests.post( + LANDSAT_ENDPOINT, json=REQUEST_BODY, timeout=TIMEOUT_SECONDS + ) + output_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "sample_response.json" + ) + if response.ok: + with open(output_filename, "w") as outfile: + json.dump(response.json(), outfile) + + +if __name__ == "__main__": + sample_request() diff --git a/one_off_projects/minderoo/classify/add_train_fp_examples.py b/one_off_projects/minderoo/classify/add_train_fp_examples.py index e9753356..cf0a5ccb 100644 --- a/one_off_projects/minderoo/classify/add_train_fp_examples.py +++ b/one_off_projects/minderoo/classify/add_train_fp_examples.py @@ -28,8 +28,12 @@ from rslearn.utils.vector_format import GeojsonVectorFormat if __name__ == "__main__": - json_fname = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json" - big_crop_dir = "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/" + json_fname = ( + "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/20240811_annotations.json" + ) + big_crop_dir = ( + "/multisat/datasets/dvim/train_outputs/2024-07-30_0.5/annotate_crops/" + ) out_dir = "/multisat/datasets/dvim/rslearn_classify/" # We are just using fake projection for training this model. diff --git a/requirements.txt b/requirements.txt index d197c05d..f908dcc8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,8 @@ beaker-py python-dotenv pytest +uvicorn +fastapi +pydantic +typing-extensions +ruff diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index ded0dcd0..f36e0065 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -2,6 +2,7 @@ import json from datetime import datetime, timedelta +from typing import Any import numpy as np import rasterio @@ -181,12 +182,12 @@ def run_classifier( def predict_pipeline( - scratch_path: str, - json_path: str, crop_path: str, + scratch_path: str | None = None, + json_path: str | None = None, image_files: dict[str, str] | None = None, scene_id: str | None = None, -): +) -> dict[str, Any]: """Run the Landsat vessel prediction pipeline. 
This inputs a Landsat scene (consisting of per-band GeoTIFFs) and produces the @@ -267,8 +268,8 @@ def predict_pipeline( detections = run_classifier(ds_path, detections, time_range=time_range) # Write JSON and crops. - json_path = UPath(json_path) crop_path = UPath(crop_path) + crop_path.mkdir(parents=True, exist_ok=True) json_data = [] for idx, detection in enumerate(detections): @@ -326,5 +327,9 @@ def predict_pipeline( ) ) - with json_path.open("w") as f: - json.dump(json_data, f) + if json_path: + json_path = UPath(json_path) + with json_path.open("w") as f: + json.dump(json_data, f) + + return json_data From 7877e59d0238efb788df9869d812490945b35c86 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:03:21 +0000 Subject: [PATCH 02/30] add api main --- rslp/landsat_vessels/api_main.py | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 rslp/landsat_vessels/api_main.py diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py new file mode 100644 index 00000000..c5d930c1 --- /dev/null +++ b/rslp/landsat_vessels/api_main.py @@ -0,0 +1,81 @@ +"""Landsat Vessel Detection Service.""" + +from __future__ import annotations + +import logging.config +import os + +import uvicorn +from fastapi import FastAPI, Response +from pydantic import BaseModel +from typing_extensions import TypedDict + +from rslp.landsat_vessels import predict_pipeline + +app = FastAPI() +logger = logging.getLogger(__name__) + +HOST = "0.0.0.0" # nosec B104 +PORT = os.getenv("LANDSAT_PORT", default=5555) +# MODEL_VERSION = os.getenv("GIT_COMMIT_HASH", datetime.today()) + + +class FormattedPrediction(TypedDict): + """Formatted prediction for a single vessel detection.""" + + # [{"longitude": 123.96480506005342, "latitude": -34.75794960371865, "score": 0.9195963740348816, "rgb_fname": "crops/0_rgb.png", "b8_fname": "crops/0_b8.png"} + latitude: float + longitude: float + score: float + rgb_fname: str + b8_fname: str + + +class LandsatResponse(BaseModel): + """Response object for vessel detections.""" + + status: list[str] + predictions: list[FormattedPrediction] + + +class LandsatRequest(BaseModel): + """Request object for vessel detections.""" + + scene_id: str | None = None + image_files: dict[str, str] | None = None + crop_path: str + scratch_path: str + json_path: str + + +@app.on_event("startup") +async def rslp_init() -> None: + """Landsat Vessel Service Initialization.""" + logger.info("Initializing") + + +@app.get("/") +async def home() -> dict: + """Returns a simple message to indicate the service is running.""" + return {"message": "Landsat Detections App"} + + +@app.post("/detections", response_model=LandsatResponse) +async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse: + """Returns vessel detections Response object for a given Request object.""" + try: + json_data = predict_pipeline( + crop_path=info.crop_path, + scene_id=info.scene_id, + image_files=info.image_files, + scratch_path=info.scratch_path, + json_path=info.json_path, + ) + return LandsatResponse(status=["success"], predictions=json_data) + except Exception as e: + logger.error(f"Error during prediction pipeline: {e}") + return LandsatResponse(status=["error"], predictions=[]) + + +if __name__ == "__main__": + uvicorn.run("api_main:app", host=HOST, port=PORT, proxy_headers=True) From c400985d2d65c0ac69f57daa85e39ad6f98d944d Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:29:28 +0000 Subject: [PATCH 03/30] change number of 
workers to 1 --- rslp/utils/rslearn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index 555bb254..ec67ab70 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -14,7 +14,7 @@ from rslp.lightning_cli import CustomLightningCLI -def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = 32): +def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = 1): """Materialize the specified dataset by running prepare/ingest/materialize. Args: @@ -22,6 +22,7 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = group: limit dataset actions to this group. workers: number of workers to use. """ + # multiprocessing.set_start_method("forkserver") dataset = Dataset(ds_path) apply_on_windows( PrepareHandler(force=False), From 8f05ca32c32a738974706880d202efdad35e6138 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:31:17 +0000 Subject: [PATCH 04/30] merge conflict --- landsat/example_request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/landsat/example_request.py b/landsat/example_request.py index 54ccd8c3..be4d6066 100644 --- a/landsat/example_request.py +++ b/landsat/example_request.py @@ -7,7 +7,7 @@ PORT = os.getenv("Landsat_PORT", default=5555) LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" -TIMEOUT_SECONDS = 200000 +TIMEOUT_SECONDS = 60000 SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" CROP_PATH = "/home/yawenz/rslearn_projects/landsat/temp_crops" SCRATCH_PATH = "/home/yawenz/rslearn_projects/landsat/temp_scratch" From 7b53fdb257668c5b3a23d6dbb075c878409dae0a Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:32:59 +0000 Subject: [PATCH 05/30] merge conflict --- rslp/landsat_vessels/predict_pipeline.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index f36e0065..7648efbe 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -2,6 +2,7 @@ import json from datetime import datetime, timedelta +from tempfile import TemporaryDirectory from typing import Any import numpy as np @@ -203,6 +204,12 @@ def predict_pipeline( scene_id: Landsat scene ID. Exactly one of image_files or scene_id should be specified. 
""" + if not scratch_path: + tmp_dir = TemporaryDirectory() + scratch_path = tmp_dir.name + else: + tmp_dir = None + ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) From 2bfc38ef7f274c12a84efeae0edfd7c3570cf4c6 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 17:34:25 +0000 Subject: [PATCH 06/30] merge conflict --- rslp/api_main.py | 128 +++++++++++++++++++++++ rslp/landsat_vessels/predict_pipeline.py | 22 ++-- rslp/utils/rslearn.py | 2 + 3 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 rslp/api_main.py diff --git a/rslp/api_main.py b/rslp/api_main.py new file mode 100644 index 00000000..16c5ed9b --- /dev/null +++ b/rslp/api_main.py @@ -0,0 +1,128 @@ +"""VIIRS Vessel Detection Service.""" + +from __future__ import annotations + +import logging.config +import os + +import uvicorn +from fastapi import FastAPI, Response +from pydantic import BaseModel +from typing_extensions import TypedDict + +from rslp.landsat_vessels import predict_pipeline + +app = FastAPI() +logger = logging.getLogger(__name__) + +HOST = "0.0.0.0" # nosec B104 +PORT = os.getenv("LANDSAT_PORT", default=5555) +# MODEL_VERSION = os.getenv("GIT_COMMIT_HASH", datetime.today()) + + +class FormattedPrediction(TypedDict): + """Formatted prediction for a single vessel detection.""" + + # [{"longitude": 123.96480506005342, "latitude": -34.75794960371865, "score": 0.9195963740348816, "rgb_fname": "crops/0_rgb.png", "b8_fname": "crops/0_b8.png"} + + latitude: float + longitude: float + score: float + rgb_fname: str + b8_fname: str + + +class LandsatResponse(BaseModel): + """Response object for vessel detections.""" + + status: list[str] + predictions: list[FormattedPrediction] + + +class LandsatRequest(BaseModel): + """Request object for vessel detections.""" + + scene_id: str | None = None + image_files: dict[str, str] | None = None + crop_path: str + scratch_path: str + json_path: str + + # class Config: + # """example configuration for a request where files are stored in cloud""" + + # schema_extra = { + # "example": { + # "input_dir": "input", + # "output_dir": "output", + # "filename": "VJ102DNB.A2022362.0154.021.2022362055600.nc", + # "geo_filename": "VJ103DNB.A2022362.0154.021.2022362052511.nc", + # "modraw_filename": "VJ102MOD.A2022362.0154.002.2022362115107.nc", + # "modgeo_filename": "VJ103MOD.A2022362.0154.002.2022362095104.nc", + # }, + # } + + +@app.on_event("startup") +async def rslp_init() -> None: + """VIIRS Vessel Service Initialization.""" + logger.info("Initializing") + + +@app.get("/") +async def home() -> dict: + """Returns a simple message to indicate the service is running.""" + return {"message": "Landsat Detections App"} + + +@app.post("/detections", response_model=LandsatResponse) +async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse: + """Returns vessel detections Response object for a given Request object.""" + try: + json_data = predict_pipeline( + crop_path=info.crop_path, + scene_id=info.scene_id, + image_files=info.image_files, + scratch_path=info.scratch_path, + json_path=info.json_path, + ) + return LandsatResponse(status=["success"], predictions=json_data) + except Exception as e: + logger.error(f"Error during prediction pipeline: {e}") + return LandsatResponse(status=["error"], predictions=[]) + + # ves_detections = all_detections["vessel_detections"] + + # satellite_name = utils.get_provider_name(dnb_dataset) + # acquisition_time, end_time = utils.get_acquisition_time(dnb_dataset) + # chips_dict = 
utils.get_chips(image, ves_detections, dnb_dataset) + # if info.gcp_bucket is not None: + # chips_dict = utils.upload_image( + # info.gcp_bucket, chips_dict, info.output_dir, dnb_path + # ) + # else: + # utils.save_chips_locally( + # chips_dict, + # destination_path=info.output_dir, + # chip_features=ves_detections, + # ) + + # average_moonlight = RoundedFloat(utils.get_average_moonlight(dnb_dataset), 2) + + # frame_extents = utils.get_frame_extents(dnb_dataset) + + # predictions = utils.format_detections(chips_dict) + # time_s = round(perf_counter() - start) + # n_ves = len(chips_dict) + # logger.info( + # f"In frame: {dnb_path}, vvd detected {n_ves} vessels in ({time_s} seconds)" + # ) + # response.headers["n_detections"] = str(len(chips_dict)) + # response.headers["avg_moonlight"] = str(average_moonlight) + # response.headers["lightning_count"] = str(all_detections["lightning_count"]) + # response.headers["gas_flare_count"] = str(all_detections["gas_flare_count"]) + # response.headers["inference_time"] = str("elapsed_time") + + +if __name__ == "__main__": + uvicorn.run("api_main:app", host=HOST, port=PORT, proxy_headers=True) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 7648efbe..faa35d6a 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -2,7 +2,6 @@ import json from datetime import datetime, timedelta -from tempfile import TemporaryDirectory from typing import Any import numpy as np @@ -204,11 +203,17 @@ def predict_pipeline( scene_id: Landsat scene ID. Exactly one of image_files or scene_id should be specified. """ - if not scratch_path: - tmp_dir = TemporaryDirectory() - scratch_path = tmp_dir.name - else: - tmp_dir = None + # if not scratch_path: + # tmp_dir = TemporaryDirectory() + # scratch_path = tmp_dir.name + # else: + # tmp_dir = None + + print(f"scratch_path: {scratch_path}") + print(f"crop_path: {crop_path}") + print(f"json_path: {json_path}") + print(f"image_files: {image_files}") + print(f"scene_id: {scene_id}") ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) @@ -269,9 +274,11 @@ def predict_pipeline( ) # Run pipeline. + print("get vessel detections") detections = get_vessel_detections( ds_path, projection, scene_bounds, time_range=time_range ) + print("run classifier") detections = run_classifier(ds_path, detections, time_range=time_range) # Write JSON and crops. 
@@ -334,6 +341,9 @@ def predict_pipeline( ) ) + # if tmp_dir: + # tmp_dir.cleanup() + if json_path: json_path = UPath(json_path) with json_path.open("w") as f: diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index ec67ab70..ac9128c5 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -35,12 +35,14 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = dataset, workers=workers, group=group, + use_initial_job=False, ) apply_on_windows( MaterializeHandler(), dataset, workers=workers, group=group, + use_initial_job=False, ) From 19421b5d5606226d33d8fcdb95558c1d37b009bc Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Wed, 16 Oct 2024 20:27:42 +0000 Subject: [PATCH 07/30] minor change --- rslp/api_main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rslp/api_main.py b/rslp/api_main.py index 16c5ed9b..a5e486a5 100644 --- a/rslp/api_main.py +++ b/rslp/api_main.py @@ -24,7 +24,6 @@ class FormattedPrediction(TypedDict): """Formatted prediction for a single vessel detection.""" # [{"longitude": 123.96480506005342, "latitude": -34.75794960371865, "score": 0.9195963740348816, "rgb_fname": "crops/0_rgb.png", "b8_fname": "crops/0_b8.png"} - latitude: float longitude: float score: float From 02ab349cc8a1ed567b73532c211bfd3bf0fe3b73 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 16:52:29 +0000 Subject: [PATCH 08/30] fastapi is working now --- landsat/example_request.py | 2 +- rslp/api_main.py | 50 ++------------------------------------ 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/landsat/example_request.py b/landsat/example_request.py index be4d6066..54ccd8c3 100644 --- a/landsat/example_request.py +++ b/landsat/example_request.py @@ -7,7 +7,7 @@ PORT = os.getenv("Landsat_PORT", default=5555) LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" -TIMEOUT_SECONDS = 60000 +TIMEOUT_SECONDS = 200000 SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" CROP_PATH = "/home/yawenz/rslearn_projects/landsat/temp_crops" SCRATCH_PATH = "/home/yawenz/rslearn_projects/landsat/temp_scratch" diff --git a/rslp/api_main.py b/rslp/api_main.py index a5e486a5..c5d930c1 100644 --- a/rslp/api_main.py +++ b/rslp/api_main.py @@ -1,4 +1,4 @@ -"""VIIRS Vessel Detection Service.""" +"""Landsat Vessel Detection Service.""" from __future__ import annotations @@ -47,24 +47,10 @@ class LandsatRequest(BaseModel): scratch_path: str json_path: str - # class Config: - # """example configuration for a request where files are stored in cloud""" - - # schema_extra = { - # "example": { - # "input_dir": "input", - # "output_dir": "output", - # "filename": "VJ102DNB.A2022362.0154.021.2022362055600.nc", - # "geo_filename": "VJ103DNB.A2022362.0154.021.2022362052511.nc", - # "modraw_filename": "VJ102MOD.A2022362.0154.002.2022362115107.nc", - # "modgeo_filename": "VJ103MOD.A2022362.0154.002.2022362095104.nc", - # }, - # } - @app.on_event("startup") async def rslp_init() -> None: - """VIIRS Vessel Service Initialization.""" + """Landsat Vessel Service Initialization.""" logger.info("Initializing") @@ -90,38 +76,6 @@ async def get_detections(info: LandsatRequest, response: Response) -> LandsatRes logger.error(f"Error during prediction pipeline: {e}") return LandsatResponse(status=["error"], predictions=[]) - # ves_detections = all_detections["vessel_detections"] - - # satellite_name = utils.get_provider_name(dnb_dataset) - # acquisition_time, end_time = utils.get_acquisition_time(dnb_dataset) - # chips_dict = utils.get_chips(image, ves_detections, 
dnb_dataset) - # if info.gcp_bucket is not None: - # chips_dict = utils.upload_image( - # info.gcp_bucket, chips_dict, info.output_dir, dnb_path - # ) - # else: - # utils.save_chips_locally( - # chips_dict, - # destination_path=info.output_dir, - # chip_features=ves_detections, - # ) - - # average_moonlight = RoundedFloat(utils.get_average_moonlight(dnb_dataset), 2) - - # frame_extents = utils.get_frame_extents(dnb_dataset) - - # predictions = utils.format_detections(chips_dict) - # time_s = round(perf_counter() - start) - # n_ves = len(chips_dict) - # logger.info( - # f"In frame: {dnb_path}, vvd detected {n_ves} vessels in ({time_s} seconds)" - # ) - # response.headers["n_detections"] = str(len(chips_dict)) - # response.headers["avg_moonlight"] = str(average_moonlight) - # response.headers["lightning_count"] = str(all_detections["lightning_count"]) - # response.headers["gas_flare_count"] = str(all_detections["gas_flare_count"]) - # response.headers["inference_time"] = str("elapsed_time") - if __name__ == "__main__": uvicorn.run("api_main:app", host=HOST, port=PORT, proxy_headers=True) From 8c2136b28f85efcbd18457662a9142b5947abeb8 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Thu, 17 Oct 2024 18:53:43 +0000 Subject: [PATCH 09/30] ruff change --- rslp/api_main.py | 81 -------------------------------- rslp/landsat_vessels/api_main.py | 8 ++++ rslp/utils/rslearn.py | 3 +- 3 files changed, 9 insertions(+), 83 deletions(-) delete mode 100644 rslp/api_main.py diff --git a/rslp/api_main.py b/rslp/api_main.py deleted file mode 100644 index c5d930c1..00000000 --- a/rslp/api_main.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Landsat Vessel Detection Service.""" - -from __future__ import annotations - -import logging.config -import os - -import uvicorn -from fastapi import FastAPI, Response -from pydantic import BaseModel -from typing_extensions import TypedDict - -from rslp.landsat_vessels import predict_pipeline - -app = FastAPI() -logger = logging.getLogger(__name__) - -HOST = "0.0.0.0" # nosec B104 -PORT = os.getenv("LANDSAT_PORT", default=5555) -# MODEL_VERSION = os.getenv("GIT_COMMIT_HASH", datetime.today()) - - -class FormattedPrediction(TypedDict): - """Formatted prediction for a single vessel detection.""" - - # [{"longitude": 123.96480506005342, "latitude": -34.75794960371865, "score": 0.9195963740348816, "rgb_fname": "crops/0_rgb.png", "b8_fname": "crops/0_b8.png"} - latitude: float - longitude: float - score: float - rgb_fname: str - b8_fname: str - - -class LandsatResponse(BaseModel): - """Response object for vessel detections.""" - - status: list[str] - predictions: list[FormattedPrediction] - - -class LandsatRequest(BaseModel): - """Request object for vessel detections.""" - - scene_id: str | None = None - image_files: dict[str, str] | None = None - crop_path: str - scratch_path: str - json_path: str - - -@app.on_event("startup") -async def rslp_init() -> None: - """Landsat Vessel Service Initialization.""" - logger.info("Initializing") - - -@app.get("/") -async def home() -> dict: - """Returns a simple message to indicate the service is running.""" - return {"message": "Landsat Detections App"} - - -@app.post("/detections", response_model=LandsatResponse) -async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse: - """Returns vessel detections Response object for a given Request object.""" - try: - json_data = predict_pipeline( - crop_path=info.crop_path, - scene_id=info.scene_id, - image_files=info.image_files, - scratch_path=info.scratch_path, - 
json_path=info.json_path, - ) - return LandsatResponse(status=["success"], predictions=json_data) - except Exception as e: - logger.error(f"Error during prediction pipeline: {e}") - return LandsatResponse(status=["error"], predictions=[]) - - -if __name__ == "__main__": - uvicorn.run("api_main:app", host=HOST, port=PORT, proxy_headers=True) diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py index c5d930c1..053badc5 100644 --- a/rslp/landsat_vessels/api_main.py +++ b/rslp/landsat_vessels/api_main.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging.config +import multiprocessing import os import uvicorn @@ -52,6 +53,13 @@ class LandsatRequest(BaseModel): async def rslp_init() -> None: """Landsat Vessel Service Initialization.""" logger.info("Initializing") + multiprocessing.set_start_method("forkserver", force=True) + multiprocessing.set_forkserver_preload( + [ + "rslp.utils.rslearn.materialize_dataset", + "rslp.utils.rslearn.run_model_predict", + ] + ) @app.get("/") diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index ac9128c5..caf4a212 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -14,7 +14,7 @@ from rslp.lightning_cli import CustomLightningCLI -def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = 1): +def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = 32): """Materialize the specified dataset by running prepare/ingest/materialize. Args: @@ -22,7 +22,6 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = group: limit dataset actions to this group. workers: number of workers to use. """ - # multiprocessing.set_start_method("forkserver") dataset = Dataset(ds_path) apply_on_windows( PrepareHandler(force=False), From f7081e41ae7d79b6b8bceedb0aec5daaea589340 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Fri, 18 Oct 2024 01:05:12 +0000 Subject: [PATCH 10/30] ruff --- Dockerfile | 1 + docker-compose.yaml | 31 +++++++++++++++++++ rslp/landsat_vessels/Dockerfile | 12 +++++++ rslp/landsat_vessels/api_main.py | 24 +++++++++----- .../landsat_vessels}/example_request.py | 14 +++++---- rslp/landsat_vessels/predict_pipeline.py | 5 +++ 6 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 rslp/landsat_vessels/Dockerfile rename {landsat => rslp/landsat_vessels}/example_request.py (64%) diff --git a/Dockerfile b/Dockerfile index 1e3de9e6..d0d0cfe5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,7 @@ RUN pip install -r /opt/rslearn_projects/rslearn/extra_requirements.txt COPY requirements.txt /opt/rslearn_projects/requirements.txt RUN pip install -r /opt/rslearn_projects/requirements.txt +# We can use `pip install rslearn[extra]` now # We need rslp to be pip installed as well ENV PYTHONPATH="${PYTHONPATH}:/opt/rslearn_projects/rslearn:." diff --git a/docker-compose.yaml b/docker-compose.yaml index 1a2fa426..161402f2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,3 +3,34 @@ version: "3.9" services: test: build: . + + # Define the base image + base-image: + build: + context: . 
+ dockerfile: Dockerfile + image: base-image:latest # Tag it as "base-image" + + # Define the landsat-vessels service + landsat-vessels: + build: + context: ./rslp/landsat_vessels + dockerfile: Dockerfile + shm_size: '10G' # This adds the shared memory size + depends_on: + - base-image + ports: + - "5555:5555" + environment: + - RSLP_BUCKET + - S3_ACCESS_KEY_ID + - S3_SECRET_ACCESS_KEY + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] # Ensure this service can access GPUs + runtime: nvidia # Use the NVIDIA runtime diff --git a/rslp/landsat_vessels/Dockerfile b/rslp/landsat_vessels/Dockerfile new file mode 100644 index 00000000..0e46ab55 --- /dev/null +++ b/rslp/landsat_vessels/Dockerfile @@ -0,0 +1,12 @@ +# Base image +FROM base-image:latest + +# Environment variables +ENV PYTHONPATH="/opt/rslearn_projects:${PYTHONPATH}" +ENV LANDSAT_PORT=5555 + +# Make port 5555 available to the world outside this container +EXPOSE $LANDSAT_PORT + +# Run app.py when the container launches +CMD ["python3", "rslp/landsat_vessels/api_main.py"] \ No newline at end of file diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py index 053badc5..4b424016 100644 --- a/rslp/landsat_vessels/api_main.py +++ b/rslp/landsat_vessels/api_main.py @@ -2,7 +2,7 @@ from __future__ import annotations -import logging.config +import logging import multiprocessing import os @@ -14,17 +14,18 @@ from rslp.landsat_vessels import predict_pipeline app = FastAPI() + +# Set up the logger +logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -HOST = "0.0.0.0" # nosec B104 -PORT = os.getenv("LANDSAT_PORT", default=5555) -# MODEL_VERSION = os.getenv("GIT_COMMIT_HASH", datetime.today()) +LANDSAT_HOST = "0.0.0.0" +LANDSAT_PORT = 5555 class FormattedPrediction(TypedDict): """Formatted prediction for a single vessel detection.""" - # [{"longitude": 123.96480506005342, "latitude": -34.75794960371865, "score": 0.9195963740348816, "rgb_fname": "crops/0_rgb.png", "b8_fname": "crops/0_b8.png"} latitude: float longitude: float score: float @@ -72,6 +73,7 @@ async def home() -> dict: async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse: """Returns vessel detections Response object for a given Request object.""" try: + logger.info(f"Received request with scene_id: {info.scene_id}") json_data = predict_pipeline( crop_path=info.crop_path, scene_id=info.scene_id, @@ -80,10 +82,18 @@ async def get_detections(info: LandsatRequest, response: Response) -> LandsatRes json_path=info.json_path, ) return LandsatResponse(status=["success"], predictions=json_data) + except ValueError as e: + logger.error(f"Value error during prediction pipeline: {e}") + return LandsatResponse(status=["error"], predictions=[]) except Exception as e: - logger.error(f"Error during prediction pipeline: {e}") + logger.error(f"Unexpected error during prediction pipeline: {e}") return LandsatResponse(status=["error"], predictions=[]) if __name__ == "__main__": - uvicorn.run("api_main:app", host=HOST, port=PORT, proxy_headers=True) + uvicorn.run( + "api_main:app", + host=os.getenv("LANDSAT_HOST", default="0.0.0.0"), + port=int(os.getenv("LANDSAT_PORT", default=5555)), + proxy_headers=True, + ) diff --git a/landsat/example_request.py b/rslp/landsat_vessels/example_request.py similarity index 64% rename from landsat/example_request.py rename to rslp/landsat_vessels/example_request.py 
index 54ccd8c3..1cfefcd5 100644 --- a/landsat/example_request.py +++ b/rslp/landsat_vessels/example_request.py @@ -5,13 +5,13 @@ import requests -PORT = os.getenv("Landsat_PORT", default=5555) +PORT = os.getenv("LANDSAT_PORT", default=5555) LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" -TIMEOUT_SECONDS = 200000 +TIMEOUT_SECONDS = 60000 SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" -CROP_PATH = "/home/yawenz/rslearn_projects/landsat/temp_crops" -SCRATCH_PATH = "/home/yawenz/rslearn_projects/landsat/temp_scratch" -JSON_PATH = "/home/yawenz/rslearn_projects/landsat/vessels.json" +CROP_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_crops" +SCRATCH_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_scratch" +JSON_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/vessels.json" def sample_request() -> None: @@ -28,11 +28,13 @@ def sample_request() -> None: LANDSAT_ENDPOINT, json=REQUEST_BODY, timeout=TIMEOUT_SECONDS ) output_filename = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "sample_response.json" + os.path.dirname(os.path.realpath(__file__)), "response.json" ) if response.ok: with open(output_filename, "w") as outfile: json.dump(response.json(), outfile) + else: + print(f"Error: {response.status_code} - {response.text}") if __name__ == "__main__": diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index faa35d6a..09d0d305 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -1,6 +1,7 @@ """Landsat vessel prediction pipeline.""" import json +import time from datetime import datetime, timedelta from typing import Any @@ -215,6 +216,8 @@ def predict_pipeline( print(f"image_files: {image_files}") print(f"scene_id: {scene_id}") + start_time = time.time() # Start the timer + ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) @@ -343,6 +346,8 @@ def predict_pipeline( # if tmp_dir: # tmp_dir.cleanup() + elapsed_time = time.time() - start_time # Calculate elapsed time + print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds") if json_path: json_path = UPath(json_path) From b64ab97d4ad4625987d358b1a2afe88c45256137 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Fri, 18 Oct 2024 15:57:49 +0000 Subject: [PATCH 11/30] minor changes --- Dockerfile | 2 +- rslp/landsat_vessels/example_request.py | 1 + rslp/landsat_vessels/predict_pipeline.py | 14 -------------- rslp/utils/rslearn.py | 2 -- 4 files changed, 2 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index d0d0cfe5..3a11b397 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ RUN pip install -r /opt/rslearn_projects/rslearn/extra_requirements.txt COPY requirements.txt /opt/rslearn_projects/requirements.txt RUN pip install -r /opt/rslearn_projects/requirements.txt -# We can use `pip install rslearn[extra]` now +# Not sure if we want to replace to `pip install rslearn[extra]` now, the current setup always import the latest version of rslearn # We need rslp to be pip installed as well ENV PYTHONPATH="${PYTHONPATH}:/opt/rslearn_projects/rslearn:." 
diff --git a/rslp/landsat_vessels/example_request.py b/rslp/landsat_vessels/example_request.py index 1cfefcd5..4e71c69d 100644 --- a/rslp/landsat_vessels/example_request.py +++ b/rslp/landsat_vessels/example_request.py @@ -9,6 +9,7 @@ LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" TIMEOUT_SECONDS = 60000 SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" +# TODO: change the paths CROP_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_crops" SCRATCH_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_scratch" JSON_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/vessels.json" diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 09d0d305..9ea80227 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -204,18 +204,6 @@ def predict_pipeline( scene_id: Landsat scene ID. Exactly one of image_files or scene_id should be specified. """ - # if not scratch_path: - # tmp_dir = TemporaryDirectory() - # scratch_path = tmp_dir.name - # else: - # tmp_dir = None - - print(f"scratch_path: {scratch_path}") - print(f"crop_path: {crop_path}") - print(f"json_path: {json_path}") - print(f"image_files: {image_files}") - print(f"scene_id: {scene_id}") - start_time = time.time() # Start the timer ds_path = UPath(scratch_path) @@ -344,8 +332,6 @@ def predict_pipeline( ) ) - # if tmp_dir: - # tmp_dir.cleanup() elapsed_time = time.time() - start_time # Calculate elapsed time print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds") diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index caf4a212..555bb254 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -34,14 +34,12 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = dataset, workers=workers, group=group, - use_initial_job=False, ) apply_on_windows( MaterializeHandler(), dataset, workers=workers, group=group, - use_initial_job=False, ) From cbdeec9df144c0d2ad8ad181d1ecbf8a6339a04a Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Fri, 18 Oct 2024 21:42:01 +0000 Subject: [PATCH 12/30] update dockerfile --- Dockerfile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3a11b397..7e9fbb6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,13 @@ FROM pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime@sha256:58a28ab734f23561aa146fbaf777fb319a953ca1e188832863ed57d510c9f197 -# TEMPORARY Until RSLEARN Is Public -ARG GIT_USERNAME -ARG GIT_TOKEN - RUN apt update RUN apt install -y libpq-dev ffmpeg libsm6 libxext6 git -RUN git clone https://${GIT_USERNAME}:${GIT_TOKEN}@github.com/allenai/rslearn.git /opt/rslearn_projects/rslearn +RUN git clone https://github.com/allenai/rslearn.git /opt/rslearn_projects/rslearn RUN pip install -r /opt/rslearn_projects/rslearn/requirements.txt RUN pip install -r /opt/rslearn_projects/rslearn/extra_requirements.txt COPY requirements.txt /opt/rslearn_projects/requirements.txt RUN pip install -r /opt/rslearn_projects/requirements.txt -# Not sure if we want to replace to `pip install rslearn[extra]` now, the current setup always import the latest version of rslearn # We need rslp to be pip installed as well ENV PYTHONPATH="${PYTHONPATH}:/opt/rslearn_projects/rslearn:." 
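For reference, a minimal client call against the new /detections endpoint might look like the sketch below. It assumes the service from api_main.py is running locally on the default LANDSAT_PORT (5555); the scene ID comes from the example_request.py script above, while the /tmp output paths, the timeout value, and the printing of the first few predictions are placeholders chosen for illustration. The request and response fields mirror LandsatRequest and LandsatResponse.

"""Minimal sketch of a request to the Landsat vessel detection service."""

import json
import os

import requests

PORT = os.getenv("LANDSAT_PORT", default=5555)
LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections"

# Placeholder inputs: any Landsat scene ID and writable local paths work here.
request_body = {
    "scene_id": "LC09_L1GT_106084_20241002_20241002_02_T2",
    "crop_path": "/tmp/landsat_crops",
    "scratch_path": "/tmp/landsat_scratch",
    "json_path": "/tmp/vessels.json",
    "image_files": None,
}

# The pipeline can take a while on a full scene, so use a generous timeout.
response = requests.post(LANDSAT_ENDPOINT, json=request_body, timeout=3600)
response.raise_for_status()
payload = response.json()

# payload["status"] is a list such as ["success"]; payload["predictions"] is a
# list of dicts with longitude, latitude, score, rgb_fname, and b8_fname.
print(json.dumps(payload["predictions"][:5], indent=2))

At this point in the series the crop_path, scratch_path, and json_path fields are still required; PATCH 20 later makes them optional on the request.
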
From 9df535d8328d739e56529a8164369518ed4b2ec3 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Fri, 18 Oct 2024 21:43:13 +0000 Subject: [PATCH 13/30] set use_initial_job = True --- rslp/utils/rslearn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index 555bb254..caf4a212 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -34,12 +34,14 @@ def materialize_dataset(ds_path: UPath, group: str | None = None, workers: int = dataset, workers=workers, group=group, + use_initial_job=False, ) apply_on_windows( MaterializeHandler(), dataset, workers=workers, group=group, + use_initial_job=False, ) From a1b6d1ea28036c498314f846c5f5e5baf82361e8 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Sat, 19 Oct 2024 20:54:57 +0000 Subject: [PATCH 14/30] move example to scripts folder --- rslp/landsat_vessels/{ => scripts}/example_request.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rslp/landsat_vessels/{ => scripts}/example_request.py (100%) diff --git a/rslp/landsat_vessels/example_request.py b/rslp/landsat_vessels/scripts/example_request.py similarity index 100% rename from rslp/landsat_vessels/example_request.py rename to rslp/landsat_vessels/scripts/example_request.py From 8691f53a1e4988ebe946ba8443ffd600ebfa9129 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Mon, 21 Oct 2024 18:02:50 +0000 Subject: [PATCH 15/30] changes based on comments --- docker-compose.yaml | 31 -------------- .../phase123_config.yaml | 10 +++-- rslp/landsat_vessels/docker-compose.yaml | 33 +++++++++++++++ rslp/landsat_vessels/predict_pipeline.py | 19 ++++++++- .../scripts/example_request.py | 42 ------------------- 5 files changed, 56 insertions(+), 79 deletions(-) create mode 100644 rslp/landsat_vessels/docker-compose.yaml delete mode 100644 rslp/landsat_vessels/scripts/example_request.py diff --git a/docker-compose.yaml b/docker-compose.yaml index 161402f2..1a2fa426 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,34 +3,3 @@ version: "3.9" services: test: build: . - - # Define the base image - base-image: - build: - context: . 
- dockerfile: Dockerfile - image: base-image:latest # Tag it as "base-image" - - # Define the landsat-vessels service - landsat-vessels: - build: - context: ./rslp/landsat_vessels - dockerfile: Dockerfile - shm_size: '10G' # This adds the shared memory size - depends_on: - - base-image - ports: - - "5555:5555" - environment: - - RSLP_BUCKET - - S3_ACCESS_KEY_ID - - S3_SECRET_ACCESS_KEY - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY - - NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible - deploy: - resources: - reservations: - devices: - - capabilities: [gpu] # Ensure this service can access GPUs - runtime: nvidia # Use the NVIDIA runtime diff --git a/landsat/recheck_landsat_labels/phase123_config.yaml b/landsat/recheck_landsat_labels/phase123_config.yaml index b612405d..c954137f 100644 --- a/landsat/recheck_landsat_labels/phase123_config.yaml +++ b/landsat/recheck_landsat_labels/phase123_config.yaml @@ -22,10 +22,10 @@ model: plateau_patience: 10 plateau_min_lr: 0 plateau_cooldown: 0 - restore_config: - restore_path: gcs://rslearn-eai/datasets/landsat_vessel_detection/artifacts/2024-03-13-landsat-vessels/vessel02_satlas_freeze_crop512_nosatlas_b8first2/best.pth - remap_prefixes: - - ["backbone.backbone.", "encoder.0.model."] + # restore_config: + # restore_path: gcs://rslearn-eai/datasets/landsat_vessel_detection/artifacts/2024-03-13-landsat-vessels/vessel02_satlas_freeze_crop512_nosatlas_b8first2/best.pth + # remap_prefixes: + # - ["backbone.backbone.", "encoder.0.model."] data: class_path: rslearn.train.data_module.RslearnDataModule init_args: @@ -52,6 +52,8 @@ data: allow_invalid: true skip_unknown_categories: true prob_property: "prob" + positive_class: "correct" + positive_class_threshold: 0.85 input_mapping: class: label: "targets" diff --git a/rslp/landsat_vessels/docker-compose.yaml b/rslp/landsat_vessels/docker-compose.yaml new file mode 100644 index 00000000..bf86c31d --- /dev/null +++ b/rslp/landsat_vessels/docker-compose.yaml @@ -0,0 +1,33 @@ +version: "3.9" + +services: + # Define the base image + base-image: + build: + context: ../.. + dockerfile: Dockerfile + image: base-image:latest # Tag it as "base-image" + + # Define the landsat-vessels service + landsat-vessels: + build: + context: . + dockerfile: Dockerfile + shm_size: '10G' # This adds the shared memory size + depends_on: + - base-image + ports: + - "5555:5555" + environment: + - RSLP_BUCKET + - S3_ACCESS_KEY_ID + - S3_SECRET_ACCESS_KEY + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] # Ensure this service can access GPUs + runtime: nvidia # Use the NVIDIA runtime diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 9ea80227..e35b7305 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -205,6 +205,7 @@ def predict_pipeline( specified. """ start_time = time.time() # Start the timer + time_profile = {} ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) @@ -264,15 +265,23 @@ def predict_pipeline( dst_geom.time_range[1] + timedelta(minutes=30), ) + time_profile["setup"] = time.time() - start_time + # Run pipeline. 
- print("get vessel detections") + step_start_time = time.time() + print("run detector") detections = get_vessel_detections( ds_path, projection, scene_bounds, time_range=time_range ) + time_profile["get_vessel_detections"] = time.time() - step_start_time + + step_start_time = time.time() print("run classifier") detections = run_classifier(ds_path, detections, time_range=time_range) + time_profile["run_classifier"] = time.time() - step_start_time # Write JSON and crops. + step_start_time = time.time() crop_path = UPath(crop_path) crop_path.mkdir(parents=True, exist_ok=True) @@ -332,12 +341,18 @@ def predict_pipeline( ) ) + time_profile["write_json_and_crops"] = time.time() - step_start_time + elapsed_time = time.time() - start_time # Calculate elapsed time - print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds") + time_profile["total"] = elapsed_time if json_path: json_path = UPath(json_path) with json_path.open("w") as f: json.dump(json_data, f) + print(f"Prediction pipeline completed in {elapsed_time:.2f} seconds") + for step, duration in time_profile.items(): + print(f"{step} took {duration:.2f} seconds") + return json_data diff --git a/rslp/landsat_vessels/scripts/example_request.py b/rslp/landsat_vessels/scripts/example_request.py deleted file mode 100644 index 4e71c69d..00000000 --- a/rslp/landsat_vessels/scripts/example_request.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Use this script to inference the API with locally stored data.""" - -import json -import os - -import requests - -PORT = os.getenv("LANDSAT_PORT", default=5555) -LANDSAT_ENDPOINT = f"http://localhost:{PORT}/detections" -TIMEOUT_SECONDS = 60000 -SCENE_ID = "LC09_L1GT_106084_20241002_20241002_02_T2" -# TODO: change the paths -CROP_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_crops" -SCRATCH_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/temp_scratch" -JSON_PATH = "/home/yawenz/rslearn_projects/rslp/landsat_vessels/vessels.json" - - -def sample_request() -> None: - """Sample request for files stored locally.""" - REQUEST_BODY = { - "scene_id": SCENE_ID, - "crop_path": CROP_PATH, - "scratch_path": SCRATCH_PATH, - "json_path": JSON_PATH, - "image_files": None, - } - - response = requests.post( - LANDSAT_ENDPOINT, json=REQUEST_BODY, timeout=TIMEOUT_SECONDS - ) - output_filename = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "response.json" - ) - if response.ok: - with open(output_filename, "w") as outfile: - json.dump(response.json(), outfile) - else: - print(f"Error: {response.status_code} - {response.text}") - - -if __name__ == "__main__": - sample_request() From 1d7d4d617555e7ce56beb7493f6a5c97dd3a0f32 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Mon, 21 Oct 2024 18:11:34 +0000 Subject: [PATCH 16/30] uncomment restore config --- landsat/recheck_landsat_labels/phase123_config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/landsat/recheck_landsat_labels/phase123_config.yaml b/landsat/recheck_landsat_labels/phase123_config.yaml index c954137f..b984687d 100644 --- a/landsat/recheck_landsat_labels/phase123_config.yaml +++ b/landsat/recheck_landsat_labels/phase123_config.yaml @@ -22,10 +22,10 @@ model: plateau_patience: 10 plateau_min_lr: 0 plateau_cooldown: 0 - # restore_config: - # restore_path: gcs://rslearn-eai/datasets/landsat_vessel_detection/artifacts/2024-03-13-landsat-vessels/vessel02_satlas_freeze_crop512_nosatlas_b8first2/best.pth - # remap_prefixes: - # - ["backbone.backbone.", "encoder.0.model."] + restore_config: + 
restore_path: gcs://rslearn-eai/datasets/landsat_vessel_detection/artifacts/2024-03-13-landsat-vessels/vessel02_satlas_freeze_crop512_nosatlas_b8first2/best.pth + remap_prefixes: + - ["backbone.backbone.", "encoder.0.model."] data: class_path: rslearn.train.data_module.RslearnDataModule init_args: From 164e895491ca2516afa5f247926f4e20619c93c2 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 14:37:18 -0700 Subject: [PATCH 17/30] Add test for Landsat prediction pipeline --- .../landsat_vessels/test_fastapi.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 tests/integration/landsat_vessels/test_fastapi.py diff --git a/tests/integration/landsat_vessels/test_fastapi.py b/tests/integration/landsat_vessels/test_fastapi.py new file mode 100644 index 00000000..94bbb257 --- /dev/null +++ b/tests/integration/landsat_vessels/test_fastapi.py @@ -0,0 +1,26 @@ +import tempfile + +from fastapi.testclient import TestClient + +from rslp.landsat_vessels.api_main import app + +client = TestClient(app) + + +def test_singapore_dense_scene(): + # LC08_L1TP_125059_20240913_20240920_02_T1 is a scene that includes southeast coast + # of Singapore where there are hundreds of vessels. + with tempfile.TemporaryDirectory() as tmp_dir: + response = client.post( + "/detections", + json={ + "scene_id": "LC08_L1TP_125059_20240913_20240920_02_T1", + "scratch_path": tmp_dir, + "json_path": "", + "crop_path": "", + }, + ) + assert response.status_code == 200 + predictions = response.json()["predictions"] + # There are many correct vessels in this scene. + assert len(predictions) >= 100 From 0babbbef775db443196a09faffff6c767f22ae72 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 14:50:26 -0700 Subject: [PATCH 18/30] add aws credentials --- .github/workflows/build_test.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 67c78053..fef3db0c 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -94,7 +94,10 @@ jobs: - name: Run tests with Docker Compose run: | - docker compose -f docker-compose.yaml run test pytest tests/ + docker compose -f docker-compose.yaml run + -e AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \ + -e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ + test pytest tests/ - name: Clean up if: always() From 12d6a5b0a7a2a30b3a99328edb556b085b5a4c29 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 14:55:18 -0700 Subject: [PATCH 19/30] fix --- .github/workflows/build_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index fef3db0c..8e4cebcd 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -94,7 +94,7 @@ jobs: - name: Run tests with Docker Compose run: | - docker compose -f docker-compose.yaml run + docker compose -f docker-compose.yaml run \ -e AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \ -e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ test pytest tests/ From 67f29b50eb4b033274ff299393ad2704f54b2942 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Mon, 21 Oct 2024 22:17:21 +0000 Subject: [PATCH 20/30] makes path arguments as optional --- rslp/landsat_vessels/api_main.py | 6 ++--- rslp/landsat_vessels/predict_pipeline.py | 20 ++++++++++++++++ .../landsat_vessels/test_fastapi.py | 23 ++++++------------- 3 files changed, 30 
insertions(+), 19 deletions(-) diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py index 4b424016..41938437 100644 --- a/rslp/landsat_vessels/api_main.py +++ b/rslp/landsat_vessels/api_main.py @@ -45,9 +45,9 @@ class LandsatRequest(BaseModel): scene_id: str | None = None image_files: dict[str, str] | None = None - crop_path: str - scratch_path: str - json_path: str + crop_path: str | None = None + scratch_path: str | None = None + json_path: str | None = None @app.on_event("startup") diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index e35b7305..d583f600 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -1,6 +1,7 @@ """Landsat vessel prediction pipeline.""" import json +import tempfile import time from datetime import datetime, timedelta from typing import Any @@ -207,6 +208,19 @@ def predict_pipeline( start_time = time.time() # Start the timer time_profile = {} + # Use temporary directory if scratch_path or crop_path are not specified. + if scratch_path is None: + with tempfile.TemporaryDirectory() as tmp_scratch_dir: + scratch_path = tmp_scratch_dir + else: + tmp_scratch_dir = None + + if crop_path is None: + with tempfile.TemporaryDirectory() as tmp_crop_dir: + crop_path = tmp_crop_dir + else: + tmp_crop_dir = None + ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) @@ -346,6 +360,12 @@ def predict_pipeline( elapsed_time = time.time() - start_time # Calculate elapsed time time_profile["total"] = elapsed_time + # Clean up any temporary directories. + if tmp_scratch_dir: + tmp_scratch_dir.cleanup() + if tmp_crop_dir: + tmp_crop_dir.cleanup() + if json_path: json_path = UPath(json_path) with json_path.open("w") as f: diff --git a/tests/integration/landsat_vessels/test_fastapi.py b/tests/integration/landsat_vessels/test_fastapi.py index 94bbb257..38e0c5bb 100644 --- a/tests/integration/landsat_vessels/test_fastapi.py +++ b/tests/integration/landsat_vessels/test_fastapi.py @@ -1,5 +1,3 @@ -import tempfile - from fastapi.testclient import TestClient from rslp.landsat_vessels.api_main import app @@ -10,17 +8,10 @@ def test_singapore_dense_scene(): # LC08_L1TP_125059_20240913_20240920_02_T1 is a scene that includes southeast coast # of Singapore where there are hundreds of vessels. - with tempfile.TemporaryDirectory() as tmp_dir: - response = client.post( - "/detections", - json={ - "scene_id": "LC08_L1TP_125059_20240913_20240920_02_T1", - "scratch_path": tmp_dir, - "json_path": "", - "crop_path": "", - }, - ) - assert response.status_code == 200 - predictions = response.json()["predictions"] - # There are many correct vessels in this scene. - assert len(predictions) >= 100 + response = client.post( + "/detections", json={"scene_id": "LC08_L1TP_125059_20240913_20240920_02_T1"} + ) + assert response.status_code == 200 + predictions = response.json()["predictions"] + # There are many correct vessels in this scene. 
+ assert len(predictions) >= 100 From 3f546bcd094bfaa174fda4b466e3f2dc7d04d2aa Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 15:20:26 -0700 Subject: [PATCH 21/30] add gcs access --- .github/workflows/build_test.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 8e4cebcd..c74d1073 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -91,12 +91,19 @@ jobs: run: | COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker compose -f docker-compose.yaml build + - name: Authenticate into gcp + uses: "google-github-actions/auth@v2" + with: + credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }} - name: Run tests with Docker Compose run: | docker compose -f docker-compose.yaml run \ -e AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \ -e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ + -v ${{env.GOOGLE_GHA_CREDS_PATH}}:/tmp/gcp-credentials.json:ro \ + -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json \ + -e RSLP_BUCKET rslearn-eai \ test pytest tests/ - name: Clean up From 2fd1847bc1fc5f68db516c07615af5c2369b8c57 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Mon, 21 Oct 2024 22:24:21 +0000 Subject: [PATCH 22/30] make sure either scene_id or image_files are provided --- rslp/landsat_vessels/api_main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py index 41938437..42358344 100644 --- a/rslp/landsat_vessels/api_main.py +++ b/rslp/landsat_vessels/api_main.py @@ -72,8 +72,15 @@ async def home() -> dict: @app.post("/detections", response_model=LandsatResponse) async def get_detections(info: LandsatRequest, response: Response) -> LandsatResponse: """Returns vessel detections Response object for a given Request object.""" + # Ensure that either scene_id or image_files is specified. + if info.scene_id is None and info.image_files is None: + raise ValueError("Either scene_id or image_files must be specified.") + try: - logger.info(f"Received request with scene_id: {info.scene_id}") + if info.scene_id is not None: + logger.info(f"Received request with scene_id: {info.scene_id}") + elif info.image_files is not None: + logger.info("Received request with image_files") json_data = predict_pipeline( crop_path=info.crop_path, scene_id=info.scene_id, From 5a1149c74603941e4868dc78897f7c44f94b62bf Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Mon, 21 Oct 2024 22:28:41 +0000 Subject: [PATCH 23/30] write crops only if specified path --- rslp/landsat_vessels/predict_pipeline.py | 36 ++++++++++-------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index d583f600..848ab70d 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -210,16 +210,10 @@ def predict_pipeline( # Use temporary directory if scratch_path or crop_path are not specified. 
if scratch_path is None: - with tempfile.TemporaryDirectory() as tmp_scratch_dir: - scratch_path = tmp_scratch_dir + with tempfile.TemporaryDirectory() as tmp_dir: + scratch_path = tmp_dir else: - tmp_scratch_dir = None - - if crop_path is None: - with tempfile.TemporaryDirectory() as tmp_crop_dir: - crop_path = tmp_crop_dir - else: - tmp_crop_dir = None + tmp_dir = None ds_path = UPath(scratch_path) ds_path.mkdir(parents=True, exist_ok=True) @@ -296,8 +290,9 @@ def predict_pipeline( # Write JSON and crops. step_start_time = time.time() - crop_path = UPath(crop_path) - crop_path.mkdir(parents=True, exist_ok=True) + if crop_path: + crop_path = UPath(crop_path) + crop_path.mkdir(parents=True, exist_ok=True) json_data = [] for idx, detection in enumerate(detections): @@ -329,13 +324,14 @@ def predict_pipeline( [images["B4_sharp"], images["B3_sharp"], images["B2_sharp"]], axis=2 ) - rgb_fname = crop_path / f"{idx}_rgb.png" - with rgb_fname.open("wb") as f: - Image.fromarray(rgb).save(f, format="PNG") + if crop_path: + rgb_fname = crop_path / f"{idx}_rgb.png" + with rgb_fname.open("wb") as f: + Image.fromarray(rgb).save(f, format="PNG") - b8_fname = crop_path / f"{idx}_b8.png" - with b8_fname.open("wb") as f: - Image.fromarray(images["B8"]).save(f, format="PNG") + b8_fname = crop_path / f"{idx}_b8.png" + with b8_fname.open("wb") as f: + Image.fromarray(images["B8"]).save(f, format="PNG") # Get longitude/latitude. src_geom = STGeometry( @@ -361,10 +357,8 @@ def predict_pipeline( time_profile["total"] = elapsed_time # Clean up any temporary directories. - if tmp_scratch_dir: - tmp_scratch_dir.cleanup() - if tmp_crop_dir: - tmp_crop_dir.cleanup() + if tmp_dir: + tmp_dir.cleanup() if json_path: json_path = UPath(json_path) From ee1665029fc175c6daa5eaaf2f0799b0e061d80f Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 16:02:10 -0700 Subject: [PATCH 24/30] fix bug with b8_fname not defined and tmp_dir.cleanup not exist --- rslp/landsat_vessels/predict_pipeline.py | 37 ++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 848ab70d..99de4dd2 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -210,8 +210,8 @@ def predict_pipeline( # Use temporary directory if scratch_path or crop_path are not specified. if scratch_path is None: - with tempfile.TemporaryDirectory() as tmp_dir: - scratch_path = tmp_dir + tmp_dir = tempfile.TemporaryDirectory() + scratch_path = tmp_dir.name else: tmp_dir = None @@ -296,6 +296,20 @@ def predict_pipeline( json_data = [] for idx, detection in enumerate(detections): + # Get longitude/latitude. + src_geom = STGeometry( + detection.projection, shapely.Point(detection.col, detection.row), None + ) + dst_geom = src_geom.to_projection(WGS84_PROJECTION) + lon = dst_geom.shp.x + lat = dst_geom.shp.y + + json_dict = dict( + longitude=lon, + latitude=lat, + score=detection.score, + ) + # Load crops from the window directory. images = {} for band in ["B2", "B3", "B4", "B8"]: @@ -333,23 +347,10 @@ def predict_pipeline( with b8_fname.open("wb") as f: Image.fromarray(images["B8"]).save(f, format="PNG") - # Get longitude/latitude. 
- src_geom = STGeometry( - detection.projection, shapely.Point(detection.col, detection.row), None - ) - dst_geom = src_geom.to_projection(WGS84_PROJECTION) - lon = dst_geom.shp.x - lat = dst_geom.shp.y + json_dict["rgb_fname"] = str(rgb_fname) + json_dict["b8_fname"] = str(b8_fname) - json_data.append( - dict( - longitude=lon, - latitude=lat, - score=detection.score, - rgb_fname=str(rgb_fname), - b8_fname=str(b8_fname), - ) - ) + json_data.append(json_dict) time_profile["write_json_and_crops"] = time.time() - step_start_time From dd674b9175e441146924acd9dc4af45aa2a532ee Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 21:26:42 -0400 Subject: [PATCH 25/30] fix --- .github/workflows/build_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index c74d1073..859b93ce 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -103,7 +103,7 @@ jobs: -e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ -v ${{env.GOOGLE_GHA_CREDS_PATH}}:/tmp/gcp-credentials.json:ro \ -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json \ - -e RSLP_BUCKET rslearn-eai \ + -e RSLP_BUCKET=rslearn-eai \ test pytest tests/ - name: Clean up From ec987afe686eafc5d71e2b4af1b3c1555d15d561 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 21:57:53 -0400 Subject: [PATCH 26/30] fix --- rslp/landsat_vessels/predict_pipeline.py | 36 +++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 99de4dd2..ac78694d 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -296,20 +296,6 @@ def predict_pipeline( json_data = [] for idx, detection in enumerate(detections): - # Get longitude/latitude. - src_geom = STGeometry( - detection.projection, shapely.Point(detection.col, detection.row), None - ) - dst_geom = src_geom.to_projection(WGS84_PROJECTION) - lon = dst_geom.shp.x - lat = dst_geom.shp.y - - json_dict = dict( - longitude=lon, - latitude=lat, - score=detection.score, - ) - # Load crops from the window directory. images = {} for band in ["B2", "B3", "B4", "B8"]: @@ -346,11 +332,27 @@ def predict_pipeline( b8_fname = crop_path / f"{idx}_b8.png" with b8_fname.open("wb") as f: Image.fromarray(images["B8"]).save(f, format="PNG") + else: + rgb_fname = "" + b8_fname = "" - json_dict["rgb_fname"] = str(rgb_fname) - json_dict["b8_fname"] = str(b8_fname) + # Get longitude/latitude. 
+ src_geom = STGeometry( + detection.projection, shapely.Point(detection.col, detection.row), None + ) + dst_geom = src_geom.to_projection(WGS84_PROJECTION) + lon = dst_geom.shp.x + lat = dst_geom.shp.y - json_data.append(json_dict) + json_data.append( + dict( + longitude=lon, + latitude=lat, + score=detection.score, + rgb_fname=rgb_fname, + b8_fname=b8_fname, + ) + ) time_profile["write_json_and_crops"] = time.time() - step_start_time From d505f18e5265fab6ed7d94a3e0406ecd4a08c785 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 21 Oct 2024 22:06:49 -0400 Subject: [PATCH 27/30] don't run slow test in CI --- .github/workflows/build_test.yaml | 2 +- .../landsat_vessels/test_fastapi.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{integration => integration_slow}/landsat_vessels/test_fastapi.py (100%) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 859b93ce..fa6fbb09 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -104,7 +104,7 @@ jobs: -v ${{env.GOOGLE_GHA_CREDS_PATH}}:/tmp/gcp-credentials.json:ro \ -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json \ -e RSLP_BUCKET=rslearn-eai \ - test pytest tests/ + test pytest tests/ --ignore tests/integration_slow/ - name: Clean up if: always() diff --git a/tests/integration/landsat_vessels/test_fastapi.py b/tests/integration_slow/landsat_vessels/test_fastapi.py similarity index 100% rename from tests/integration/landsat_vessels/test_fastapi.py rename to tests/integration_slow/landsat_vessels/test_fastapi.py From e2790b761835b58889dfea540b9a27117325bcc5 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Tue, 22 Oct 2024 09:58:17 -0700 Subject: [PATCH 28/30] fix lint issues --- rslp/landsat_vessels/Dockerfile | 2 +- rslp/landsat_vessels/api_main.py | 18 +++++------------ rslp/landsat_vessels/predict_pipeline.py | 20 ++++++++++++++----- .../landsat_vessels/test_fastapi.py | 2 +- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/rslp/landsat_vessels/Dockerfile b/rslp/landsat_vessels/Dockerfile index 0e46ab55..58a788b4 100644 --- a/rslp/landsat_vessels/Dockerfile +++ b/rslp/landsat_vessels/Dockerfile @@ -9,4 +9,4 @@ ENV LANDSAT_PORT=5555 EXPOSE $LANDSAT_PORT # Run app.py when the container launches -CMD ["python3", "rslp/landsat_vessels/api_main.py"] \ No newline at end of file +CMD ["python3", "rslp/landsat_vessels/api_main.py"] diff --git a/rslp/landsat_vessels/api_main.py b/rslp/landsat_vessels/api_main.py index 42358344..c1e3bf23 100644 --- a/rslp/landsat_vessels/api_main.py +++ b/rslp/landsat_vessels/api_main.py @@ -9,9 +9,8 @@ import uvicorn from fastapi import FastAPI, Response from pydantic import BaseModel -from typing_extensions import TypedDict -from rslp.landsat_vessels import predict_pipeline +from rslp.landsat_vessels.predict_pipeline import FormattedPrediction, predict_pipeline app = FastAPI() @@ -23,16 +22,6 @@ LANDSAT_PORT = 5555 -class FormattedPrediction(TypedDict): - """Formatted prediction for a single vessel detection.""" - - latitude: float - longitude: float - score: float - rgb_fname: str - b8_fname: str - - class LandsatResponse(BaseModel): """Response object for vessel detections.""" @@ -88,7 +77,10 @@ async def get_detections(info: LandsatRequest, response: Response) -> LandsatRes scratch_path=info.scratch_path, json_path=info.json_path, ) - return LandsatResponse(status=["success"], predictions=json_data) + return LandsatResponse( + status=["success"], + predictions=[pred for pred in json_data], + 
) except ValueError as e: logger.error(f"Value error during prediction pipeline: {e}") return LandsatResponse(status=["error"], predictions=[]) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index 791513fa..b18a7834 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -4,7 +4,6 @@ import tempfile import time from datetime import datetime, timedelta -from typing import Any import numpy as np import rasterio @@ -17,6 +16,7 @@ from rslearn.dataset import Dataset, Window from rslearn.utils import Projection, STGeometry from rslearn.utils.get_utm_ups_crs import get_utm_ups_projection +from typing_extensions import TypedDict from upath import UPath from rslp.utils.rslearn import materialize_dataset, run_model_predict @@ -57,6 +57,16 @@ def __init__( self.crop_window_dir = crop_window_dir +class FormattedPrediction(TypedDict): + """Formatted prediction for a single vessel detection.""" + + latitude: float + longitude: float + score: float + rgb_fname: str + b8_fname: str + + def get_vessel_detections( ds_path: UPath, projection: Projection, @@ -183,12 +193,12 @@ def run_classifier( def predict_pipeline( - crop_path: str, + crop_path: str | None = None, scratch_path: str | None = None, json_path: str | None = None, image_files: dict[str, str] | None = None, scene_id: str | None = None, -) -> list[dict[str, Any]]: +) -> list[FormattedPrediction]: """Run the Landsat vessel prediction pipeline. This inputs a Landsat scene (consisting of per-band GeoTIFFs) and produces the @@ -349,13 +359,13 @@ def predict_pipeline( lat = dst_geom.shp.y json_data.append( - dict( + FormattedPrediction( longitude=lon, latitude=lat, score=detection.score, rgb_fname=rgb_fname, b8_fname=b8_fname, - ) + ), ) time_profile["write_json_and_crops"] = time.time() - step_start_time diff --git a/tests/integration_slow/landsat_vessels/test_fastapi.py b/tests/integration_slow/landsat_vessels/test_fastapi.py index 38e0c5bb..62516101 100644 --- a/tests/integration_slow/landsat_vessels/test_fastapi.py +++ b/tests/integration_slow/landsat_vessels/test_fastapi.py @@ -5,7 +5,7 @@ client = TestClient(app) -def test_singapore_dense_scene(): +def test_singapore_dense_scene() -> None: # LC08_L1TP_125059_20240913_20240920_02_T1 is a scene that includes southeast coast # of Singapore where there are hundreds of vessels. response = client.post( From eb0b5a51ed8abe24a1a76dcad013b01934d2a250 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Tue, 22 Oct 2024 17:09:08 +0000 Subject: [PATCH 29/30] minor change --- rslp/landsat_vessels/predict_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rslp/landsat_vessels/predict_pipeline.py b/rslp/landsat_vessels/predict_pipeline.py index ac78694d..a83e130b 100644 --- a/rslp/landsat_vessels/predict_pipeline.py +++ b/rslp/landsat_vessels/predict_pipeline.py @@ -208,7 +208,6 @@ def predict_pipeline( start_time = time.time() # Start the timer time_profile = {} - # Use temporary directory if scratch_path or crop_path are not specified. 
if scratch_path is None: tmp_dir = tempfile.TemporaryDirectory() scratch_path = tmp_dir.name From b1cea8935510807d7f23f900bb0d41c309efc926 Mon Sep 17 00:00:00 2001 From: yawenzzzz Date: Tue, 22 Oct 2024 17:47:18 +0000 Subject: [PATCH 30/30] reduce batch_size to resolve the error: received 0 items of ancdata --- landsat/recheck_landsat_labels/phase123_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/landsat/recheck_landsat_labels/phase123_config.yaml b/landsat/recheck_landsat_labels/phase123_config.yaml index b984687d..af416441 100644 --- a/landsat/recheck_landsat_labels/phase123_config.yaml +++ b/landsat/recheck_landsat_labels/phase123_config.yaml @@ -57,7 +57,7 @@ data: input_mapping: class: label: "targets" - batch_size: 64 + batch_size: 32 num_workers: 32 default_config: transforms:
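
After this series, predict_pipeline treats crop_path, scratch_path, and json_path as optional: a temporary scratch directory is created when no scratch_path is supplied, crops and the JSON file are written only when their paths are given, and the formatted predictions are always returned to the caller (which is what the FastAPI handler relies on). A minimal usage sketch under those assumptions; the scene ID below is the one referenced in the slow integration test and is illustrative only:

    from rslp.landsat_vessels.predict_pipeline import predict_pipeline

    # Illustrative Landsat scene ID (the same scene the slow integration test uses).
    SCENE_ID = "LC08_L1TP_125059_20240913_20240920_02_T1"

    # With crop_path, scratch_path, and json_path left unset, the pipeline creates a
    # temporary scratch directory, skips writing crops and JSON, and returns the list
    # of FormattedPrediction dicts directly.
    predictions = predict_pipeline(scene_id=SCENE_ID)
    for pred in predictions:
        print(pred["longitude"], pred["latitude"], pred["score"])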