allenai
diff --git a/‎rslp/landsat_vessels/predict_pipeline.py‎
Lines changed: 50 additions & 15 deletions b/‎rslp/landsat_vessels/predict_pipeline.py‎
Lines changed: 50 additions & 15 deletions
diff --git a/‎rslp/launch_beaker.py‎
Lines changed: 6 additions & 2 deletions b/‎rslp/launch_beaker.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎rslp/lightning_cli.py‎
Lines changed: 1 addition & 0 deletions b/‎rslp/lightning_cli.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎rslp/sentinel2_vessels/predict_pipeline.py‎
Lines changed: 29 additions & 18 deletions b/‎rslp/sentinel2_vessels/predict_pipeline.py‎
Lines changed: 29 additions & 18 deletions
diff --git a/‎rslp/transforms/__init__.py‎
Lines changed: 7 additions & 0 deletions b/‎rslp/transforms/__init__.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎rslp/utils/__init__.py‎
Lines changed: 7 additions & 0 deletions b/‎rslp/utils/__init__.py‎
Lines changed: 7 additions & 0 deletions
@@ -11,16 +11,17 @@
 import shapely
 from PIL import Image
 from rslearn.const import WGS84_PROJECTION
-from rslearn.data_sources import data_source_from_config
+from rslearn.data_sources import Item, data_source_from_config
 from rslearn.data_sources.aws_landsat import LandsatOliTirs
-from rslearn.dataset import Dataset, Window
+from rslearn.dataset import Dataset, Window, WindowLayerData
 from rslearn.utils import Projection, STGeometry
 from rslearn.utils.get_utm_ups_crs import get_utm_ups_projection
 from typing_extensions import TypedDict
 from upath import UPath
 
 from rslp.utils.rslearn import materialize_dataset, run_model_predict
 
+LANDSAT_LAYER_NAME = "landsat"
 LOCAL_FILES_DATASET_CONFIG = "data/landsat_vessels/predict_dataset_config.json"
 AWS_DATASET_CONFIG = "data/landsat_vessels/predict_dataset_config_aws.json"
 DETECT_MODEL_CONFIG = "data/landsat_vessels/config.yaml"
@@ -72,6 +73,7 @@ def get_vessel_detections(
     projection: Projection,
     bounds: tuple[int, int, int, int],
     time_range: tuple[datetime, datetime] | None = None,
+    item: Item | None = None,
 ) -> list[VesselDetection]:
     """Apply the vessel detector.
 
@@ -85,22 +87,30 @@ def get_vessel_detections(
         bounds: the bounds to apply the detector in.
         time_range: optional time range to apply the detector in (in case the data
             source needs an actual time range).
+        item: only ingest this item. This is set if we are getting the scene directly
+            from a Landsat data source, not local file.
     """
     # Create a window for applying detector.
     group = "default"
     window_path = ds_path / "windows" / group / "default"
-    Window(
+    window = Window(
         path=window_path,
         group=group,
         name="default",
         projection=projection,
         bounds=bounds,
         time_range=time_range,
-    ).save()
+    )
+    window.save()
+
+    # Restrict to the item if set; the mapping is keyed by the layer name's value.
+    if item:
+        layer_data = WindowLayerData(LANDSAT_LAYER_NAME, [[item.serialize()]])
+        window.save_layer_datas({LANDSAT_LAYER_NAME: layer_data})
 
     print("materialize dataset")
     materialize_dataset(ds_path, group=group)
-    assert (window_path / "layers" / "landsat" / "B8" / "geotiff.tif").exists()
+    assert (window_path / "layers" / LANDSAT_LAYER_NAME / "B8" / "geotiff.tif").exists()
 
     # Run object detector.
     run_model_predict(DETECT_MODEL_CONFIG, ds_path)
@@ -131,6 +141,7 @@ def run_classifier(
     ds_path: UPath,
     detections: list[VesselDetection],
     time_range: tuple[datetime, datetime] | None = None,
+    item: Item | None = None,
 ) -> list[VesselDetection]:
     """Run the classifier to try to prune false positive detections.
 
@@ -140,6 +151,7 @@ def run_classifier(
         detections: the detections from the detector.
         time_range: optional time range to apply the detector in (in case the data
             source needs an actual time range).
+        item: only ingest this item.
 
     Returns:
         the subset of detections that pass the classifier.
@@ -161,20 +173,27 @@ def run_classifier(
             detection.col + CLASSIFY_WINDOW_SIZE // 2,
             detection.row + CLASSIFY_WINDOW_SIZE // 2,
         ]
-        Window(
+        window = Window(
             path=window_path,
             group=group,
             name=window_name,
             projection=detection.projection,
             bounds=bounds,
             time_range=time_range,
-        ).save()
+        )
+        window.save()
         window_paths.append(window_path)
 
+        if item:  # only ingest this item (mapping keyed by layer name)
+            layer_data = WindowLayerData(LANDSAT_LAYER_NAME, [[item.serialize()]])
+            window.save_layer_datas({LANDSAT_LAYER_NAME: layer_data})
+
     print("materialize dataset")
     materialize_dataset(ds_path, group=group)
     for window_path in window_paths:
-        assert (window_path / "layers" / "landsat" / "B8" / "geotiff.tif").exists()
+        assert (
+            window_path / "layers" / LANDSAT_LAYER_NAME / "B8" / "geotiff.tif"
+        ).exists()
 
     # Run classification model.
     run_model_predict(CLASSIFY_MODEL_CONFIG, ds_path)
@@ -225,6 +244,7 @@ def predict_pipeline(
 
     ds_path = UPath(scratch_path)
     ds_path.mkdir(parents=True, exist_ok=True)
+    item = None
 
     if image_files:
         # Setup the dataset configuration file with the provided image files.
@@ -238,7 +258,7 @@ def predict_pipeline(
             cfg["src_dir"] = str(UPath(image_path).parent)
             item_spec["fnames"].append(image_path)
             item_spec["bands"].append([band])
-        cfg["layers"]["landsat"]["data_source"]["item_specs"] = [item_spec]
+        cfg["layers"][LANDSAT_LAYER_NAME]["data_source"]["item_specs"] = [item_spec]
 
         with (ds_path / "config.json").open("w") as f:
             json.dump(cfg, f)
@@ -251,7 +271,12 @@ def predict_pipeline(
                 )
                 left = int(raster.transform.c / projection.x_resolution)
                 top = int(raster.transform.f / projection.y_resolution)
-                scene_bounds = [left, top, left + raster.width, top + raster.height]
+                scene_bounds = (
+                    left,
+                    top,
+                    left + int(raster.width),
+                    top + int(raster.height),
+                )
 
         time_range = None
 
@@ -264,7 +289,7 @@ def predict_pipeline(
         # Get the projection and scene bounds using the Landsat data source.
         dataset = Dataset(ds_path)
         data_source: LandsatOliTirs = data_source_from_config(
-            dataset.layers["landsat"], dataset.path
+            dataset.layers[LANDSAT_LAYER_NAME], dataset.path
         )
         item = data_source.get_item_by_name(scene_id)
         wgs84_geom = item.geometry.to_projection(WGS84_PROJECTION)
@@ -275,7 +300,12 @@ def predict_pipeline(
             -LANDSAT_RESOLUTION,
         )
         dst_geom = item.geometry.to_projection(projection)
-        scene_bounds = [int(value) for value in dst_geom.shp.bounds]
+        scene_bounds = (
+            int(dst_geom.shp.bounds[0]),
+            int(dst_geom.shp.bounds[1]),
+            int(dst_geom.shp.bounds[2]),
+            int(dst_geom.shp.bounds[3]),
+        )
         time_range = (
             dst_geom.time_range[0] - timedelta(minutes=30),
             dst_geom.time_range[1] + timedelta(minutes=30),
@@ -289,14 +319,15 @@ def predict_pipeline(
     detections = get_vessel_detections(
         ds_path,
         projection,
-        scene_bounds,  # type: ignore
+        scene_bounds,
         time_range=time_range,
+        item=item,
     )
     time_profile["get_vessel_detections"] = time.time() - step_start_time
 
     step_start_time = time.time()
     print("run classifier")
-    detections = run_classifier(ds_path, detections, time_range=time_range)
+    detections = run_classifier(ds_path, detections, time_range=time_range, item=item)
     time_profile["run_classifier"] = time.time() - step_start_time
 
     # Write JSON and crops.
@@ -313,7 +344,11 @@ def predict_pipeline(
             raise ValueError("Crop window directory is None")
         for band in ["B2", "B3", "B4", "B8"]:
             image_fname = (
-                detection.crop_window_dir / "layers" / "landsat" / band / "geotiff.tif"
+                detection.crop_window_dir
+                / "layers"
+                / LANDSAT_LAYER_NAME
+                / band
+                / "geotiff.tif"
             )
             with image_fname.open("rb") as f:
                 with rasterio.open(f) as src:
 
@@ -57,13 +57,17 @@ def launch_job(
                 value="prior-satlas",  # nosec
             ),
             EnvVar(
-                name="S3_ACCESS_KEY_ID",  # nosec
+                name="WEKA_ACCESS_KEY_ID",  # nosec
                 secret="RSLEARN_WEKA_KEY",  # nosec
             ),
             EnvVar(
-                name="S3_SECRET_ACCESS_KEY",  # nosec
+                name="WEKA_SECRET_ACCESS_KEY",  # nosec
                 secret="RSLEARN_WEKA_SECRET",  # nosec
             ),
+            EnvVar(
+                name="WEKA_ENDPOINT_URL",  # nosec
+                value="https://weka-aus.beaker.org:9000",  # nosec
+            ),
             EnvVar(
                 name="RSLP_PROJECT",  # nosec
                 value=project_id,
 
@@ -10,6 +10,7 @@
 from rslearn.main import RslearnLightningCLI
 from upath import UPath
 
+import rslp.utils.fs  # noqa: F401 (imported but unused)
 from rslp import launcher_lib
 
 CHECKPOINT_DIR = "gs://{rslp_bucket}/projects/{project_id}/{experiment_id}/checkpoints/"
 
@@ -8,15 +8,16 @@
 import shapely
 from PIL import Image
 from rslearn.const import WGS84_PROJECTION
-from rslearn.data_sources import data_source_from_config
+from rslearn.data_sources import Item, data_source_from_config
 from rslearn.data_sources.gcp_public_data import Sentinel2
-from rslearn.dataset import Dataset, Window
+from rslearn.dataset import Dataset, Window, WindowLayerData
 from rslearn.utils import Projection, STGeometry
 from rslearn.utils.get_utm_ups_crs import get_utm_ups_projection
 from upath import UPath
 
 from rslp.utils.rslearn import materialize_dataset, run_model_predict
 
+SENTINEL2_LAYER_NAME = "sentinel2"
 DATASET_CONFIG = "data/sentinel2_vessels/config.json"
 DETECT_MODEL_CONFIG = "data/sentinel2_vessels/config.yaml"
 SENTINEL2_RESOLUTION = 10
@@ -59,6 +60,7 @@ def get_vessel_detections(
     projection: Projection,
     bounds: tuple[int, int, int, int],
     ts: datetime,
+    item: Item,
 ) -> list[VesselDetection]:
     """Apply the vessel detector.
 
@@ -71,22 +73,30 @@ def get_vessel_detections(
         projection: the projection to apply the detector in.
         bounds: the bounds to apply the detector in.
         ts: timestamp to apply the detector on.
+        item: the item to ingest.
     """
     # Create a window for applying detector.
     group = "detector_predict"
     window_path = ds_path / "windows" / group / "default"
-    Window(
+    window = Window(
         path=window_path,
         group=group,
         name="default",
         projection=projection,
         bounds=bounds,
         time_range=(ts - timedelta(minutes=20), ts + timedelta(minutes=20)),
-    ).save()
+    )
+    window.save()
+
+    if item:  # only ingest this item (mapping keyed by layer name)
+        layer_data = WindowLayerData(SENTINEL2_LAYER_NAME, [[item.serialize()]])
+        window.save_layer_datas({SENTINEL2_LAYER_NAME: layer_data})
 
     print("materialize dataset")
     materialize_dataset(ds_path, group=group, workers=1)
-    assert (window_path / "layers" / "sentinel2" / "R_G_B" / "geotiff.tif").exists()
+    assert (
+        window_path / "layers" / SENTINEL2_LAYER_NAME / "R_G_B" / "geotiff.tif"
+    ).exists()
 
     # Run object detector.
     run_model_predict(DETECT_MODEL_CONFIG, ds_path)
@@ -141,7 +151,7 @@ def predict_pipeline(
     # Determine the bounds and timestamp of this scene using the data source.
     dataset = Dataset(ds_path)
     data_source: Sentinel2 = data_source_from_config(
-        dataset.layers["sentinel2"], dataset.path
+        dataset.layers[SENTINEL2_LAYER_NAME], dataset.path
     )
     item = data_source.get_item_by_name(scene_id)
     wgs84_geom = item.geometry.to_projection(WGS84_PROJECTION)
@@ -152,12 +162,15 @@ def predict_pipeline(
         -SENTINEL2_RESOLUTION,
     )
     dst_geom = item.geometry.to_projection(projection)
-    bounds = tuple(int(value) for value in dst_geom.shp.bounds)
-    if len(bounds) != 4:
-        raise ValueError(f"Expected 4 bounds, got {len(bounds)}")
+    bounds = (
+        int(dst_geom.shp.bounds[0]),
+        int(dst_geom.shp.bounds[1]),
+        int(dst_geom.shp.bounds[2]),
+        int(dst_geom.shp.bounds[3]),
+    )
     ts = item.geometry.time_range[0]
 
-    detections = get_vessel_detections(ds_path, projection, bounds, ts)  # type: ignore
+    detections = get_vessel_detections(ds_path, projection, bounds, ts, item)
 
     # Create windows just to collect crops for each detection.
     group = "crops"
@@ -166,13 +179,11 @@ def predict_pipeline(
         window_name = f"{detection.col}_{detection.row}"
         window_path = ds_path / "windows" / group / window_name
         detection.crop_window_dir = window_path
-        bounds = tuple(
-            [
-                detection.col - CROP_WINDOW_SIZE // 2,
-                detection.row - CROP_WINDOW_SIZE // 2,
-                detection.col + CROP_WINDOW_SIZE // 2,
-                detection.row + CROP_WINDOW_SIZE // 2,
-            ]
+        bounds = (
+            detection.col - CROP_WINDOW_SIZE // 2,
+            detection.row - CROP_WINDOW_SIZE // 2,
+            detection.col + CROP_WINDOW_SIZE // 2,
+            detection.row + CROP_WINDOW_SIZE // 2,
         )
         Window(
             path=window_path,
@@ -193,7 +204,7 @@ def predict_pipeline(
     for detection, crop_window_path in zip(detections, window_paths):
         # Get RGB crop.
         image_fname = (
-            crop_window_path / "layers" / "sentinel2" / "R_G_B" / "geotiff.tif"
+            crop_window_path / "layers" / SENTINEL2_LAYER_NAME / "R_G_B" / "geotiff.tif"
         )
         with image_fname.open("rb") as f:
             with rasterio.open(f) as src:
 
@@ -0,0 +1,7 @@
+"""rslp transforms.
+
+These transforms should be ones that are not general enough to include in rslearn, but
+still relevant across multiple rslp projects.
+
+If it is project-specific, it should go in rslp/[project_name]/train.py or similar.
+"""
@@ -0,0 +1,7 @@
+"""rslp utilities.
+
+These utilities should be ones that are not general enough to include in rslearn, but
+still relevant across multiple rslp projects.
+
+If it is project-specific, it should go in rslp/[project_name]/util.py or similar.
+"""