microsoft · isaaccorley · Dec 17, 2023 · Dec 12, 2023 · Dec 12, 2023 · Dec 13, 2023
diff --git a/docs/api/datamodules.rst b/docs/api/datamodules.rst
@@ -85,6 +85,16 @@ LandCover.ai
 
 .. autoclass:: LandCoverAIDataModule
 
+LEVIR-CD
+^^^^^^^^
+
+.. autoclass:: LEVIRCDDataModule
+
+LEVIR-CD+
+^^^^^^^^^
+
+.. autoclass:: LEVIRCDPlusDataModule
+
 LoveDA
 ^^^^^^
 

diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
@@ -267,6 +267,12 @@ LandCover.ai
 
 .. autoclass:: LandCoverAI
 
+LEVIR-CD
+^^^^^^^^
+
+.. autoclass:: LEVIRCDBase
+.. autoclass:: LEVIRCD
+
 LEVIR-CD+
 ^^^^^^^^^
 

diff --git a/docs/api/non_geo_datasets.csv b/docs/api/non_geo_datasets.csv
@@ -18,6 +18,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
 `IDTReeS`_,"OD,C",Aerial,"CC-BY-4.0",591,33,200x200,0.1--1,RGB
 `Inria Aerial Image Labeling`_,S,Aerial,-,360,2,"5,000x5,000",0.3,RGB
 `LandCover.ai`_,S,Aerial,"CC-BY-NC-SA-4.0","10,674",5,512x512,0.25--0.5,RGB
+`LEVIR-CD`_,CD,Google Earth,-,637,2,"1,024x1,024",0.5,RGB
 `LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
 `LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
 `MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"

diff --git a/tests/data/levircd/LEVIR-CD+.zip b/tests/data/levircd/LEVIR-CD+.zip
diff --git a/tests/data/levircd/levircd/data.py b/tests/data/levircd/levircd/data.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import hashlib
+import os
+import shutil
+import zipfile
+
+import numpy as np
+from PIL import Image
+
+np.random.seed(0)
+
+
+def create_image(path: str) -> None:
+    Z = np.random.randint(255, size=(1, 1, 3), dtype=np.uint8)
+    img = Image.fromarray(Z).convert("RGB")
+    img.save(path)
+
+
+def create_mask(path: str) -> None:
+    Z = np.random.randint(2, size=(1, 1, 3), dtype=np.uint8) * 255
+    img = Image.fromarray(Z).convert("L")
+    img.save(path)
+
+
+if __name__ == "__main__":
+    splits = ["train", "val", "test"]
+    filenames = ["train.zip", "val.zip", "test.zip"]
+    directories = ["A", "B", "label"]
+
+    for split, filename in zip(splits, filenames):
+        for directory in directories:
+            os.mkdir(directory)
+
+        for i in range(2):
+            path = os.path.join("A", f"{split}_{i}.png")
+            create_image(path)
+
+            path = os.path.join("B", f"{split}_{i}.png")
+            create_image(path)
+
+            path = os.path.join("label", f"{split}_{i}.png")
+            create_mask(path)
+
+        # compress data
+        with zipfile.ZipFile(filename, mode="a") as f:
+            for directory in directories:
+                for file in os.listdir(directory):
+                    f.write(os.path.join(directory, file))
+
+        for directory in directories:
+            shutil.rmtree(directory)
+
+        # compute checksum
+        with open(filename, "rb") as f:
+            md5 = hashlib.md5(f.read()).hexdigest()
+            print(f"{filename}: {md5}")
diff --git a/tests/data/levircd/levircd/test.zip b/tests/data/levircd/levircd/test.zip
diff --git a/tests/data/levircd/levircd/train.zip b/tests/data/levircd/levircd/train.zip
diff --git a/tests/data/levircd/levircd/val.zip b/tests/data/levircd/levircd/val.zip
diff --git a/tests/data/levircd/levircdplus/LEVIR-CD+.zip b/tests/data/levircd/levircdplus/LEVIR-CD+.zip
diff --git a/tests/data/levircd/levircdplus/data.py b/tests/data/levircd/levircdplus/data.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import hashlib
+import os
+import shutil
+
+import numpy as np
+from PIL import Image
+
+np.random.seed(0)
+
+
+def create_image(path: str) -> None:
+    Z = np.random.randint(255, size=(1, 1, 3), dtype=np.uint8)
+    img = Image.fromarray(Z).convert("RGB")
+    img.save(path)
+
+
+def create_mask(path: str) -> None:
+    Z = np.random.randint(2, size=(1, 1, 3), dtype=np.uint8) * 255
+    img = Image.fromarray(Z).convert("L")
+    img.save(path)
+
+
+if __name__ == "__main__":
+    root = "LEVIR-CD+"
+    splits = ["train", "test"]
+    directories = ["A", "B", "label"]
+
+    if os.path.exists(root):
+        shutil.rmtree(root)
+
+    for split in splits:
+        for directory in directories:
+            os.makedirs(os.path.join(root, split, directory))
+
+        for i in range(2):
+            folder = os.path.join(root, split, "A")
+            path = os.path.join(folder, f"0{i}.png")
+            create_image(path)
+
+            folder = os.path.join(root, split, "B")
+            path = os.path.join(folder, f"0{i}.png")
+            create_image(path)
+
+            folder = os.path.join(root, split, "label")
+            path = os.path.join(folder, f"0{i}.png")
+            create_mask(path)
+
+    # Compress data
+    shutil.make_archive(root, "zip", ".", root)
+
+    # compute checksum
+    with open(f"{root}.zip", "rb") as f:
+        md5 = hashlib.md5(f.read()).hexdigest()
+        print(f"{root}.zip: {md5}")
+
+    shutil.rmtree(root)
diff --git a/tests/datamodules/test_levircd.py b/tests/datamodules/test_levircd.py
@@ -6,32 +6,61 @@
 from pathlib import Path
 
 import pytest
+import torchvision.transforms.functional as F
 from lightning.pytorch import Trainer
 from pytest import MonkeyPatch
+from torch import Tensor
+from torchvision.transforms import InterpolationMode
 
 import torchgeo.datasets.utils
-from torchgeo.datamodules import LEVIRCDPlusDataModule
-from torchgeo.datasets import LEVIRCDPlus
+from torchgeo.datamodules import LEVIRCDDataModule, LEVIRCDPlusDataModule
+from torchgeo.datasets import LEVIRCD, LEVIRCDPlus
 
 
 def download_url(url: str, root: str, *args: str) -> None:
     shutil.copy(url, root)
 
 
+def transforms(sample: dict[str, Tensor]) -> dict[str, Tensor]:
+    sample["image1"] = F.resize(
+        sample["image1"],
+        size=[1024, 1024],
+        antialias=True,
+        interpolation=InterpolationMode.BILINEAR,
+    )
+    sample["image2"] = F.resize(
+        sample["image2"],
+        size=[1024, 1024],
+        antialias=True,
+        interpolation=InterpolationMode.BILINEAR,
+    )
+    sample["mask"] = F.resize(
+        sample["mask"].unsqueeze(dim=0),
+        size=[1024, 1024],
+        interpolation=InterpolationMode.NEAREST,
+    )
+    return sample
+
+
 class TestLEVIRCDPlusDataModule:
     @pytest.fixture
     def datamodule(
         self, monkeypatch: MonkeyPatch, tmp_path: Path
     ) -> LEVIRCDPlusDataModule:
         monkeypatch.setattr(torchgeo.datasets.utils, "download_url", download_url)
-        md5 = "1adf156f628aa32fb2e8fe6cada16c04"
+        md5 = "0ccca34310bfe7096dadfbf05b0d180f"
         monkeypatch.setattr(LEVIRCDPlus, "md5", md5)
-        url = os.path.join("tests", "data", "levircd", "LEVIR-CD+.zip")
+        url = os.path.join("tests", "data", "levircd", "levircdplus", "LEVIR-CD+.zip")
         monkeypatch.setattr(LEVIRCDPlus, "url", url)
 
         root = str(tmp_path)
         dm = LEVIRCDPlusDataModule(
-            root=root, download=True, num_workers=0, checksum=True, val_split_pct=0.5
+            root=root,
+            download=True,
+            num_workers=0,
+            checksum=True,
+            val_split_pct=0.5,
+            transforms=transforms,
         )
         dm.prepare_data()
         dm.trainer = Trainer(accelerator="cpu", max_epochs=1)
@@ -57,10 +86,14 @@ def test_val_dataloader(self, datamodule: LEVIRCDPlusDataModule) -> None:
         batch = next(iter(datamodule.val_dataloader()))
         batch = datamodule.on_after_batch_transfer(batch, 0)
         if datamodule.val_split_pct > 0.0:
-            assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
-            assert batch["image1"].shape[0] == batch["mask"].shape[0] == 8
-            assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
-            assert batch["image2"].shape[0] == batch["mask"].shape[0] == 8
+            assert (
+                batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+            )
+            assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
+            assert (
+                batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+            )
+            assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
             assert batch["image1"].shape[1] == 3
             assert batch["image2"].shape[1] == 3
 
@@ -70,9 +103,85 @@ def test_test_dataloader(self, datamodule: LEVIRCDPlusDataModule) -> None:
             datamodule.trainer.testing = True
         batch = next(iter(datamodule.test_dataloader()))
         batch = datamodule.on_after_batch_transfer(batch, 0)
+        assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image1"].shape[1] == 3
+        assert batch["image2"].shape[1] == 3
+
+
+class TestLEVIRCDDataModule:
+    @pytest.fixture
+    def datamodule(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> LEVIRCDDataModule:
+        directory = os.path.join("tests", "data", "levircd", "levircd")
+        splits = {
+            "train": {
+                "url": os.path.join(directory, "train.zip"),
+                "filename": "train.zip",
+                "md5": "7c2e24b3072095519f1be7eb01fae4ff",
+            },
+            "val": {
+                "url": os.path.join(directory, "val.zip"),
+                "filename": "val.zip",
+                "md5": "5c320223ba88b6fc8ff9d1feebc3b84e",
+            },
+            "test": {
+                "url": os.path.join(directory, "test.zip"),
+                "filename": "test.zip",
+                "md5": "021db72d4486726d6a0702563a617b32",
+            },
+        }
+        monkeypatch.setattr(torchgeo.datasets.utils, "download_url", download_url)
+        monkeypatch.setattr(LEVIRCD, "splits", splits)
+
+        root = str(tmp_path)
+        dm = LEVIRCDDataModule(
+            root=root,
+            download=True,
+            num_workers=0,
+            checksum=True,
+            transforms=transforms,
+        )
+        dm.prepare_data()
+        dm.trainer = Trainer(accelerator="cpu", max_epochs=1)
+        return dm
+
+    def test_train_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
+        datamodule.setup("fit")
+        if datamodule.trainer:
+            datamodule.trainer.training = True
+        batch = next(iter(datamodule.train_dataloader()))
+        batch = datamodule.on_after_batch_transfer(batch, 0)
         assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
         assert batch["image1"].shape[0] == batch["mask"].shape[0] == 8
         assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
         assert batch["image2"].shape[0] == batch["mask"].shape[0] == 8
         assert batch["image1"].shape[1] == 3
         assert batch["image2"].shape[1] == 3
+
+    def test_val_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
+        datamodule.setup("validate")
+        if datamodule.trainer:
+            datamodule.trainer.validating = True
+        batch = next(iter(datamodule.val_dataloader()))
+        batch = datamodule.on_after_batch_transfer(batch, 0)
+        assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image1"].shape[1] == 3
+        assert batch["image2"].shape[1] == 3
+
+    def test_test_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
+        datamodule.setup("test")
+        if datamodule.trainer:
+            datamodule.trainer.testing = True
+        batch = next(iter(datamodule.test_dataloader()))
+        batch = datamodule.on_after_batch_transfer(batch, 0)
+        assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
+        assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
+        assert batch["image1"].shape[1] == 3
+        assert batch["image2"].shape[1] == 3