Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LEVIR-CD Dataset and Datamodule #1770

Merged
merged 10 commits into from
Dec 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/api/datamodules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@ LandCover.ai

.. autoclass:: LandCoverAIDataModule

LEVIR-CD
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably just have a single header covering both versions

^^^^^^^^

.. autoclass:: LEVIRCDDataModule

LEVIR-CD+
^^^^^^^^^

.. autoclass:: LEVIRCDPlusDataModule

LoveDA
^^^^^^

Expand Down
6 changes: 6 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ LandCover.ai

.. autoclass:: LandCoverAI

LEVIR-CD
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

^^^^^^^^

.. autoclass:: LEVIRCDBase
.. autoclass:: LEVIRCD

LEVIR-CD+
^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`IDTReeS`_,"OD,C",Aerial,"CC-BY-4.0",591,33,200x200,0.1--1,RGB
`Inria Aerial Image Labeling`_,S,Aerial,-,360,2,"5,000x5,000",0.3,RGB
`LandCover.ai`_,S,Aerial,"CC-BY-NC-SA-4.0","10,674",5,512x512,0.25--0.5,RGB
`LEVIR-CD`_,CD,Google Earth,-,637,2,"1,024x1,024",0.5,RGB
`LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could probably just combine these two rows

`LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
`MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"
Expand Down
Binary file removed tests/data/levircd/LEVIR-CD+.zip
Binary file not shown.
60 changes: 60 additions & 0 deletions tests/data/levircd/levircd/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
import zipfile

import numpy as np
from PIL import Image

np.random.seed(0)


def create_image(path: str) -> None:
    """Write a random 1x1 RGB image to ``path``.

    Pixel values are drawn with ``np.random.randint(255, ...)``, i.e. from
    the half-open range [0, 255), using NumPy's global random state (seeded
    at module import), so the result depends on how many draws came before.

    Args:
        path: Destination file handed to ``Image.save``.
    """
    pixels = np.random.randint(255, size=(1, 1, 3), dtype=np.uint8)
    Image.fromarray(pixels).convert("RGB").save(path)


def create_mask(path: str) -> None:
    """Write a random 1x1 single-channel mask to ``path``.

    A (1, 1, 3) array of zeros and ones is drawn from NumPy's global random
    state, scaled to {0, 255}, and then reduced to one 8-bit channel with
    ``convert("L")``.

    Args:
        path: Destination file handed to ``Image.save``.
    """
    bits = np.random.randint(2, size=(1, 1, 3), dtype=np.uint8)
    mask = Image.fromarray(bits * 255).convert("L")
    mask.save(path)


if __name__ == "__main__":
    # Generate tiny fake LEVIR-CD archives (train/val/test), each holding two
    # image pairs ("A", "B") and their masks ("label"), then print the MD5 of
    # every archive so it can be pasted into the tests.
    splits = ["train", "val", "test"]
    filenames = ["train.zip", "val.zip", "test.zip"]
    directories = ["A", "B", "label"]

    for split, filename in zip(splits, filenames):
        for directory in directories:
            # A previously interrupted run may have left the scratch
            # directory behind; os.mkdir would raise FileExistsError and
            # stale files would leak into the new archive.
            if os.path.exists(directory):
                shutil.rmtree(directory)
            os.mkdir(directory)

        # Draw order (A, B, label per index) is kept fixed because all
        # images share the seeded global NumPy random state.
        for i in range(2):
            path = os.path.join("A", f"{split}_{i}.png")
            create_image(path)

            path = os.path.join("B", f"{split}_{i}.png")
            create_image(path)

            path = os.path.join("label", f"{split}_{i}.png")
            create_mask(path)

        # compress data
        # mode="w" (not "a"): re-running the script must replace the archive
        # instead of appending duplicate members to the existing one.
        with zipfile.ZipFile(filename, mode="w") as archive:
            for directory in directories:
                # os.listdir order is arbitrary; sort for a stable layout.
                for file in sorted(os.listdir(directory)):
                    archive.write(os.path.join(directory, file))

        for directory in directories:
            shutil.rmtree(directory)

        # compute checksum
        with open(filename, "rb") as f:
            md5 = hashlib.md5(f.read()).hexdigest()
            print(f"{filename}: {md5}")
Binary file added tests/data/levircd/levircd/test.zip
Binary file not shown.
Binary file added tests/data/levircd/levircd/train.zip
Binary file not shown.
Binary file added tests/data/levircd/levircd/val.zip
Binary file not shown.
Binary file added tests/data/levircd/levircdplus/LEVIR-CD+.zip
Binary file not shown.
61 changes: 61 additions & 0 deletions tests/data/levircd/levircdplus/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil

import numpy as np
from PIL import Image

np.random.seed(0)


def create_image(path: str) -> None:
    """Save a random 1x1 RGB image at ``path``.

    The three channel values come from ``np.random.randint(255, ...)``
    (half-open range [0, 255)) on NumPy's global random state, which this
    module seeds at import time.

    Args:
        path: Output file passed to ``Image.save``.
    """
    rgb = np.random.randint(255, size=(1, 1, 3), dtype=np.uint8)
    Image.fromarray(rgb).convert("RGB").save(path)


def create_mask(path: str) -> None:
    """Save a random 1x1 grayscale mask at ``path``.

    Random 0/1 values of shape (1, 1, 3) are scaled to 0/255 and converted
    to a single 8-bit channel with ``convert("L")`` before saving.

    Args:
        path: Output file passed to ``Image.save``.
    """
    flags = np.random.randint(2, size=(1, 1, 3), dtype=np.uint8)
    grayscale = Image.fromarray(flags * 255).convert("L")
    grayscale.save(path)


if __name__ == "__main__":
    # Build a miniature LEVIR-CD+ tree (<root>/<split>/{A,B,label}/0<i>.png),
    # zip it, print the archive's MD5, and delete the scratch tree.
    root = "LEVIR-CD+"
    splits = ["train", "test"]
    directories = ["A", "B", "label"]

    # Start from scratch if an earlier run left the tree behind.
    if os.path.exists(root):
        shutil.rmtree(root)

    for split in splits:
        split_dir = os.path.join(root, split)
        for directory in directories:
            os.makedirs(os.path.join(split_dir, directory))

        # Two samples per split; the A -> B -> label draw order is fixed
        # because every file consumes the shared seeded random state.
        for i in range(2):
            name = f"0{i}.png"
            create_image(os.path.join(split_dir, "A", name))
            create_image(os.path.join(split_dir, "B", name))
            create_mask(os.path.join(split_dir, "label", name))

    # Compress data
    shutil.make_archive(root, "zip", ".", root)

    # compute checksum
    archive = f"{root}.zip"
    with open(archive, "rb") as f:
        md5 = hashlib.md5(f.read()).hexdigest()
        print(f"{archive}: {md5}")

    shutil.rmtree(root)
127 changes: 118 additions & 9 deletions tests/datamodules/test_levircd.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file will eventually be removed once we have a trainer for change detection (CD)

Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,61 @@
from pathlib import Path

import pytest
import torchvision.transforms.functional as F
from lightning.pytorch import Trainer
from pytest import MonkeyPatch
from torch import Tensor
from torchvision.transforms import InterpolationMode

import torchgeo.datasets.utils
from torchgeo.datamodules import LEVIRCDPlusDataModule
from torchgeo.datasets import LEVIRCDPlus
from torchgeo.datamodules import LEVIRCDDataModule, LEVIRCDPlusDataModule
from torchgeo.datasets import LEVIRCD, LEVIRCDPlus


def download_url(url: str, root: str, *args: str) -> None:
    """Copy a local file instead of downloading it.

    Used in these tests as the replacement that is monkeypatched over
    ``torchgeo.datasets.utils.download_url``.

    Args:
        url: Path of the local file to copy.
        root: Destination handed to ``shutil.copy``.
        *args: Accepted and ignored.
    """
    shutil.copy(src=url, dst=root)


def transforms(sample: dict[str, Tensor]) -> dict[str, Tensor]:
    """Resize both images and the mask of a sample to 1024x1024.

    ``image1`` and ``image2`` are resized with antialiased bilinear
    interpolation. The mask gets a leading dimension via ``unsqueeze(dim=0)``
    and is resized with nearest-neighbour interpolation.

    Args:
        sample: Mapping with ``"image1"``, ``"image2"`` and ``"mask"``
            entries; it is updated in place.

    Returns:
        The same ``sample`` object with its three entries replaced.
    """
    target_size = [1024, 1024]

    for key in ("image1", "image2"):
        sample[key] = F.resize(
            sample[key],
            size=target_size,
            antialias=True,
            interpolation=InterpolationMode.BILINEAR,
        )

    expanded_mask = sample["mask"].unsqueeze(dim=0)
    sample["mask"] = F.resize(
        expanded_mask, size=target_size, interpolation=InterpolationMode.NEAREST
    )
    return sample


class TestLEVIRCDPlusDataModule:
@pytest.fixture
def datamodule(
self, monkeypatch: MonkeyPatch, tmp_path: Path
) -> LEVIRCDPlusDataModule:
monkeypatch.setattr(torchgeo.datasets.utils, "download_url", download_url)
md5 = "1adf156f628aa32fb2e8fe6cada16c04"
md5 = "0ccca34310bfe7096dadfbf05b0d180f"
monkeypatch.setattr(LEVIRCDPlus, "md5", md5)
url = os.path.join("tests", "data", "levircd", "LEVIR-CD+.zip")
url = os.path.join("tests", "data", "levircd", "levircdplus", "LEVIR-CD+.zip")
monkeypatch.setattr(LEVIRCDPlus, "url", url)

root = str(tmp_path)
dm = LEVIRCDPlusDataModule(
root=root, download=True, num_workers=0, checksum=True, val_split_pct=0.5
root=root,
download=True,
num_workers=0,
checksum=True,
val_split_pct=0.5,
transforms=transforms,
)
dm.prepare_data()
dm.trainer = Trainer(accelerator="cpu", max_epochs=1)
Expand All @@ -57,10 +86,14 @@ def test_val_dataloader(self, datamodule: LEVIRCDPlusDataModule) -> None:
batch = next(iter(datamodule.val_dataloader()))
batch = datamodule.on_after_batch_transfer(batch, 0)
if datamodule.val_split_pct > 0.0:
assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
assert batch["image1"].shape[0] == batch["mask"].shape[0] == 8
assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (256, 256)
assert batch["image2"].shape[0] == batch["mask"].shape[0] == 8
assert (
batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
)
assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
assert (
batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
)
assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
assert batch["image1"].shape[1] == 3
assert batch["image2"].shape[1] == 3

Expand All @@ -70,9 +103,85 @@ def test_test_dataloader(self, datamodule: LEVIRCDPlusDataModule) -> None:
datamodule.trainer.testing = True
batch = next(iter(datamodule.test_dataloader()))
batch = datamodule.on_after_batch_transfer(batch, 0)
assert batch["image1"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
assert batch["image1"].shape[0] == batch["mask"].shape[0] == 1
assert batch["image2"].shape[-2:] == batch["mask"].shape[-2:] == (1024, 1024)
assert batch["image2"].shape[0] == batch["mask"].shape[0] == 1
assert batch["image1"].shape[1] == 3
assert batch["image2"].shape[1] == 3


class TestLEVIRCDDataModule:
    """Dataloader shape checks for ``LEVIRCDDataModule`` on the fake archives."""

    @pytest.fixture
    def datamodule(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> LEVIRCDDataModule:
        """Return a prepared datamodule that reads the local test archives.

        ``download_url`` and ``LEVIRCD.splits`` are monkeypatched so that
        "downloading" copies the zips under ``tests/data/levircd/levircd``.
        """
        data_dir = os.path.join("tests", "data", "levircd", "levircd")
        checksums = {
            "train": "7c2e24b3072095519f1be7eb01fae4ff",
            "val": "5c320223ba88b6fc8ff9d1feebc3b84e",
            "test": "021db72d4486726d6a0702563a617b32",
        }
        splits = {
            name: {
                "url": os.path.join(data_dir, f"{name}.zip"),
                "filename": f"{name}.zip",
                "md5": digest,
            }
            for name, digest in checksums.items()
        }
        monkeypatch.setattr(torchgeo.datasets.utils, "download_url", download_url)
        monkeypatch.setattr(LEVIRCD, "splits", splits)

        dm = LEVIRCDDataModule(
            root=str(tmp_path),
            download=True,
            num_workers=0,
            checksum=True,
            transforms=transforms,
        )
        dm.prepare_data()
        dm.trainer = Trainer(accelerator="cpu", max_epochs=1)
        return dm

    @staticmethod
    def _assert_shapes(
        batch: dict[str, Tensor], spatial: tuple[int, int], batch_size: int
    ) -> None:
        """Assert image/mask spatial size, batch size, and 3 image channels."""
        for key in ("image1", "image2"):
            assert batch[key].shape[-2:] == batch["mask"].shape[-2:] == spatial
            assert batch[key].shape[0] == batch["mask"].shape[0] == batch_size
        for key in ("image1", "image2"):
            assert batch[key].shape[1] == 3

    def test_train_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
        """The first training batch holds 8 samples of size 256x256."""
        datamodule.setup("fit")
        if datamodule.trainer:
            datamodule.trainer.training = True
        first = next(iter(datamodule.train_dataloader()))
        first = datamodule.on_after_batch_transfer(first, 0)
        self._assert_shapes(first, (256, 256), 8)

    def test_val_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
        """The first validation batch holds 1 sample of size 1024x1024."""
        datamodule.setup("validate")
        if datamodule.trainer:
            datamodule.trainer.validating = True
        first = next(iter(datamodule.val_dataloader()))
        first = datamodule.on_after_batch_transfer(first, 0)
        self._assert_shapes(first, (1024, 1024), 1)

    def test_test_dataloader(self, datamodule: LEVIRCDDataModule) -> None:
        """The first test batch holds 1 sample of size 1024x1024."""
        datamodule.setup("test")
        if datamodule.trainer:
            datamodule.trainer.testing = True
        first = next(iter(datamodule.test_dataloader()))
        first = datamodule.on_after_batch_transfer(first, 0)
        self._assert_shapes(first, (1024, 1024), 1)
Loading