Finish up 3DUnet reference implementation with final model
nvpohanh committed Jul 3, 2020
1 parent 6e800eb commit d728971
Showing 17 changed files with 701 additions and 79 deletions.
8 changes: 7 additions & 1 deletion v0.7/medical_imaging/3d-unet/Dockerfile
@@ -27,7 +27,13 @@ RUN cd /tmp \
&& rm -rf inference

# Install dependencies
RUN python3 -m pip install onnx onnxruntime numpy==1.18.0 Pillow==7.0.0
RUN python3 -m pip install wrapt --upgrade --ignore-installed
RUN python3 -m pip install onnx numpy==1.18.0 Pillow==7.0.0 tensorflow
RUN python3 -m pip install tensorflow-addons https://github.com/onnx/onnx-tensorflow/archive/master.zip

# Install onnxruntime
# TODO: Switch to release version with 3D TransposedConvolution support.
RUN python3 -m pip install -i https://test.pypi.org/simple/ ort-nightly
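A quick way to confirm that the nightly wheel actually provides 3D transposed convolution is to run a one-node ConvTranspose model through onnxruntime. This is a hedged sketch for verification only; the graph, names, and opset are illustrative and not part of the reference build:

```python
# Sanity check (illustrative, not part of the Dockerfile): build a one-node
# ONNX graph with a 3D ConvTranspose and run it through onnxruntime.
import numpy as np
import onnx
from onnx import TensorProto, helper
import onnxruntime as ort

# A 5-D input (N, C, D, H, W) makes ConvTranspose a 3D operation.
x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 1, 4, 4, 4])
y = helper.make_tensor_value_info("y", TensorProto.FLOAT, None)
w = helper.make_tensor("w", TensorProto.FLOAT, [1, 1, 2, 2, 2], [1.0] * 8)
node = helper.make_node("ConvTranspose", ["x", "w"], ["y"])
graph = helper.make_graph([node], "convtranspose3d_check", [x], [y], [w])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])

sess = ort.InferenceSession(model.SerializeToString())
out = sess.run(None, {"x": np.ones((1, 1, 4, 4, 4), dtype=np.float32)})
print("3D ConvTranspose supported; output shape:", out[0].shape)
```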

# Install nnUnet
COPY nnUnet /workspace/nnUnet
115 changes: 82 additions & 33 deletions v0.7/medical_imaging/3d-unet/Makefile
@@ -33,8 +33,12 @@ POSTPROCESSED_DATA_DIR := $(BUILD_DIR)/postprocessed_data
MODEL_DIR := $(BUILD_DIR)/model
RESULT_DIR := $(BUILD_DIR)/result
MLPERF_CONF := $(BUILD_DIR)/mlperf.conf
PYTORCH_MODEL := $(RESULT_DIR)/fold_4.zip
ONNX_MODEL := $(MODEL_DIR)/192_224_192.onnx
PYTORCH_MODEL := $(RESULT_DIR)/fold_1.zip
ONNX_MODEL := $(MODEL_DIR)/224_224_160.onnx
ONNX_DYNAMIC_BS_MODEL := $(MODEL_DIR)/224_224_160_dynamic_bs.onnx
TF_MODEL := $(MODEL_DIR)/224_224_160.pb
OPENVINO_MODEL := $(MODEL_DIR)/brats_model_checkpoint_final_fold1_H224_W224_D160_C4.bin
OPENVINO_MODEL_METADATA := $(MODEL_DIR)/brats_model_checkpoint_final_fold1_H224_W224_D160_C4.xml

# Env variables needed by nnUnet
export nnUNet_raw_data_base=$(RAW_DATA_DIR)
@@ -43,27 +43,20 @@ export RESULTS_FOLDER=$(RESULT_DIR)

HAS_GPU := $(shell command -v nvidia-smi 2> /dev/null)

ifndef $HAS_GPU
DOCKER_RUN_CMD := docker run
ifeq ($(HAS_GPU),)
DOCKER_RUN_CMD := docker run
else
# Handle different nvidia-docker version
ifneq ($(wildcard /usr/bin/nvidia-docker),)
DOCKER_RUN_CMD := nvidia-docker run
else
DOCKER_RUN_CMD := docker run --gpus=all
endif

# Handle different nvidia-docker version
ifneq ($(wildcard /usr/bin/nvidia-docker),)
DOCKER_RUN_CMD := nvidia-docker run
else
DOCKER_RUN_CMD := docker run --gpus=all
endif
endif

.PHONY: setup
setup: check_download_data_dir
setup: check_download_data_dir create_directories
@echo "Running basic setup..."
@if [ ! -e $(BUILD_DIR) ]; then \
mkdir $(BUILD_DIR); \
fi
@if [ ! -e $(RESULT_DIR) ]; then \
mkdir $(RESULT_DIR); \
fi
@if [ ! -e $(MLPERF_CONF) ]; then \
cp ../../mlperf.conf $(MLPERF_CONF); \
fi
@@ -76,6 +73,18 @@ check_download_data_dir:
echo "Please set environment variable DOWNLOAD_DATA_DIR to <path/to/MICCAI_BraTS_2019_Data_Training>" && false ; \
fi

.PHONY: create_directories
create_directories:
@if [ ! -e $(BUILD_DIR) ]; then \
mkdir $(BUILD_DIR); \
fi
@if [ ! -e $(MODEL_DIR) ]; then \
mkdir $(MODEL_DIR); \
fi
@if [ ! -e $(RESULT_DIR) ]; then \
mkdir $(RESULT_DIR); \
fi

.PHONY: init_submodule
init_submodule:
@echo "Initialize nnUnet submodule.."
@@ -86,24 +95,56 @@ download_model:
@echo "Download models..."
@$(MAKE) -f $(MAKEFILE_NAME) download_pytorch_model
@$(MAKE) -f $(MAKEFILE_NAME) download_onnx_model
@$(MAKE) -f $(MAKEFILE_NAME) download_tf_model
@$(MAKE) -f $(MAKEFILE_NAME) download_openvino_model

.PHONY: download_pytorch_model
download_pytorch_model:
# Will download model from Zenodo
# @if [ ! -e $(PYTORCH_MODEL)/model.pytorch ]; then \
# wget -O ; \
# fi
# For now, assume that fold_4.zip is in build/result
download_pytorch_model: create_directories
@echo "Downloading PyTorch model from Zenodo..."
@if [ ! -e $(PYTORCH_MODEL) ]; then \
echo "For now, please manually download PyTorch model to $(PYTORCH_MODEL)/"; \
wget -O $(PYTORCH_MODEL) https://zenodo.org/record/3904106/files/fold_1.zip?download=1 \
&& cd $(RESULT_DIR) && unzip -o fold_1.zip; \
fi
@cd $(RESULT_DIR) && unzip -o fold_4.zip

.PHONY: download_onnx_model
download_onnx_model:
# Will download model from Zenodo
download_onnx_model: create_directories
@echo "Downloading ONNX model from Zenodo..."
@if [ ! -e $(ONNX_MODEL) ]; then \
wget -O $(ONNX_MODEL) https://zenodo.org/record/3928973/files/224_224_160.onnx?download=1; \
fi
@if [ ! -e $(ONNX_DYNAMIC_BS_MODEL) ]; then \
wget -O $(ONNX_DYNAMIC_BS_MODEL) https://zenodo.org/record/3928973/files/224_224_160_dyanmic_bs.onnx?download=1; \
fi

.PHONY: download_tf_model
download_tf_model: create_directories
@echo "Downloading TF model from Zenodo..."
@if [ ! -e $(TF_MODEL) ]; then \
wget -O $(TF_MODEL) https://zenodo.org/record/3928991/files/224_224_160.pb?download=1; \
fi

.PHONY: download_openvino_model
download_openvino_model: create_directories
@echo "Downloading OpenVINO model from Zenodo..."
@if [ ! -e $(OPENVINO_MODEL) ]; then \
wget -O $(OPENVINO_MODEL) https://zenodo.org/record/3929002/files/brats_model_checkpoint_final_fold1_H224_W224_D160_C4.bin?download=1; \
fi
@if [ ! -e $(OPENVINO_MODEL_METADATA) ]; then \
wget -O $(OPENVINO_MODEL_METADATA) https://zenodo.org/record/3929002/files/brats_model_checkpoint_final_fold1_H224_W224_D160_C4.xml?download=1; \
fi

.PHONY: convert_onnx_model
convert_onnx_model: download_pytorch_model
@echo "Converting PyTorch model to ONNX model..."
@if [ ! -e $(ONNX_MODEL) ]; then \
echo "For now, please manually download ONNX model to $(ONNX_MODEL)"; \
python3 unet_pytorch_to_onnx.py; \
fi
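The conversion target calls unet_pytorch_to_onnx.py. A minimal sketch of the core export step, assuming the standard `torch.onnx.export` path (the network loading, tensor names, and opset choice are illustrative, not the script's exact code):

```python
# Sketch of a PyTorch -> ONNX export at the benchmark's input size
# (assumption: unet_pytorch_to_onnx.py follows this standard pattern).
import torch

def export_unet(network: torch.nn.Module, output_path: str) -> None:
    network.eval()
    # One sample, 4 MRI modalities, 224 x 224 x 160 voxels.
    dummy_input = torch.randn(1, 4, 224, 224, 160)
    torch.onnx.export(
        network,
        dummy_input,
        output_path,
        opset_version=11,  # ConvTranspose3d needs a sufficiently recent opset
        input_names=["input"],
        output_names=["output"],
    )
```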

.PHONY: convert_tf_model
convert_tf_model: convert_onnx_model
@echo "Converting ONNX model to TF model..."
@if [ ! -e $(TF_MODEL) ]; then \
python3 unet_onnx_to_tf.py; \
fi
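unet_onnx_to_tf.py can build on the onnx-tensorflow backend installed in the Dockerfile; a minimal sketch of that conversion (the backend choice is an assumption inferred from the Dockerfile, and paths are illustrative):

```python
# Sketch of an ONNX -> TensorFlow conversion via onnx-tensorflow
# (assumption: unet_onnx_to_tf.py uses this backend).
import onnx
from onnx_tf.backend import prepare

onnx_model = onnx.load("build/model/224_224_160.onnx")
tf_rep = prepare(onnx_model)                       # TensorFlow representation
tf_rep.export_graph("build/model/224_224_160.pb")  # frozen graph for the TF SUT
```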

.PHONY: build_docker
@@ -126,7 +167,7 @@ launch_docker: check_download_data_dir
--user $(UID):$(GROUPID) --net host --device /dev/fuse --cap-add SYS_ADMIN $(DOCKER_ARGS) mlperf-inference-3d-unet

.PHONY: preprocess_data
preprocess_data:
preprocess_data: create_directories
@echo "Restructuring raw data to $(RAW_DATA_DIR)..."
@if [ ! -e $(RAW_DATA_DIR) ]; then \
mkdir $(RAW_DATA_DIR); \
@@ -154,15 +195,23 @@ run_pytorch_accuracy: mkdir_postprocessed_data

.PHONY: run_onnxruntime_performance
run_onnxruntime_performance:
@python3 run.py --backend=onnxruntime
@python3 run.py --backend=onnxruntime --model=build/model/224_224_160.onnx

.PHONY: run_onnxruntime_accuracy
run_onnxruntime_accuracy: mkdir_postprocessed_data
@python3 run.py --backend=onnxruntime --accuracy
@python3 run.py --backend=onnxruntime --model=build/model/224_224_160.onnx --accuracy

.PHONY: run_tf_performance
run_tf_performance:
@python3 run.py --backend=tf --model=build/model/224_224_160.pb

.PHONY: run_tf_accuracy
run_tf_accuracy: mkdir_postprocessed_data
@python3 run.py --backend=tf --model=build/model/224_224_160.pb --accuracy

.PHONY: evaluate
evaluate:
@python3 brats_eval.py
@python3 accuracy-brats.py
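The evaluation reports per-region Dice scores (whole tumor, tumor core, enhancing tumor). A minimal sketch of the standard Dice overlap that BraTS-style evaluation computes per region (the function and masks are illustrative, not the script's exact code):

```python
# Illustrative Dice score between a predicted and a ground-truth binary mask;
# BraTS-style evaluation computes this per tumor sub-region.
import numpy as np

def dice_score(pred: np.ndarray, truth: np.ndarray) -> float:
    pred, truth = pred.astype(bool), truth.astype(bool)
    denom = pred.sum() + truth.sum()
    if denom == 0:
        return 1.0  # both masks empty: perfect agreement by convention
    return 2.0 * np.logical_and(pred, truth).sum() / denom
```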

.PHONY: clean
clean:
30 changes: 12 additions & 18 deletions v0.7/medical_imaging/3d-unet/README.md
@@ -1,53 +1,47 @@
# MLPerf Inference Benchmarks for Medical Image 3D Segmentation

This is the reference implementation for MLPerf Inference benchmarks for Medical Image 3D Segmentation.

The chosen model is the 3D-Unet from [nnUnet](https://github.com/MIC-DKFZ/nnUNet), performing the [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) brain tumor segmentation task.

## Prerequisites

If you would like to run on an NVIDIA GPU, you will need:

- [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
- Any NVIDIA GPU supported by TensorFlow or PyTorch

## Supported Models

| model | framework | accuracy | dataset | model link | model source | precision | notes |
| ----- | --------- | -------- | ------- | ---------- | ------------ | --------- | ----- |
| 3D-Unet | PyTorch | mean = 0.82400, whole tumor = 0.8922, tumor core = 0.8158, enhancing tumor = 0.7640 | last 20% of BraTS 2019 Training Dataset (67 samples) | [from zenodo](???) | Trained in PyTorch using codes from [nnUnet](https://github.com/MIC-DKFZ/nnUNet) on the first 80% of BraTS 2019 Training Dataset. | fp32 | |
| 3D-Unet | ONNX | mean = 0.82400, whole tumor = 0.8922, tumor core = 0.8158, enhancing tumor = 0.7640 | last 20% of BraTS 2019 Training Dataset (67 samples) | [from zenodo](???) | Converted from the PyTorch model using ??? script. | fp32 | |
| 3D-Unet | PyTorch | **mean = 0.85300** (whole tumor = 0.9141, tumor core = 0.8679, enhancing tumor = 0.7770) | [Fold 1](folds/fold1_validation.txt) of [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) Training Dataset | [from zenodo](https://zenodo.org/record/3904106) | Trained in PyTorch using codes from [nnUnet](https://github.com/MIC-DKFZ/nnUNet) on [Fold 0](folds/fold0_validation.txt), [Fold 2](folds/fold2_validation.txt), [Fold 3](folds/fold3_validation.txt), and [Fold 4](folds/fold4_validation.txt) of [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) Training Dataset. | fp32 | |
| 3D-Unet | ONNX | **mean = 0.85300** (whole tumor = 0.9141, tumor core = 0.8679, enhancing tumor = 0.7770) | [Fold 1](folds/fold1_validation.txt) of [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) Training Dataset | [from zenodo](https://zenodo.org/record/3928973) | Converted from the PyTorch model using [script](unet_pytorch_to_onnx.py). | fp32 | |
| 3D-Unet | Tensorflow | **mean = 0.85300** (whole tumor = 0.9141, tumor core = 0.8679, enhancing tumor = 0.7770) | [Fold 1](folds/fold1_validation.txt) of [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) Training Dataset | [from zenodo](https://zenodo.org/record/3928991) | Converted from the ONNX model using [script](unet_onnx_to_tf.py). | fp32 | |
| 3D-Unet | OpenVINO | **mean = 0.85300** (whole tumor = 0.9141, tumor core = 0.8679, enhancing tumor = 0.7770) | [Fold 1](folds/fold1_validation.txt) of [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) Training Dataset | [from zenodo](https://zenodo.org/record/3929002) | Converted from the ONNX model. | fp32 | |


## Disclaimer
This benchmark app is a reference implementation that is not meant to be the fastest implementation possible.

## TODO

[ ] Update the models (PyTorch and ONNX) to the final volume size (160, 224, 224).
[ ] Upload the models to Zenodo, and fill in Zenodo link, and modify Makefile so that it downloads models from Zenodo.
[ ] Update the accuracy metric.
[ ] Add PyTorch -> ONNX script.
[ ] Update the onnxruntime in the docker container to a version which supports 3D ConvTranspose op.

## Commands

Please download [BraTS 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) separately and unzip the dataset.

Please run the following commands:

- `export DOWNLOAD_DATA_DIR=<path/to/MICCAI_BraTS_2019_Data_Training>`: point to location of downloaded BraTS 2019 Training dataset.
- **Temporary:** Download the (192, 224, 192) PyTorch model named `fold_4.zip` to `build/result/`.
- **Temporary:** Download the (192, 224, 192) ONNX model named `192_224_192.onnx` to `build/`.
- `make setup`: initialize submodule and download models.
- `make build_docker`: build docker image.
- `make launch_docker`: launch the docker container with an interactive session.
- `make preprocess_data`: preprocess the BraTS 2019 dataset.
- `python3 run.py --backend=[pytorch|onnxruntime] --scenario=[Offline|SingleStream|MultiStream|Server] [--accuracy]`: run the harness inside the docker container. Performance or accuracy results are printed to the console.
- `python3 run.py --backend=[tf|pytorch|onnxruntime] --scenario=[Offline|SingleStream|MultiStream|Server] [--accuracy] --model=[path/to/model_file(tf/onnx only)]`: run the harness inside the docker container. Performance or accuracy results are printed to the console.
- `python3 accuracy-brats.py --log_file=<LOADGEN_LOG> --output_dtype=<DTYPE>`: compute accuracy from a LoadGen accuracy JSON log file.

## Details

- SUT implementations are in [pytorch_SUT.py](pytorch_SUT.py) and [onnxruntime_SUT.py](onnxruntime_SUT.py). QSL implementation is in [brats_QSL.py](brats_QSL.py).
- The script [brats_eval.py](brats_eval.py) parses LoadGen accuracy log, post-processes it, and computes the accuracy.
- SUT implementations are in [pytorch_SUT.py](pytorch_SUT.py), [onnxruntime_SUT.py](onnxruntime_SUT.py), and [tf_SUT.py](tf_SUT.py). QSL implementation is in [brats_QSL.py](brats_QSL.py).
- The script [accuracy-brats.py](accuracy-brats.py) parses the LoadGen accuracy log, post-processes it, and computes the accuracy.
- Preprocessing and evaluation (including post-processing) are not included in the timed path.
- The input to the SUT is a volume of size `[4, 192, 224, 192]`. The output from SUT is a volume of size `[4, 192, 224, 192]` with predicted label logits for each voxel.
- The input to the SUT is a volume of size `[4, 224, 224, 160]`. The output from SUT is a volume of size `[4, 224, 224, 160]` with predicted label logits for each voxel.
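
For illustration, a hedged sketch of a single query at these dimensions using the downloaded ONNX model (the tensor name is read from the model; the real LoadGen-driven harness lives in [onnxruntime_SUT.py](onnxruntime_SUT.py)):

```python
# Sketch of one inference at the stated dimensions (illustrative only).
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("build/model/224_224_160.onnx")
input_name = sess.get_inputs()[0].name

# One preprocessed sample: 4 MRI modalities, 224 x 224 x 160 voxels.
volume = np.random.rand(1, 4, 224, 224, 160).astype(np.float32)
logits = sess.run(None, {input_name: volume})[0]  # (1, 4, 224, 224, 160)
labels = np.argmax(logits, axis=1)                # per-voxel predicted class
```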

## License

@@ -84,8 +84,7 @@ def load_loadgen_log(log_file, result_dtype, dictionaries):

assert len(predictions) == len(dictionaries), "Number of predictions does not match number of samples in validation set!"

# TODO: need to change to [160, 224, 224]
padded_shape = [192, 224, 192]
padded_shape = [224, 224, 160]
results = [None for i in range(len(predictions))]
for prediction in predictions:
qsl_idx = prediction["qsl_idx"]
Binary file removed v0.7/medical_imaging/3d-unet/fold4_validation.npy
67 changes: 67 additions & 0 deletions v0.7/medical_imaging/3d-unet/folds/fold0_validation.txt
@@ -0,0 +1,67 @@
HGG__BraTS19_2013_22_1
HGG__BraTS19_2013_23_1
HGG__BraTS19_2013_3_1
HGG__BraTS19_2013_5_1
HGG__BraTS19_2013_7_1
HGG__BraTS19_CBICA_AAB_1
HGG__BraTS19_CBICA_AAL_1
HGG__BraTS19_CBICA_ABN_1
HGG__BraTS19_CBICA_ALU_1
HGG__BraTS19_CBICA_AME_1
HGG__BraTS19_CBICA_ANG_1
HGG__BraTS19_CBICA_AOC_1
HGG__BraTS19_CBICA_AOD_1
HGG__BraTS19_CBICA_APZ_1
HGG__BraTS19_CBICA_AQD_1
HGG__BraTS19_CBICA_AQJ_1
HGG__BraTS19_CBICA_AQN_1
HGG__BraTS19_CBICA_ASA_1
HGG__BraTS19_CBICA_ASK_1
HGG__BraTS19_CBICA_ASO_1
HGG__BraTS19_CBICA_AWH_1
HGG__BraTS19_CBICA_AWV_1
HGG__BraTS19_CBICA_AYA_1
HGG__BraTS19_CBICA_AYC_1
HGG__BraTS19_CBICA_AYI_1
HGG__BraTS19_CBICA_BFB_1
HGG__BraTS19_CBICA_BGN_1
HGG__BraTS19_CBICA_BGR_1
HGG__BraTS19_CBICA_BJY_1
HGG__BraTS19_TCIA01_231_1
HGG__BraTS19_TCIA01_378_1
HGG__BraTS19_TCIA01_390_1
HGG__BraTS19_TCIA01_412_1
HGG__BraTS19_TCIA02_135_1
HGG__BraTS19_TCIA02_179_1
HGG__BraTS19_TCIA02_208_1
HGG__BraTS19_TCIA02_274_1
HGG__BraTS19_TCIA02_314_1
HGG__BraTS19_TCIA02_430_1
HGG__BraTS19_TCIA02_608_1
HGG__BraTS19_TCIA03_121_1
HGG__BraTS19_TCIA03_138_1
HGG__BraTS19_TCIA03_375_1
HGG__BraTS19_TCIA03_498_1
HGG__BraTS19_TCIA06_184_1
HGG__BraTS19_TCIA06_372_1
HGG__BraTS19_TCIA08_113_1
HGG__BraTS19_TCIA08_162_1
HGG__BraTS19_TCIA08_218_1
HGG__BraTS19_TCIA08_469_1
LGG__BraTS19_2013_6_1
LGG__BraTS19_TCIA09_141_1
LGG__BraTS19_TCIA09_255_1
LGG__BraTS19_TCIA09_402_1
LGG__BraTS19_TCIA09_451_1
LGG__BraTS19_TCIA09_462_1
LGG__BraTS19_TCIA09_620_1
LGG__BraTS19_TCIA10_266_1
LGG__BraTS19_TCIA10_413_1
LGG__BraTS19_TCIA10_628_1
LGG__BraTS19_TCIA10_629_1
LGG__BraTS19_TCIA10_640_1
LGG__BraTS19_TCIA12_298_1
LGG__BraTS19_TCIA12_470_1
LGG__BraTS19_TCIA13_621_1
LGG__BraTS19_TCIA13_624_1
LGG__BraTS19_TCIA13_654_1