diff --git a/config/yoeo-rev-7-anchor.cfg b/config/yoeo-rev-7-anchor.cfg
new file mode 100644
index 0000000..d7697cf
--- /dev/null
+++ b/config/yoeo-rev-7-anchor.cfg
@@ -0,0 +1,336 @@
+[net]
+# Testing
+#batch=1
+#subdivisions=1
+# Training
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=100
+max_batches = 4000
+policy=steps
+steps=50000,60000
+scales=.1,.1
+
+####
+# Like YOEO rev 2 but with deeper skip connections
+####
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 3,4,5
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 24
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = 18
+
+[upsample]
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1, 10
+
+[upsample]
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
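
Note (reviewer sketch, not part of the patch): the head width of filters=39 follows the usual Darknet convention, where each [yolo] head predicts len(mask) anchors and each anchor needs 4 box coordinates, 1 objectness score, and one score per class:

    # Both heads use 3 of the 6 anchors (mask = 3,4,5 and mask = 0,1,2).
    anchors_per_head = 3
    num_classes = 8
    filters = anchors_per_head * (4 + 1 + num_classes)
    assert filters == 39  # matches filters=39 in the [convolutional] before each [yolo]
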
+activation=leaky
+
+[route]
+layers = -1, 2
+
+[upsample]
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1, 0
+
+[upsample]
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=6
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[seg]
+classes=5
diff --git a/config/yoeo-rev-7.cfg b/config/yoeo-rev-7.cfg
index 78e32d8..699d18c 100644
--- a/config/yoeo-rev-7.cfg
+++ b/config/yoeo-rev-7.cfg
@@ -19,8 +19,8 @@ learning_rate=0.001
 burn_in=100
 max_batches = 4000
 policy=steps
-steps=50000,60000
-scales=.1,.1
+steps=15000,30000
+scales=.2,.1
 ####
 # Like YOEO rev 2 but with deeper skip connections
 ####
@@ -213,15 +213,13 @@ activation=leaky
 size=1
 stride=1
 pad=1
-filters=24
+filters=39
 activation=linear
-
-
 
 [yolo]
 mask = 3,4,5
 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212
-classes=3
+classes=8
 num=6
 jitter=.3
 ignore_thresh = .7
@@ -257,13 +255,13 @@ activation=leaky
 size=1
 stride=1
 pad=1
-filters=24
+filters=39
 activation=linear
 
 [yolo]
 mask = 0,1,2
 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212
-classes=3
+classes=8
 num=6
 jitter=.3
 ignore_thresh = .7
@@ -328,11 +326,11 @@ activation=leaky
 
 [convolutional]
 batch_normalize=1
-filters=3
+filters=6
 size=1
 stride=1
 pad=1
 activation=leaky
 
 [seg]
-classes=2
+classes=5
diff --git a/yoeo/detect.py b/yoeo/detect.py
index 98ff357..d6fcb29 100755
--- a/yoeo/detect.py
+++ b/yoeo/detect.py
@@ -205,19 +205,14 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_path):
     ax.imshow(
-        SegmentationMapsOnImage(
-            seg[
-                int(pad_y) : int(img_size - pad_y),
-                int(pad_x) : int(img_size - pad_x),
-            ], shape=img.shape).draw_on_image(img)[0])
+        SegmentationMapsOnImage(seg, shape=img.shape).draw_on_image(img, alpha=0.5)[0])
     # Rescale boxes to original image
     detections = rescale_boxes(detections, img_size, img.shape[:2])
     unique_labels = detections[:, -1].cpu().unique()
     n_cls_preds = len(unique_labels)
     # Bounding-box colors
     cmap = plt.get_cmap("tab20b")
-    colors = [cmap(i) for i in np.linspace(0, 1, n_cls_preds)]
-    bbox_colors = random.sample(colors, n_cls_preds)
+    colors = [cmap(i) for i in np.linspace(0, 1, len(classes))]
     for x1, y1, x2, y2, conf, cls_pred in detections:
         print(f"\t+ Label: {classes[int(cls_pred)]} | Confidence: {conf.item():0.4f}")
@@ -225,9 +220,8 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_path):
 
         box_w = x2 - x1
         box_h = y2 - y1
-        color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
         # Create a Rectangle patch
-        bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
+        bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=1, edgecolor=colors[int(cls_pred)], facecolor="none")
         # Add the bbox to the plot
         ax.add_patch(bbox)
         # Add label
@@ -237,7 +231,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_path):
             s=classes[int(cls_pred)],
             color="white",
             verticalalignment="top",
-            bbox={"color": color, "pad": 0})
+            bbox={"color": colors[int(cls_pred)], "pad": 0})
 
     # Save generated image with detections
     plt.axis("off")
@@ -245,7 +239,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_path):
     plt.gca().yaxis.set_major_locator(NullLocator())
     filename = os.path.basename(image_path).split(".")[0]
    output_path_1 = os.path.join(output_path, f"{filename}.png")
-    plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.0)
+    plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.5)
     plt.close()
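
Note (reviewer sketch, not part of the patch): indexing the tab20b palette by class id, instead of sampling n_cls_preds random colors per image, makes the box color for a given class stable across all output images. A minimal illustration with a hypothetical class list:

    import numpy as np
    import matplotlib.pyplot as plt

    classes = ["person", "rider", "car"]  # hypothetical names, not the real class file
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(i) for i in np.linspace(0, 1, len(classes))]
    print(colors[2])  # "car" maps to this same RGBA tuple in every image
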
diff --git a/yoeo/train.py b/yoeo/train.py
index 665d4e5..6301293 100755
--- a/yoeo/train.py
+++ b/yoeo/train.py
@@ -48,7 +48,7 @@ def _create_data_loader(img_path, batch_size, img_size, n_cpu, multiscale_training):
         img_path,
         img_size=img_size,
         multiscale=multiscale_training,
-        transform=DEFAULT_TRANSFORMS)
+        transform=AUGMENTATION_TRANSFORMS)
     dataloader = DataLoader(
         dataset,
         batch_size=batch_size,
diff --git a/yoeo/utils/augmentations.py b/yoeo/utils/augmentations.py
index c29df04..000e84a 100644
--- a/yoeo/utils/augmentations.py
+++ b/yoeo/utils/augmentations.py
@@ -1,6 +1,6 @@
 import imgaug.augmenters as iaa
 from torchvision import transforms
-from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug
+from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug, ResizeToSquare
 
 
 class DefaultAug(ImgAug):
@@ -29,7 +29,7 @@ def __init__(self, ):
 AUGMENTATION_TRANSFORMS = transforms.Compose([
     AbsoluteLabels(),
     DefaultAug(),
-    PadSquare(),
+    ResizeToSquare(),
     RelativeLabels(),
     ToTensor(),
 ])
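
Note (reviewer sketch, not part of the patch): training now runs the DefaultAug pipeline instead of the plain DEFAULT_TRANSFORMS, and both pipelines square the input with ResizeToSquare rather than PadSquare: resizing uses every input pixel at the cost of distorting the aspect ratio, whereas padding kept the aspect ratio but spent input resolution on letterbox bars. Roughly, for a Cityscapes-sized frame:

    import numpy as np
    import imgaug.augmenters as iaa

    img = np.zeros((1024, 2048, 3), dtype=np.uint8)  # dummy 2:1 frame
    resized = iaa.Resize(416)(image=img)             # both sides forced to 416
    print(resized.shape)                             # (416, 416, 3)
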
diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py
index 4e3634d..5241001 100644
--- a/yoeo/utils/datasets.py
+++ b/yoeo/utils/datasets.py
@@ -6,6 +6,10 @@ import os
 import warnings
 
 import numpy as np
+import json
+from tqdm import tqdm
+from collections import defaultdict
+from pathlib import Path
 from PIL import Image
 from PIL import ImageFile
@@ -57,28 +61,32 @@ def __len__(self):
 
 
 class ListDataset(Dataset):
-    def __init__(self, list_path, img_size=416, multiscale=True, transform=None):
-        with open(list_path, "r") as file:
-            self.img_files = file.readlines()
-
-        self.label_files = []
-        for path in self.img_files:
-            image_dir = os.path.dirname(path)
-            label_dir = "labels".join(image_dir.rsplit("images", 1))
-            assert label_dir != image_dir, \
-                f"Image path must contain a folder named 'images'! \n'{image_dir}'"
-            label_file = os.path.join(label_dir, os.path.basename(path))
-            label_file = os.path.splitext(label_file)[0] + '.txt'
-            self.label_files.append(label_file)
+    def __init__(self, data_path, img_size=416, multiscale=True, transform=None):
+
+        # Get all color images for e.g. the test set
+        self.img_files = [str(path) for path in Path(data_path).rglob("*.png")]
+
+        self.annotations = defaultdict(list)
+        for dset in ['train', 'val']:
+            with open(os.path.abspath(os.path.join(data_path, "../../", f"annotations/instancesonly_filtered_gtFine_{dset}.json")), "r") as f:
+                annotation_file = json.load(f)
+            for annotation in tqdm(annotation_file["annotations"]):
+                img_id = annotation["image_id"]
+                category_id = annotation["category_id"]
+                bbox = annotation["bbox"]
+                self.annotations[
+                    os.path.basename(
+                        list(filter(
+                            lambda x: x["id"] == img_id,
+                            annotation_file["images"]))[0]["file_name"])
+                ].append((img_id, category_id, bbox))
 
         self.mask_files = []
         for path in self.img_files:
-            image_dir = os.path.dirname(path)
-            mask_dir = "segmentations".join(image_dir.rsplit("images", 1))
-            assert mask_dir != image_dir, \
-                f"Image path must contain a folder named 'images'! \n'{image_dir}'"
+            path = str(path).replace("leftImg8bit", "gtFine")
+            mask_dir = os.path.dirname(path)
             mask_file = os.path.join(mask_dir, os.path.basename(path))
-            mask_file = os.path.splitext(mask_file)[0] + '.png'
+            mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png'
             self.mask_files.append(mask_file)
 
         self.img_size = img_size
@@ -106,12 +114,19 @@ def __getitem__(self, index):
         #  Label
         # ---------
         try:
-            label_path = self.label_files[index % len(self.img_files)].rstrip()
-
-            # Ignore warning if file is empty
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                boxes = np.loadtxt(label_path).reshape(-1, 5)
+            labels = self.annotations[os.path.basename(img_path)]
+
+            boxes = np.zeros((len(labels), 5))
+
+            for idx, label in enumerate(labels):
+                # label_idx x_center y_center width height
+                boxes[idx] = np.array([
+                    label[1] - 1,
+                    label[2][0] / img.shape[1] + label[2][2] / img.shape[1] / 2,
+                    label[2][1] / img.shape[0] + label[2][3] / img.shape[0] / 2,
+                    label[2][2] / img.shape[1],
+                    label[2][3] / img.shape[0]
+                ])
         except Exception:
-            print(f"Could not read label '{label_path}'.")
+            print(f"Could not read labels for '{img_path}'.")
             return
@@ -122,7 +137,15 @@ def __getitem__(self, index):
         try:
             mask_path = self.mask_files[index % len(self.img_files)].rstrip()
             # Load segmentation mask as numpy array
-            mask = np.array(Image.open(mask_path).convert('RGB')) // 127
+            mask = np.array(Image.open(mask_path).convert('RGB'))
+            # Group classes together
+            mask[mask <= 6] = 0
+            mask[np.logical_and(mask > 6, mask <= 10)] = 1
+            mask[np.logical_and(mask > 10, mask <= 16)] = 2
+            mask[np.logical_and(mask > 16, mask <= 20)] = 3
+            mask[np.logical_and(mask > 20, mask <= 22)] = 4
+            mask[mask == 23] = 5
+            mask[mask > 23] = 0
         except FileNotFoundError as e:
             print(f"Could not load mask '{mask_path}'.")
             return
diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py
index 63765f5..6f7aa72 100644
--- a/yoeo/utils/loss.py
+++ b/yoeo/utils/loss.py
@@ -106,7 +106,7 @@ def compute_loss(combined_predictions, combined_targets, model):
 
         # Classification of the objectness
         # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
-        tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
+        tobj[b, anchor, grid_j, grid_i] = 1  # was: iou.detach().clamp(0).type(tobj.dtype); use a hard objectness target of 1 instead of the IoU
 
         # Classification of the class
         # Check if we need to do a classification (number of classes > 1)
diff --git a/yoeo/utils/transforms.py b/yoeo/utils/transforms.py
index 800c457..391db9c 100644
--- a/yoeo/utils/transforms.py
+++ b/yoeo/utils/transforms.py
@@ -94,6 +94,13 @@ def __init__(self, ):
         ])
 
 
+class ResizeToSquare(ImgAug):
+    def __init__(self, ):
+        self.augmentations = iaa.Sequential([
+            iaa.Resize(416).to_deterministic()  # TODO dynamic resolution
+        ])
+
+
 class ToTensor(object):
     def __init__(self, ):
         pass
@@ -102,7 +109,7 @@ def __call__(self, data):
         img, boxes, seg = data
         # Extract image as PyTorch tensor
         img = transforms.ToTensor()(img)
-        seg = transforms.ToTensor()(seg) * 255  # Because troch maps this to 0-1 instead of 0-255
+        seg = transforms.ToTensor()(seg) * 255  # Because torch maps this to 0-1 instead of 0-255
 
         bb_targets = torch.zeros((len(boxes), 6))
         bb_targets[:, 1:] = transforms.ToTensor()(boxes)
@@ -123,7 +130,7 @@ def __call__(self, data):
 
 DEFAULT_TRANSFORMS = transforms.Compose([
     AbsoluteLabels(),
-    PadSquare(),
+    ResizeToSquare(),
     RelativeLabels(),
     ToTensor(),
 ])
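
Note (reviewer sketch, not part of the patch): the COCO-style "bbox" field is [x_min, y_min, width, height] in pixels, YOLO targets are [class, x_center, y_center, width, height] normalized to [0, 1], and COCO category ids are 1-based. The per-label conversion above therefore amounts to (hypothetical values):

    import numpy as np

    img_h, img_w = 1024, 2048                            # e.g. a Cityscapes frame
    category_id, bbox = 1, [512.0, 256.0, 100.0, 200.0]  # COCO: x_min, y_min, w, h
    target = np.array([
        category_id - 1,                  # shift to a 0-based class index
        (bbox[0] + bbox[2] / 2) / img_w,  # x_center, normalized
        (bbox[1] + bbox[3] / 2) / img_h,  # y_center, normalized
        bbox[2] / img_w,                  # width, normalized
        bbox[3] / img_h,                  # height, normalized
    ])
    print(target)  # [0.  0.2744  0.3477  0.0488  0.1953] (rounded)
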
diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py
index d178d21..e684a63 100644
--- a/yoeo/utils/utils.py
+++ b/yoeo/utils/utils.py
@@ -65,19 +65,11 @@ def rescale_boxes(boxes, current_dim, original_shape):
     """
     orig_h, orig_w = original_shape
 
-    # The amount of padding that was added
-    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
-    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
-
-    # Image height and width after padding is removed
-    unpad_h = current_dim - pad_y
-    unpad_w = current_dim - pad_x
-
     # Rescale bounding boxes to dimension of original image
-    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
-    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
-    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
-    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
+    boxes[:, 0] = boxes[:, 0] * (orig_w / current_dim)
+    boxes[:, 1] = boxes[:, 1] * (orig_h / current_dim)
+    boxes[:, 2] = boxes[:, 2] * (orig_w / current_dim)
+    boxes[:, 3] = boxes[:, 3] * (orig_h / current_dim)
     return boxes
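
Note (reviewer sketch, not part of the patch): with ResizeToSquare there is no letterbox padding to undo, so mapping a box from the 416x416 network input back to the source image is a plain per-axis scale:

    import numpy as np

    current_dim, (orig_h, orig_w) = 416, (1024, 2048)
    boxes = np.array([[104.0, 208.0, 156.0, 312.0]])  # x1, y1, x2, y2 at 416x416
    boxes[:, [0, 2]] *= orig_w / current_dim          # x scaled by the width ratio
    boxes[:, [1, 3]] *= orig_h / current_dim          # y scaled by the height ratio
    print(boxes)                                      # [[512. 512. 768. 768.]]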