Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
54020e2
Update README.md
LashaO May 4, 2023
c06b62e
renames to miew_id
LashaO May 4, 2023
79ca1e5
updates naming convention
LashaO May 4, 2023
147efb4
adds gradcam for plugin inference
LashaO May 15, 2023
45044fe
fixes gradcam display order
LashaO May 17, 2023
4e826f6
fixes gradcam for varying image sizes
LashaO May 17, 2023
cd1cb8b
adds gradcam support for images with bboxes
LashaO May 18, 2023
1f92ca2
adds batched gradcam
LashaO May 25, 2023
a849076
Merge pull request #1 from WildMeOrg/add-gradcam-batched
LashaO May 25, 2023
e9170a1
add pilot and bottlenose whales
holmbergius May 26, 2023
fa4b579
Adds data preprocessing printouts
LashaO May 27, 2023
4554d55
fixes formatting
LashaO May 27, 2023
4e92755
FIX label definitions
holmbergius May 28, 2023
bab6619
Merge remote-tracking branch 'origin/main'
holmbergius May 28, 2023
7ec7db2
fixes overlap metrics calculation
LashaO May 28, 2023
a1f40fd
FIX label definitions
holmbergius Jun 1, 2023
57dc9fb
Add lion and cougar MiewId support
holmbergius Jun 13, 2023
e2b5b0a
Add panthera_leo mapping
holmbergius Jun 15, 2023
029ca12
adds best model checkpointing
LashaO Jun 18, 2023
51c6ff9
adds optuna sweep script
LashaO Jun 18, 2023
23ff8a9
adds CLI options to sweep script
LashaO Jun 18, 2023
c33cd3b
fixes formatting
LashaO Jun 18, 2023
96d245d
adds optuna to requirements
LashaO Jun 19, 2023
179b064
Add more finned species
holmbergius Jun 21, 2023
4603c4a
Adds final trial run checkpointing
LashaO Jun 21, 2023
5e34f4d
adds gradcam progress messages
LashaO Jun 21, 2023
05d616d
adds viewpoint flip for train
LashaO Jun 22, 2023
6a54247
Cross-application to more whales and dolphins
holmbergius Jun 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
wbia_tbd/data/
wbia_tbd/wandb/
wbia_miew_id/data/
wbia_miew_id/wandb/
*.pyc
wbia_tbd/runs/
wbia_miew_id/runs/
.env
TODO.md
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# WILDBOOK IA - ID Plugin
# WILDBOOK IA - MIEW-ID Plugin

A plugin for re-identification of wildlife individuals using learned embeddings.
A plugin for matching and interpreting embeddings for wildlife identification.


## Setup
Expand All @@ -19,7 +19,7 @@ WANDB_MODE={'online'/'offline'}
You can create a new line in a code block in markdown by using two spaces at the end of the line followed by a line break. Here's an example:

```
cd wbia_tbd
cd wbia_miew_id
python train.py
```

Expand Down Expand Up @@ -80,4 +80,4 @@ A config file path can be set by:

## Notes

This is an initial commit which includes training, inference and WBIA integration capabilities. Release of additional features is underway.
This is an initial commit which includes training, inference and WBIA integration capabilities. Release of additional features is underway.
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ timm==0.6.12
torch==2.0.0
torchvision==0.15.1
tqdm==4.65.0
python-dotenv=1.0.0
python-dotenv==1.0.0
grad-cam==1.4.6
optuna==3.2.0
4 changes: 4 additions & 0 deletions wbia_miew_id/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Package initializer: importing the package also imports ``_plugin`` for its
# import-time side effects (NOTE(review): presumably WBIA plugin registration —
# confirm against _plugin.py).
from wbia_miew_id import _plugin # NOQA


# Package version string; bump on release.
__version__ = '0.0.0'
283 changes: 188 additions & 95 deletions wbia_tbd/_plugin.py → wbia_miew_id/_plugin.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ data:
engine:
num_workers: 0
train_batch_size: 6
valid_batch_size: 24
valid_batch_size: 12
epochs: 30
seed: 42
device: cuda
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@
import cv2
import torch
from torch.utils.data import Dataset
import numpy as np

class TbdDataset(Dataset):
def __init__(self, csv, images_dir, transforms=None):
class MiewIdDataset(Dataset):
def __init__(self, csv, images_dir, transforms=None, fliplr=False, fliplr_view=[]):

self.csv = csv#.reset_index()
self.augmentations = transforms
self.images_dir = images_dir
self.fliplr = fliplr
self.fliplr_view = fliplr_view

def __len__(self):
return self.csv.shape[0]

def __getitem__(self, index):
row = self.csv.iloc[index]

image_path = os.path.join(self.images_dir, row['file_name'])
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
Expand All @@ -24,5 +27,9 @@ def __getitem__(self, index):
augmented = self.augmentations(image=image)
image = augmented['image']

if self.fliplr:
if row['viewpoint'] in self.fliplr_view:
image = np.fliplr(image)


return {"image": image, "label":torch.tensor(row['name']), "image_idx": self.csv.index[index]}
return {"image": image, "label":torch.tensor(row['name']), "image_idx": self.csv.index[index], "file_path": image_path}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from torch.utils.data import Dataset
import cv2
import numpy as np
import torch


class PluginDataset(Dataset):
Expand Down Expand Up @@ -67,6 +68,6 @@ def __getitem__(self, idx):
image = augmented['image']
# image = self.transform(image.copy())

return image, self.names[idx]
return image, self.names[idx], self.image_paths[idx], torch.Tensor(self.bboxes[idx])


File renamed without changes.
File renamed without changes.
File renamed without changes.
15 changes: 8 additions & 7 deletions wbia_tbd/engine/run_fn.py → wbia_miew_id/engine/run_fn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,18 @@

def run_fn(config, model, train_loader, valid_loader, criterion, optimizer, scheduler, device, checkpoint_dir, use_wandb=True):
    """Run the full training loop for ``config.engine.epochs`` epochs.

    Each epoch: train, save a per-epoch checkpoint, evaluate, and keep a
    rolling ``model_best.bin`` checkpoint for the highest validation score.

    Args:
        config: experiment config; only ``config.engine.epochs`` is read here.
        model: the network being trained (mutated in place by training).
        train_loader / valid_loader: torch dataloaders for the two splits.
        criterion, optimizer, scheduler: standard training components passed
            through to ``train_fn``.
        device: torch device the loops run on.
        checkpoint_dir: directory where ``model_{epoch}.bin`` and
            ``model_best.bin`` are written.
        use_wandb: forwarded to train/eval for experiment logging.

    Returns:
        The best validation score observed across all epochs.
    """
    best_score = 0
    for epoch in range(config.engine.epochs):
        # train_fn mutates model/optimizer state; its returned loss is not
        # needed here, so the result is intentionally discarded.
        train_fn(train_loader, model, criterion, optimizer, device, scheduler=scheduler, epoch=epoch, use_wandb=use_wandb)

        # Per-epoch checkpoint in addition to the rolling best one below.
        torch.save(model.state_dict(), f'{checkpoint_dir}/model_{epoch}.bin')

        valid_score = eval_fn(valid_loader, model, device, use_wandb=use_wandb)

        # Higher validation score is better (see maximize direction in sweep).
        if valid_score > best_score:
            best_score = valid_score
            torch.save(model.state_dict(), f'{checkpoint_dir}/model_best.bin')
            print('best model found for epoch {}'.format(epoch))

    return best_score
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
50 changes: 7 additions & 43 deletions wbia_tbd/etl/preprocess.py → wbia_miew_id/etl/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,69 +38,33 @@ def convert_name_to_id(names):
names_id = le.fit_transform(names)
return names_id



def preprocess_data(anno_path, name_keys=None, convert_names_to_ids=True, viewpoint_list=None, n_filter_min=None, n_subsample_max=None):
    """Load an annotation file into a dataframe and apply optional filters.

    Args:
        anno_path: path to the annotation file consumed by ``load_to_df``.
        name_keys: columns joined with '_' to form the identity ``name``
            column (defaults to ``['name']``).
        convert_names_to_ids: if True, label-encode ``name`` to integer ids
            (the original string is preserved in ``name_orig``).
        viewpoint_list: if given, keep only annotations with these viewpoints.
        n_filter_min: if given, drop identities with fewer annotations.
        n_subsample_max: if given, cap annotations per identity.

    Returns:
        The filtered (and optionally label-encoded) dataframe.
    """
    # Avoid a mutable default argument; resolve the sentinel here instead.
    if name_keys is None:
        name_keys = ['name']

    df = load_to_df(anno_path)

    print(f'** Loaded {anno_path} **')
    print(' ', f'Found {len(df)} annotations')

    df['name'] = df[name_keys].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    df['name_orig'] = df['name'].copy()

    if viewpoint_list:
        df = filter_viewpoint_df(df, viewpoint_list)
        print(' ', len(df), 'annotations remain after filtering by viewpoint list', viewpoint_list)

    if n_filter_min:
        df = filter_min_names_df(df, n_filter_min)
        print(' ', len(df), 'annotations remain after filtering by min', n_filter_min)

    if n_subsample_max:
        df = subsample_max_df(df, n_subsample_max)
        print(' ', len(df), 'annotations remain after subsampling by max', n_subsample_max)

    if convert_names_to_ids:
        names = df['name'].values
        names_id = convert_name_to_id(names)
        df['name'] = names_id
    return df


# def make_dataframes():
# DATA_DIR = "data/beluga-coco-v0-full"
# IMAGES_DIR = "data/beluga-440"

# # anno_dir = os.path.join(DATA_DIR, "annotations")
# anno_dir = os.path.join(DATA_DIR, "")
# anno_file = lambda split: f"instances_{split}2023.json"

# train_anno_path = os.path.join(anno_dir, anno_file("train"))
# val_anno_path = os.path.join(anno_dir, anno_file("val"))
# test_anno_path = os.path.join(anno_dir, anno_file("test"))

# df_train = load_to_df(train_anno_path)
# df_val = load_to_df(val_anno_path)

# df_train = df_train[df_train['viewpoint']=='up']
# df_val = df_val[df_val['viewpoint']=='up']

# ## NOTE have to safely handle this case
# df_train['name'] = df_train['name'].astype(int)
# df_val['name'] = df_val['name'].astype(int)

# df_train = df_train.groupby('name').filter(lambda g: len(g)>=4)
# df_val = df_val.groupby('name').filter(lambda g: len(g)>=2)

# # df_train.groupby('name')['name'].count().hist()
# # df_val.groupby('name')['name'].count().hist()

# le = LabelEncoder()
# df_train['name'] = le.fit_transform(df_train['name'])
# print('generated {n_train_classes} labels for the training set'.format(n_train_classes=df_train['name'].nunique()))
# # print(df_train['name'].max(), df_train['name'].nunique())

# ## NOTE column filtering can be done earlier to save memory for merge
# # df_train = df_train['name', 'file_name', 'viewpoint']
# # df_val = df_val['name', 'file_name', 'viewpoint']


# return df_train, df_val

if __name__ == "__main__":
pass
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class Config(DictableClass):


def get_config(file_path: str) -> Config:
print(f"Loading config from path: {file_path}")
with open(file_path, 'r') as file:
config_dict = yaml.safe_load(file)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# import torch
# from models import TbdNet
# from datasets import TbdDataset
# from models import MiewIdNet
# from datasets import MiewIdDataset

# def get_model(cfg, checkpoint_path=None, use_gpu=True):

# model = TbdNet(**dict(cfg.model_params))
# model = MiewIdNet(**dict(cfg.model_params))


# if use_gpu:
Expand All @@ -18,7 +18,7 @@
# return model

# def get_dataloader(df_data, images_dir, cfg, transforms, shuffle=True):
# dataset = TbdDataset(
# dataset = MiewIdDataset(
# csv=df_data,
# images_dir = images_dir,
# transforms=transforms,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions wbia_tbd/models/model.py → wbia_miew_id/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __repr__(self):



class TbdNet(nn.Module):
class MiewIdNet(nn.Module):

def __init__(self,
n_classes,
Expand All @@ -67,7 +67,7 @@ def __init__(self,
:param pooling: One of ('SPoC', 'MAC', 'RMAC', 'GeM', 'Rpool', 'Flatten', 'CompactBilinearPooling')
:param loss_module: One of ('arcface', 'cosface', 'softmax')
"""
super(TbdNet, self).__init__()
super(MiewIdNet, self).__init__()
print('Building Model Backbone for {} model'.format(model_name))

self.backbone = timm.create_model(model_name, pretrained=pretrained)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import torch
import sys
# sys.path.append('..'); from wbia_pie_v2.models import TbdNet
# from datasets import TbdDataset
from .model import TbdNet
# sys.path.append('..'); from wbia_pie_v2.models import MiewIdNet
# from datasets import MiewIdDataset
from .model import MiewIdNet

def get_model(cfg, checkpoint_path=None, use_gpu=True):

model = TbdNet(**dict(cfg.model_params))
model = MiewIdNet(**dict(cfg.model_params))


if use_gpu:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings
from torch.optim.lr_scheduler import _LRScheduler

class TbdScheduler(_LRScheduler):
class MiewIdScheduler(_LRScheduler):
def __init__(self, optimizer, lr_start=5e-6, lr_max=1e-5,
lr_min=1e-6, lr_ramp_ep=5, lr_sus_ep=0, lr_decay=0.8,
last_epoch=-1):
Expand All @@ -11,7 +11,7 @@ def __init__(self, optimizer, lr_start=5e-6, lr_max=1e-5,
self.lr_ramp_ep = lr_ramp_ep
self.lr_sus_ep = lr_sus_ep
self.lr_decay = lr_decay
super(TbdScheduler, self).__init__(optimizer, last_epoch)
super(MiewIdScheduler, self).__init__(optimizer, last_epoch)

def get_lr(self):
if not self._get_lr_called_within_step:
Expand Down
75 changes: 75 additions & 0 deletions wbia_miew_id/sweep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import optuna
import yaml
from train import run
from helpers import get_config
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import pickle


import argparse


def parse_args():
    """Parse command-line options for the sweep script.

    Returns:
        argparse.Namespace with a single ``config`` attribute holding the
        path to the YAML configuration file.
    """
    arg_parser = argparse.ArgumentParser(description="Load configuration file.")
    arg_parser.add_argument(
        "--config",
        type=str,
        default="configs/default_config.yaml",
        help="Path to the YAML configuration file. Default: configs/default_config.yaml",
    )
    return arg_parser.parse_args()


def objective(trial, config):
    """Optuna objective: sample hyperparameters into ``config`` and train once.

    Args:
        trial: optuna trial used to sample hyperparameter values.
        config: mutable experiment config; sampled values are written into it
            in place before training.

    Returns:
        The score returned by ``run(config)`` (the study maximizes it).
    """
    # Specify the parameters you want to optimize
    config.data.train_n_filter_min = trial.suggest_int("train_n_filter_min", 2, 5)
    image_size = trial.suggest_categorical("image_size", [192, 256, 384, 440, 512])
    config.data.image_size = [image_size, image_size]
    n_epochs = trial.suggest_int("epochs", 20, 40)
    config.engine.epochs = n_epochs
    # suggest_uniform/suggest_loguniform are deprecated since optuna 3.0;
    # suggest_float (with log=True for the log-uniform case) is the
    # supported equivalent and samples identically.
    config.model_params.margin = trial.suggest_float("margin", 0.1, 0.7)
    config.model_params.s = trial.suggest_float("s", 20, 64)

    # The scheduler params are derived from one base parameter to minimize
    # the number of parameters to optimize.
    lr_base = trial.suggest_float("lr_base", 1e-6, 1e-2, log=True)
    config.scheduler_params.lr_start = lr_base
    config.scheduler_params.lr_max = lr_base * 10
    config.scheduler_params.lr_min = lr_base / 2
    result = run(config)

    print("cfg", config.engine)

    return result


if __name__ == "__main__":
    # Restore the CLI entry point: --config defaults to the same path that
    # was previously hard-coded, so a plain `python sweep.py` is unchanged.
    args = parse_args()
    config = get_config(args.config)

    study = optuna.create_study(
        sampler=TPESampler(), pruner=MedianPruner(), direction="maximize"
    )

    # Close over the loaded config so optuna sees a one-argument objective
    # (avoids the PEP8 E731 lambda-assignment of the original).
    study.optimize(lambda trial: objective(trial, config), n_trials=100)

    print("Best trial:")
    trial_ = study.best_trial

    print(f"Value: {trial_.value}")

    print("Best parameters:")
    for key, value in trial_.params.items():
        print(f" {key}: {value}")

    # saves best parameters together with the achieved score
    save_dict = trial_.params
    save_dict['best_score'] = trial_.value

    with open('sweep.pkl', 'wb') as f:
        pickle.dump(save_dict, f, pickle.HIGHEST_PROTOCOL)
Loading