-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
98 lines (86 loc) · 4.56 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Generic imports
import pandas as pd
import numpy as np
from PIL import Image
import os
import pdb
import random
import torch
import csv
import nltk
from collections import defaultdict
# Torch imports
from torchvision import transforms
from torch.utils.data.dataset import Dataset # For custom datasets
from utils import readLangs, indexFromSentence
class CustomDatasetFromImages(Dataset):
    """Image dataset with a coarse label, a fine-grained label and indexed text.

    Each row of the header-less CSV is read as:
      * column 0 -- image path/URL; only the last ``/``-separated component
        is kept, ``%`` characters are removed, and the result is joined
        onto ``data_dir``;
      * column 1 -- coarse class name, looked up case-insensitively in
        ``label2idx1``;
      * column 2 -- text that is both looked up (stripped, lower-cased) in
        the fine-grained label table and passed to ``indexFromSentence``.
    """

    def __init__(self, csv_path, data_dir='/data/sachelar/fundus_images',
                 labels_path='/home/sachelar/multitask-eye-disease-recognition/labels2.txt',
                 transform=None):
        """
        Args:
            csv_path (string): path to the header-less csv file
            data_dir (string): path to the folder where images are
            labels_path (string): tab-separated ``<1-based id>\\t<name>`` file
                defining the fine-grained labels
            transform (callable, optional): applied to each RGB PIL image in
                ``__getitem__``. When omitted, the default pipeline is used
                (resize to 224x224, random horizontal/vertical flips, tensor
                conversion, normalisation).

        Raises:
            KeyError: if a label in the csv is missing from its lookup table.
        """
        super().__init__()
        self.label2idx1 = {'melanoma':0, 'glaucoma':1, 'amd':2, 'diabetic retinopathy':3, 'normal':4}
        self.label2idx2 = self._read_label_index(labels_path)

        if transform is None:
            # Default pipeline includes random flips, so it is NOT
            # deterministic; pass an explicit transform for evaluation.
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        # Attribute name kept as ``to_tensor`` for backward compatibility.
        self.to_tensor = transform

        self.data_info = pd.read_csv(csv_path, header=None)
        # NOTE(review): the original carried a "change to -4?" remark next to
        # the ``[-1]`` path component below -- intent unclear, confirm.
        self.image_arr = np.asarray([
            os.path.join(data_dir, entry.split('/')[-1].replace('%', ''))
            for entry in self.data_info.iloc[:, 0]
        ])
        self.label_arr1 = [
            self.label2idx1[name.lower()]
            for name in np.asarray(self.data_info.iloc[:, 1])
        ]
        # readLangs comes from the project's ``utils`` module; the meaning of
        # the literal 15 is not visible here (presumably a length limit --
        # TODO confirm against utils.readLangs).
        self.lang, self.pairs = readLangs(self.data_info.iloc[:, 2], 15)
        self.label_arr2 = [
            self.label2idx2[caption.strip().lower()]
            for caption in np.asarray(self.data_info.iloc[:, 2])
        ]
        self.data_len = len(self.data_info.index)

    @staticmethod
    def _read_label_index(labels_path):
        """Parse a tab-separated ``id<TAB>name`` file into ``{name: id - 1}``.

        Names are stripped and lower-cased; ids are converted from 1-based to
        0-based. Blank lines are skipped. The file is closed on return.
        """
        with open(labels_path, 'r', newline='') as handle:
            return {
                row[1].strip().lower(): int(row[0].strip()) - 1
                for row in csv.reader(handle, delimiter='\t')
                if row
            }

    def get_lang(self):
        """Return the language object produced by ``readLangs``."""
        return self.lang

    def __getitem__(self, index):
        """Return ``(image_path, image, coarse_label, fine_label, text)``.

        ``text`` is a ``torch.LongTensor`` of the indices returned by
        ``indexFromSentence``, reshaped to a column (``view(-1, 1)``).
        """
        single_image_name = self.image_arr[index]
        # Context manager releases the underlying file handle promptly;
        # ``convert`` returns an independent, fully loaded image.
        with Image.open(single_image_name) as raw_image:
            img_as_img = raw_image.convert('RGB')
        img_as_tensor = self.to_tensor(img_as_img)

        single_image_label = self.label_arr1[index]
        fine_grained_label = self.label_arr2[index]

        # The second value returned by indexFromSentence is not needed here.
        text, _ = indexFromSentence(self.lang, self.data_info.iloc[index, 2])
        text = torch.LongTensor(text).view(-1, 1)
        return (single_image_name, img_as_tensor, single_image_label, fine_grained_label, text)

    def __len__(self):
        """Return the number of rows in the csv."""
        return self.data_len
class GradedDatasetFromImages(Dataset):
    """Image dataset yielding ``(image, coarse_label)`` pairs.

    Each row of the header-less CSV is read as:
      * column 0 -- image path relative to ``data_dir`` (``%`` characters
        are removed before joining);
      * column 1 -- class name, looked up case-insensitively in
        ``label2idx1``.
    """

    def __init__(self, csv_path, data_dir='/data/sachelar/fundus_images',
                 transform=None):
        """
        Args:
            csv_path (string): path to the header-less csv file
            data_dir (string): path to the folder where images are
            transform (callable, optional): applied to each RGB PIL image in
                ``__getitem__``. When omitted, the default pipeline is used
                (resize to 224x224, random horizontal/vertical flips, tensor
                conversion, normalisation).

        Raises:
            KeyError: if a class name in the csv is not in ``label2idx1``.
        """
        super().__init__()
        self.label2idx1 = {'melanoma':0, 'glaucoma':1, 'amd':2, 'diabetic retinopathy':3, 'normal':4}

        if transform is None:
            # Default pipeline includes random flips, so it is NOT
            # deterministic; pass an explicit transform for evaluation.
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        # Attribute name kept as ``to_tensor`` for backward compatibility.
        self.to_tensor = transform

        self.data_info = pd.read_csv(csv_path, header=None)
        self.image_arr = np.asarray([
            os.path.join(data_dir, entry.replace('%', ''))
            for entry in self.data_info.iloc[:, 0]
        ])
        self.label_arr1 = [
            self.label2idx1[name.lower()]
            for name in np.asarray(self.data_info.iloc[:, 1])
        ]
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        """Return ``(transformed_image, label_index)`` for row ``index``."""
        single_image_name = self.image_arr[index]
        # Context manager releases the underlying file handle promptly;
        # ``convert`` returns an independent, fully loaded image.
        with Image.open(single_image_name) as raw_image:
            img_as_img = raw_image.convert('RGB')
        img_as_tensor = self.to_tensor(img_as_img)
        single_image_label = self.label_arr1[index]
        return img_as_tensor, single_image_label

    def __len__(self):
        """Return the number of rows in the csv."""
        return self.data_len