-
Notifications
You must be signed in to change notification settings - Fork 7
/
dataset.py
67 lines (53 loc) · 1.74 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Here, we create a custom dataset
"""
import torch
import pickle
from utils.types import PathT
from torch.utils.data import Dataset
from typing import Any, Tuple, Dict, List
class MyDataset(Dataset):
    """
    Map-style dataset backed by a single pickled mapping of samples.

    The pickle at ``path`` is loaded fully into memory on construction and
    every item of the mapping is converted into an ``{'x': ..., 'y': ...}``
    entry. Indexing returns the ``(x, y)`` pair of the entry at that position.
    """

    def __init__(self, path: PathT) -> None:
        """
        :param path: location of the pickled features file.
        """
        super().__init__()

        # Set variables
        self.path = path

        # Load features
        self.features = self._get_features()

        # Create list of entries
        self.entries = self._get_entries()

    def __getitem__(self, index: int) -> Tuple:
        """
        :param index: position of the sample in ``self.entries``.
        :return: tuple ``(x, y)`` of the requested sample.
        """
        entry = self.entries[index]
        return entry['x'], entry['y']

    def __len__(self) -> int:
        """
        :return: the length of the dataset (number of samples).
        """
        return len(self.entries)

    def _get_features(self) -> Any:
        """
        Load all features from ``self.path`` into memory.

        :return: the object stored in the pickle file; ``_get_entries`` calls
            ``.values()`` on it, so it is expected to be a mapping.
        :rtype: Any
        """
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing. Only point `path` at files from a trusted source.
        with open(self.path, "rb") as features_file:
            return pickle.load(features_file)

    def _get_entries(self) -> List:
        """
        Create the list of all entries. It is used later in ``__getitem__``.

        :return: list of samples, in the iteration order of ``self.features``.
        """
        # The mapping keys are not needed, so iterate over the values only.
        return [self._get_entry(item) for item in self.features.values()]

    @staticmethod
    def _get_entry(item: Dict) -> Dict:
        """
        Convert one raw item into a dataset entry.

        :param item: item from the data; must provide the keys ``'input'``
            and ``'label'``.
        :return: ``{'x': item['input'], 'y': target}`` where ``target`` is the
            float tensor ``[1, 0]`` when the label is truthy and ``[0, 1]``
            otherwise.
        """
        x = item['input']
        # Float literals keep the default floating dtype that the legacy
        # torch.Tensor([...]) constructor produced.
        y = torch.tensor([1.0, 0.0]) if item['label'] else torch.tensor([0.0, 1.0])
        return {'x': x, 'y': y}