-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataloader.py
34 lines (28 loc) · 1.27 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from sampler import Sampler
from partitioner import Partitioner
class DataLoader:
    """Sample a dataset and then partition the sample for a chosen estimator.

    The estimator is selected by an integer code that decides which
    ``Sampler`` method and which ``Partitioner`` method are invoked:

    * ``0`` -> ``cardinality_sample`` / ``cardinality_partition``
    * ``1`` -> ``frequency_sample`` / ``frequency_partition``
    * ``2`` -> ``persistency_sample`` / ``persistency_partition``
    """

    # estimator code -> (Sampler method name, Partitioner method name).
    # Keeping both names in one table guarantees the sampling and the
    # partitioning step can never disagree about the estimator.
    _ESTIMATOR_METHODS = {
        0: ("cardinality_sample", "cardinality_partition"),
        1: ("frequency_sample", "frequency_partition"),
        2: ("persistency_sample", "persistency_partition"),
    }

    def __init__(self, dataset, estimator, sample, sprobability, ssize, partition, pnodes):
        """Store the configuration; no work is done until ``get_data``.

        Args:
            dataset: Raw dataset, passed unchanged to ``Sampler``.
            estimator: Estimator code (0, 1 or 2, see the class docstring).
            sample: Forwarded to ``Sampler`` (presumably the sampling
                switch/mode -- confirm against ``Sampler``).
            sprobability: Forwarded to ``Sampler`` (presumably the sampling
                probability -- confirm against ``Sampler``).
            ssize: Forwarded to ``Sampler`` (presumably the sample size --
                confirm against ``Sampler``).
            partition: Forwarded to ``Partitioner`` (presumably the
                partitioning switch/mode -- confirm against ``Partitioner``).
            pnodes: Forwarded to ``Partitioner`` (presumably the number of
                partition nodes -- confirm against ``Partitioner``).
        """
        self.dataset = dataset
        self.estimator = estimator
        self.sample = sample
        self.sprobability = sprobability
        self.ssize = ssize
        self.partition = partition
        self.pnodes = pnodes

    def get_data(self):
        """Sample the dataset, partition the sample, and return both results.

        Returns:
            A ``(partitioned_data, sprobability)`` tuple, where
            ``partitioned_data`` is whatever the selected ``Partitioner``
            method returns and ``sprobability`` is the second value returned
            by the selected ``Sampler`` method (not necessarily the value
            given to the constructor).

        Raises:
            ValueError: If ``self.estimator`` is not one of the supported
                codes (0, 1, 2).
        """
        # Validate first so an unsupported code fails with a clear message
        # before any sampling work starts, instead of surfacing later as an
        # UnboundLocalError on ``sampled_data``.
        try:
            methods = self._ESTIMATOR_METHODS.get(self.estimator)
        except TypeError:
            # An unhashable estimator value cannot be a supported code.
            methods = None
        if methods is None:
            raise ValueError(
                "unsupported estimator {!r}; expected 0 (cardinality), "
                "1 (frequency) or 2 (persistency)".format(self.estimator)
            )
        sample_method, partition_method = methods

        # sample a sub-dataset from the raw dataset
        sampler = Sampler(self.dataset, self.sample, self.sprobability, self.ssize)
        sampled_data, sprobability = getattr(sampler, sample_method)()

        # partition the sampled sub-dataset with the matching strategy
        partitioner = Partitioner(sampled_data, self.partition, self.pnodes)
        partitioned_data = getattr(partitioner, partition_method)()

        return partitioned_data, sprobability