-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasetGenerator.py
58 lines (40 loc) · 1.21 KB
/
datasetGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import json
import math
import sys
from random import Random, shuffle
# Region count that the 70 % / 15 % split targets are computed from.
# NOTE(review): presumably the total number of regions in data.json -- confirm.
REGIONS = 4143
# Fixed seed so the shuffle, and therefore the split, is reproducible.
SEED = 42


def take_until_region_count(items, start, target):
    """Collect consecutive entries until their regions add up to ``target``.

    Entries are taken whole, so the accumulated region count may overshoot
    ``target``. Collection also stops when ``items`` is exhausted, instead of
    indexing past the end of the list.

    Args:
        items: Sequence of ``(key, entry)`` pairs; ``entry['regions']`` must
            support ``len()``.
        start: Index of the first pair to consider.
        target: Number of regions to reach before stopping.

    Returns:
        A ``(selected, next_index, region_count)`` tuple: the pairs taken,
        the index of the first pair that was not taken, and the number of
        regions contained in the taken pairs.
    """
    selected = []
    region_count = 0
    idx = start
    while region_count < target and idx < len(items):
        pair = items[idx]
        selected.append(pair)
        region_count += len(pair[1]['regions'])
        idx += 1
    return selected, idx, region_count


def main():
    """Split ``data.json`` into train/val/test JSON files by region count.

    Reads the JSON object in ``data.json``, shuffles its entries with a
    generator seeded by ``SEED``, and assigns entries in shuffled order:
    first to the training split until it holds at least
    ``ceil(0.7 * REGIONS)`` regions, then to the validation split until it
    holds at least ``ceil(0.15 * REGIONS)`` regions; every remaining entry
    goes to the test split. The three splits are written to
    ``train_via_region_data.json``, ``val_via_region_data.json`` and
    ``test_via_region_data.json`` in the current working directory.
    """
    with open('data.json', 'r', encoding='utf-8') as data_file:
        data = json.load(data_file)

    entries = list(data.items())
    # A dedicated Random instance keeps the shuffle independent of the
    # module-level random state.
    Random(SEED).shuffle(entries)

    train_target = math.ceil(0.7 * REGIONS)
    val_target = math.ceil(0.15 * REGIONS)

    train_items, idx, train_count = take_until_region_count(
        entries, 0, train_target)
    val_items, idx, val_count = take_until_region_count(
        entries, idx, val_target)
    test_items = entries[idx:]

    # Running out of entries before a target is met means REGIONS does not
    # match the data; report it instead of failing with an IndexError.
    if train_count < train_target or val_count < val_target:
        print(
            'warning: ran out of entries before reaching the region targets '
            '(train {}/{}, val {}/{}); check REGIONS'.format(
                train_count, train_target, val_count, val_target),
            file=sys.stderr,
        )

    # Number of entries (not regions) in each split.
    print(len(train_items))
    print(len(val_items))
    print(len(test_items))

    train_data = dict(train_items)
    val_data = dict(val_items)
    test_data = dict(test_items)

    # Dump the whole training split to stdout, as the script always has.
    print(train_data)

    outputs = (
        ('train_via_region_data.json', train_data),
        ('val_via_region_data.json', val_data),
        ('test_via_region_data.json', test_data),
    )
    for path, split in outputs:
        with open(path, 'w', encoding='utf-8') as out_file:
            json.dump(split, out_file)


if __name__ == '__main__':
    main()