-
Notifications
You must be signed in to change notification settings - Fork 1
/
make_dataset_split.py
79 lines (66 loc) · 2.83 KB
/
make_dataset_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#standard libraries
import os
import glob
#external libraries
import numpy as np
import joblib
# from keras.applications.inception_v3 import InceptionV3 # ,preprocess_input
# from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.inception_v3 import preprocess_input as preprocess_input_inception
from keras.applications.resnet50 import preprocess_input as preprocess_input_resnet
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelBinarizer
# SET: True or False -- when True, folders whose path contains
# 'Movies3of36PhotosEach' are skipped while scanning the dataset.
exclude_movie = False

# Accumulators filled while scanning the image folders: the raw image arrays
# for each network and one label per image.
X_inception, X_resnet, y = [], [], []
def get_pca(data, n_components=2048):
    """Return a PCA representation of a stack of images.

    The input is averaged over axis 3, each resulting image is flattened to
    a single row, and a PCA is fitted on (and applied to) those rows.

    Args:
        data: 4-D array; presumably (n_images, height, width, channels) as
            produced by ``img_to_array`` -- confirm against the caller.
        n_components: Number of components passed to ``PCA``. Defaults to
            2048, the value that was previously hard-coded.

    Returns:
        The array returned by ``PCA.fit_transform`` on the flattened images.
    """
    # collapse axis 3 by averaging over it
    images = data.mean(axis=3)
    # flatten images: one row per image, with the row width derived from the
    # data instead of assuming 299x299 inputs
    row_width = int(np.prod(images.shape[1:]))
    images = images.reshape(images.shape[0], row_width)
    pca = PCA(n_components=n_components)
    return pca.fit_transform(images)
def preprocess(imgs, model):
    """Apply the network-specific input preprocessing to every image.

    Args:
        imgs: Iterable of image arrays.
        model: 'inceptionnet' or 'resnet' selects the matching keras
            ``preprocess_input``; any other value leaves the images as-is.

    Returns:
        A numpy array built from the processed images, in input order.
    """
    def _transform(batch):
        # the preprocessing functions are only looked up when actually selected
        if model == 'inceptionnet':
            batch = preprocess_input_inception(batch)
        if model == 'resnet':
            batch = preprocess_input_resnet(batch)
        return batch

    processed = []
    for image in imgs:
        # each image is wrapped as a batch of one before being transformed
        single_batch = _transform(np.expand_dims(image, axis=0))
        # extending with the batch appends its items, i.e. drops the batch axis
        processed.extend(single_batch)
    return np.array(processed)
print('Exlcuding movie in labels: {}'.format(exclude_movie))

# Walk the dataset folders in sorted order; the folder name is used as the
# label of every image found inside it.
for folder in sorted(glob.glob("./WinEarthPhotosByKeyword/*")):  # SPECIFY DIRECTORY
    # exclude movie filter
    if exclude_movie and 'Movies3of36PhotosEach' in folder:
        continue
    print(folder)
    filenames = sorted(glob.glob(os.path.join(folder, "*.jpg")))
    for filename in filenames:
        # every image is loaded twice, once at each network's target size
        image_inception = img_to_array(load_img(filename, target_size=(299, 299)))
        image_resnet = img_to_array(load_img(filename, target_size=(224, 224)))
        X_inception.append(image_inception)
        X_resnet.append(image_resnet)
        y.append(os.path.basename(folder))

# preprocess imgs for finetuning InceptionNet and ResNet
X_inception = preprocess(X_inception, 'inceptionnet')
# Use the 224x224 images collected for ResNet. Passing X_inception here would
# discard them and re-process the already preprocessed 299x299 array.
X_resnet = preprocess(X_resnet, 'resnet')

# initialize models
# inception_model = InceptionV3(include_top=False, pooling='avg')
# resnet_model = ResNet50(include_top=False, pooling='avg')

# generate different image representation
# inception_reps = inception_model.predict(np.array(X_inception))
# resnet_reps = resnet_model.predict(np.array(X_resnet))
# pca_reps = get_pca(np.array(X_inception))

# one hot encoding
encoder = LabelBinarizer()
one_hot_encoded = encoder.fit_transform(y)

# save the representations to disk
path = 'without_movie/' if exclude_movie else 'with_movie/'
os.makedirs(path, exist_ok=True)
# joblib.dump(pca_reps, path + 'pca_representation.joblib')
# joblib.dump((inception_reps), path +"inception_representations.joblib")
# joblib.dump((resnet_reps), path + "resnet_representations.joblib")
joblib.dump(X_inception, path + "inception_preprocessed.joblib")
joblib.dump(X_resnet, path + "resnet_preprocessed.joblib")
# the labels file bundles the one-hot matrix, the raw label list and the
# fitted encoder
joblib.dump((one_hot_encoded, y, encoder), path + "labels.joblib")