-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluate_IVE.py
217 lines (178 loc) · 7.79 KB
/
evaluate_IVE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import pickle as pk
import os
import numpy as np
import evaluation.load_diveface as load_diveface
import evaluation.load_UTKFace as load_UTKFace
import evaluation.utils as eval_utils
import evaluation.simple_classifier as simple_classifier
import evaluation.verification_performance as vp
import pandas as pd
import random
import evaluation.hyperparameter_finetuning as hyp_ft
def zeros_pca(x_total, mask):
    """Zero out the feature columns of ``x_total`` that the mask blocks.

    Parameters
    ----------
    x_total : np.ndarray
        2-D array of shape (n_samples, n_features); modified in place.
    mask : sequence of bool
        One flag per column; columns whose flag is False are set to zero.

    Returns
    -------
    np.ndarray
        The same array object, with the masked-off columns zeroed.
    """
    # Vectorized: a single boolean-indexed assignment replaces the original
    # per-column Python loop that allocated a fresh zeros vector per column.
    blocked = ~np.asarray(mask, dtype=bool)
    x_total[:, blocked] = 0.0
    return x_total
def fix_pca_masks(masks):
    """Re-align per-epoch PCA/ICA masks to a common feature dimension.

    Each epoch's mask is expressed relative to the features surviving the
    previous epoch, so it is shorter than the first one.  For every index
    that an earlier epoch removed, a ``False`` is re-inserted at that
    position, making all masks in a method's list the same length.

    Parameters
    ----------
    masks : dict
        Keys ``'1'``/``'2'``/``'3'`` (one per IVE method), each mapping to a
        list of boolean arrays, one per epoch.

    Returns
    -------
    dict
        Same keys, with every mask padded to the original dimensionality.
    """
    aligned = {}
    for key in ('1', '2', '3'):
        fixed_list = []
        for raw_mask in masks[key]:
            if not fixed_list:
                # The very first epoch is already in the full dimension.
                fixed_list.append(raw_mask)
                continue
            adjusted = raw_mask
            # Walk the previously aligned mask in ascending order and
            # re-insert a False wherever a feature was already dropped.
            for position, keep in enumerate(fixed_list[-1]):
                if not keep:
                    adjusted = np.insert(adjusted, position, False)
            fixed_list.append(adjusted)
        aligned[key] = fixed_list
    return aligned
def execute_evaluation(db, classifiers, transform, seed, blocked_features=0):
    """Evaluate the three IVE feature-elimination methods over 170 saved epochs.

    Loads the per-epoch elimination masks (and, if applicable, the fitted
    PCA/ICA and scaler pickles) from ``results/<seed>_<transform>[...]``,
    then every N_EPOCHS_EVAL epochs trains/evaluates sensitive-attribute
    classifiers on the progressively reduced embeddings, and every
    N_EPOCHS_FT epochs re-finetunes the classifier hyperparameters.

    Parameters
    ----------
    db : str
        Dataset to evaluate on: 'diveface' or 'utkface'.
    classifiers : list of str
        Classifier identifiers understood by the evaluation helpers
        (e.g. 'mlp', 'svm_lin', ...).
    transform : str
        'pca', 'ica', or anything else for the raw-feature (no-transform) run.
    seed : int
        Random seed; also selects the results folder to read from.
    blocked_features : int, optional
        Number of blocked features; only used to build the folder name for
        'pca'/'ica' runs (the ``_k=`` suffix).

    Side effects: reads pickles/.npy/.csv from disk and writes metric plots
    via ``eval_utils.plot_metrics``.
    """
    # fine-tune classifiers every N_EPOCHS_FT epochs, evaluate the algorithm every N_EPOCHS_EVAL epochs
    N_EPOCHS_FT = 25
    N_EPOCHS_EVAL = 5
    folder = str(seed) + ('_' + transform)
    if transform == 'pca' or transform == 'ica':
        folder = folder + '_k=' + str(blocked_features)
    # load scaler and PCA
    if transform == 'pca':
        pca = pk.load(open(os.path.join(os.path.join('results', folder), 'pca.pkl'), 'rb'))
    if transform == 'ica':
        ica = pk.load(open(os.path.join(os.path.join('results', folder), 'ica.pkl'), 'rb'))
    # NOTE(review): the scaler is loaded for every transform, since it is
    # applied to x_total below regardless of 'pca'/'ica' — confirm the pickle
    # exists in all run folders.
    std_sc = pk.load(open(os.path.join(os.path.join('results', folder), 'std_sc.pkl'), 'rb'))
    # load masks
    masks = {'1': [], '2': [], '3': []}
    num_epochs = 170
    for i in range(1, 4):
        for epoch in range(num_epochs):
            # masks are stored one file per epoch, zero-padded: 000.npy ... 169.npy
            filename = "{0:03}.npy".format(epoch)
            mask = np.load(os.path.join('results', folder, 'method' + str(i), filename))
            masks[str(i)].append(mask)
    if transform == 'pca' or transform == 'ica':
        # PCA/ICA masks are relative to the previous epoch's surviving
        # components; re-align them to a common dimension.
        masks = fix_pca_masks(masks)
    # load data for evaluation
    if db == 'diveface':
        # create
        if not os.path.isfile('data/diveface_df.csv'):
            # first run: build the dataframe from the raw embeddings
            # (note the Windows-style path — presumably this repo targets
            # Windows; verify on other platforms)
            diveface_df, length_embeddings = load_diveface.get_diveface_df(r'data\diveface_embeddings', seed,
                                                                           save_files=True, limit_size=True)
        else:
            diveface_df = pd.read_csv('data/diveface_df.csv')
            # last 5 columns are assumed to be metadata, the rest embeddings
            length_embeddings = diveface_df.shape[1] - 5
        train_indexes, test_indexes = load_diveface.get_sb_train_test_indexes(diveface_df, seed, length_embeddings)
        verification_indexes = load_diveface.get_verification_indexes(diveface_df, seed, length_embeddings)
        # get data ready to perform evaluation of SB and verification
        x_total = load_diveface.get_x_ready(diveface_df, length_embeddings)
        labels = eval_utils.get_labels(db)
        y = load_diveface.get_y_ready(diveface_df, labels)
    elif db == 'utkface':
        # create
        if not os.path.isfile('data/utkface_df.csv'):
            utkface_df, length_embeddings = load_UTKFace.get_utkface_df(r'data\utkface_embeddings', seed,
                                                                        save_files=True, limit_size=True)
        else:
            utkface_df = pd.read_csv('data/utkface_df.csv')
            # last 2 columns are assumed to be metadata for UTKFace
            length_embeddings = utkface_df.shape[1] - 2
        train_indexes, test_indexes = load_UTKFace.get_sb_train_test_indexes(utkface_df, seed, length_embeddings)
        # get data ready to perform evaluation of SB and verification
        x_total = load_UTKFace.get_x_ready(utkface_df, length_embeddings)
        labels = eval_utils.get_labels(db)
        y = load_UTKFace.get_y_ready(utkface_df, labels)
    x_total = std_sc.transform(x_total)
    # one working copy of the embeddings per IVE method
    x_first = np.copy(x_total)
    x_second = np.copy(x_total)
    x_third = np.copy(x_total)
    metrics = eval_utils.get_metric_dict(classifiers, db)
    # per-method (keys '0'..'2'), per-label cache of fine-tuned classifiers
    dict_ft_classifiers = {'0': {}, '1': {}, '2': {}}
    for epoch in range(num_epochs):
        if (epoch % N_EPOCHS_EVAL) == 0:
            for ive_method, x in enumerate([x_first, x_second, x_third]):
                x_train = x[train_indexes]
                x_test = x[test_indexes]
                y_train = y[train_indexes]
                y_test = y[test_indexes]
                scores = []
                for i, label in enumerate(labels):
                    # every n epochs finetune the classifiers
                    if (epoch % N_EPOCHS_FT) == 0:
                        all_ft_classifiers = hyp_ft.get_finetuned_classifiers(classifiers, seed, x_train, y_train[:, i])
                        dict_ft_classifiers[str(ive_method)][label] = all_ft_classifiers
                        print('Classifiers fine-tuned! Epoch ' + str(epoch) + ', label ' + label)
                    # get the list of ft-classifiers specific for method and label
                    ft_classifiers = dict_ft_classifiers[str(ive_method)][label]
                    # compute the scores of every sb-classifier and store them
                    score = simple_classifier.train_evaluate_classifier(x_train, y_train[:, i], x_test, y_test[:, i],
                                                                        classifiers, ft_classifiers, seed)
                    scores.append(score)
                txt = 'Epoch ' + str(epoch) + ' (Method ' + str(ive_method) + ') --> '
                for s, l in zip(scores, labels):
                    # only to have an idea of the trend
                    txt += l + ': ' + str(s[classifiers[0]]) + '---'
                # verification EER is only computed for DiveFace runs that
                # include the 'mlp' classifier; otherwise report 'na'
                if db == 'diveface' and ('mlp' in classifiers):
                    eer_verification = vp.evaluate_verification(verification_indexes, x, seed)
                else:
                    eer_verification = 'na'
                txt += 'EER: ' + str(eer_verification)
                # blank line after the last method's report for readability
                print(txt + ('' if not ive_method == 2 else '\n'))
                metrics = eval_utils.store_metrics(metrics, ive_method, scores, eer_verification, db)
        # apply this epoch's elimination mask to each method's embeddings
        if transform == 'pca':
            # transform in PCA
            x_first = pca.transform(x_first)
            x_second = pca.transform(x_second)
            x_third = pca.transform(x_third)
            # eliminate features in PCA
            x_first = zeros_pca(x_first, masks['1'][epoch])
            x_second = zeros_pca(x_second, masks['2'][epoch])
            x_third = zeros_pca(x_third, masks['3'][epoch])
            # transform from PCA to original domain
            x_first = pca.inverse_transform(x_first)
            x_second = pca.inverse_transform(x_second)
            x_third = pca.inverse_transform(x_third)
        elif transform == 'ica':
            # same scheme as PCA, but in the ICA domain
            x_first = ica.transform(x_first)
            x_second = ica.transform(x_second)
            x_third = ica.transform(x_third)
            # eliminate features in ICA space
            x_first = zeros_pca(x_first, masks['1'][epoch])
            x_second = zeros_pca(x_second, masks['2'][epoch])
            x_third = zeros_pca(x_third, masks['3'][epoch])
            # transform from ICA back to the original domain
            x_first = ica.inverse_transform(x_first)
            x_second = ica.inverse_transform(x_second)
            x_third = ica.inverse_transform(x_third)
        else:
            # raw-feature run: boolean-mask out the eliminated columns,
            # shrinking the feature dimension every epoch
            x_first = x_first[:, masks['1'][epoch]]
            x_second = x_second[:, masks['2'][epoch]]
            x_third = x_third[:, masks['3'][epoch]]
    eval_utils.plot_metrics(metrics, folder, db, save_files=True)
if __name__ == '__main__':
    # Guarded entry point: the original ran the evaluation at import time,
    # which triggers heavy disk I/O and training whenever this module is
    # imported. The commented blocks below reproduce the full experiment
    # grid (all seeds / transforms / classifier sets).
    execute_evaluation('diveface', ['mlp'], 'pca', 0, 0)
    # for seed in range(10):
    #     execute_evaluation('utkface', ['mlp'], 'NO_pca', seed)
    #     for k in [0, 3, 5]:
    #         execute_evaluation('utkface', ['mlp'], 'pca', seed, k)
    #     execute_evaluation('utkface', ['mlp'], 'ica', seed)
    #
    # other_classifiers = ['svm_lin', 'et', 'log_reg']
    # databases = ['diveface', 'utkface']
    # for db in databases:
    #     for seed in range(10):
    #         execute_evaluation(db, other_classifiers, 'NO_pca', seed)
    #         for k in [0, 3, 5]:
    #             execute_evaluation(db, other_classifiers, 'pca', seed, k)
    #         execute_evaluation(db, other_classifiers, 'ica', seed)
    # selected --> ['mlp', 'svm_lin', 'et', 'log_reg']
    # all --> ['mlp', 'svm_lin', 'svm_rbf', 'rf', 'gb', 'nb', 'et', 'log_reg']
    # other_classifiers = ['svm_lin', 'et', 'log_reg']
    # databases = ['diveface', 'utkface']
    # for seed in range(10):
    #     for db in databases:
    #         execute_evaluation(db, other_classifiers, 'NO_pca', seed)
    #         for k in [0, 3, 5]:
    #             execute_evaluation(db, other_classifiers, 'pca', seed, k)
    #         execute_evaluation(db, other_classifiers, 'ica', seed)