4 Chapter Four Metrics.py
%reset -f
#----------------------------------------------------
# Chapter Four
#----------------------------------------------------
# 4. Metrics
# TP, FP, FN, TN
# Classification Report
# Confusion Matrix
# ROC-AUC
# Precision-Recall AUC
# Log Loss / Cross-Entropy Loss
# Cohen's Kappa
# MCC
# Precision
# Recall (sensitivity)
# Unweighted Average Recall
# Specificity
# F1-Score
# Accuracy
# Balanced Accuracy
# Hamming Loss
# Jaccard Index
#----------------------------------------------------
import numpy as np
import pandas as pd
import os
import warnings
import time
from scipy.interpolate import interp1d
from sklearn.preprocessing import StandardScaler, MinMaxScaler, label_binarize
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
classification_report, confusion_matrix, roc_auc_score,
average_precision_score, log_loss, cohen_kappa_score,
matthews_corrcoef, balanced_accuracy_score,
precision_score, recall_score, f1_score, accuracy_score,
hamming_loss, jaccard_score
)
from xgboost import XGBClassifier
from sklearn.feature_selection import SelectKBest, chi2
#----------------------------------------------------
# Record the start time
start_time = time.time()
# Suppress warnings
warnings.filterwarnings("ignore")
# Parse BVH files
def parse_bvh(file_path):
    """Parse a BVH file into its header lines and motion-data array."""
    print(f"Parsing file: {file_path}")
    with open(file_path, 'r') as file:
        lines = file.readlines()
    header, motion_data = [], []
    capture_data = False
    for line in lines:
        if "MOTION" in line:
            capture_data = True
        elif capture_data:
            stripped = line.strip()
            # Skip the frame-count/frame-time metadata and any blank lines.
            if not stripped or stripped.startswith("Frames") or stripped.startswith("Frame Time"):
                continue
            motion_data.append(np.fromstring(line, sep=' '))
        else:
            header.append(line)
    print(f"Finished parsing: {file_path}")
    return header, np.array(motion_data)
def read_bvh(filename):
    """Reads a BVH file and returns only its parsed motion data."""
    header, motion_data = parse_bvh(filename)
    return motion_data
def interpolate_frames(motion_data, target_frame_count):
    """Interpolates BVH frames to match a target frame count."""
    print(f"Interpolating frames to match target frame count: {target_frame_count}")
    original_frame_count = len(motion_data)
    original_time = np.linspace(0, 1, original_frame_count)
    target_time = np.linspace(0, 1, target_frame_count)
    interpolated_frames = []
    # Interpolate each channel (column) independently along the time axis.
    for frame in np.array(motion_data).T:
        interpolator = interp1d(original_time, frame.astype(float), kind='linear')
        interpolated_frames.append(interpolator(target_time))
    print(f"Finished interpolation: original frame count = {original_frame_count}, "
          f"target frame count = {target_frame_count}")
    return np.array(interpolated_frames).T
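# A minimal self-check of interpolate_frames on synthetic data (an addition,
# not part of the original pipeline): a 10-frame, 2-channel linear ramp
# upsampled to 19 frames should keep its endpoints and have the right shape.
_demo = np.column_stack([np.arange(10.0), np.arange(10.0) * 2])
_demo_out = interpolate_frames(_demo, 19)
assert _demo_out.shape == (19, 2)
assert np.allclose(_demo_out[0], _demo[0]) and np.allclose(_demo_out[-1], _demo[-1])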
def find_max_frames(folder_path):
    """Finds the maximum number of frames among all BVH files in a folder."""
    print(f"Finding maximum number of frames in folder: {folder_path}")
    max_frames = 0
    for filename in os.listdir(folder_path):
        if filename.endswith('.bvh'):
            motion_data = read_bvh(os.path.join(folder_path, filename))
            max_frames = max(max_frames, len(motion_data))
    print(f"Maximum frames found: {max_frames}")
    return max_frames
def process_bvh_files(folder_path, max_frames):
    """Processes each BVH file in the folder, interpolating all files to the same number of frames."""
    print(f"Processing BVH files in folder: {folder_path}")
    all_features = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.bvh'):
            print(f"Processing file: {filename}")
            full_path = os.path.join(folder_path, filename)
            motion_data = read_bvh(full_path)
            interpolated_frames = interpolate_frames(motion_data, max_frames)
            all_features.append(interpolated_frames)  # Keep the 2D array; flattening happens later
            print(f"Finished processing file: {filename}")
    print(f"Finished processing all BVH files in folder: {folder_path}")
    return np.array(all_features)
#------------------------------------------------------------------
# Feature Extraction
#------------------------------------------------------------------
def extract_motion_features(motion_data):
    """Extracts key motion features: angular velocity, acceleration, jerk, and range of motion."""
    print("Extracting motion features...")
    channels_per_joint = 3  # 3 channels per joint (e.g., X, Y, Z rotations)
    num_joints = motion_data.shape[1] // channels_per_joint
    features = []
    for i in range(num_joints):
        joint_rotations = motion_data[:, i * channels_per_joint:(i + 1) * channels_per_joint]
        # Angular velocity: first derivative of joint rotation
        angular_velocity = np.diff(joint_rotations, axis=0)
        # Acceleration: derivative of angular velocity
        acceleration = np.diff(angular_velocity, axis=0)
        # Jerk: derivative of acceleration
        jerk = np.diff(acceleration, axis=0)
        # Range of motion: max rotation minus min rotation per channel
        range_of_motion = np.max(joint_rotations, axis=0) - np.min(joint_rotations, axis=0)
        # Flatten all features for this joint and append
        joint_features = np.concatenate([angular_velocity.flatten(), acceleration.flatten(),
                                         jerk.flatten(), range_of_motion])
        features.append(joint_features)
    # Flatten the features of all joints into one vector
    flattened_features = np.concatenate(features)
    print("Feature extraction complete.")
    return flattened_features
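# Hedged sanity check (an addition, not in the original script): for T frames
# and 3 channels per joint, each joint contributes 3*(T-1) velocity values,
# 3*(T-2) acceleration values, 3*(T-3) jerk values, and 3 range-of-motion
# values, so the expected total feature length is easy to predict.
def expected_feature_length(num_frames, num_joints, channels_per_joint=3):
    per_joint = channels_per_joint * ((num_frames - 1) + (num_frames - 2) + (num_frames - 3) + 1)
    return num_joints * per_joint
# e.g., 100 frames and 20 joints give 20 * 3 * (99 + 98 + 97 + 1) = 17700 features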
# -------------------------------
# Train Folder
train_folder_path = 'Small Dataset/'
# -------------------------------
# Find maximum frame size in the training data
print("Finding maximum frames in the training dataset...")
max_frames_train = find_max_frames(train_folder_path)
# Process BVH files using the maximum frame size
print("Processing training data...")
all_features_train = process_bvh_files(train_folder_path, max_frames_train)
# Extract motion features for each processed BVH file
print("Extracting features from processed data...")
# The files were already interpolated to max_frames_train inside process_bvh_files,
# so features can be extracted directly without interpolating a second time.
X_features = np.array([extract_motion_features(motion_data) for motion_data in all_features_train])
# Train Labels
print("Assigning labels to the dataset...")
C_Angry = [0] * 42
C_Depressed = [1] * 42
C_Neutral = [2] * 42
C_Proud = [3] * 32
Labels = np.array(C_Angry + C_Depressed + C_Neutral + C_Proud, dtype=np.int32)
print("Labels assigned.")
# Standardize the data
print("Standardizing the data...")
scaler = StandardScaler()
X_standardized = scaler.fit_transform(all_features_train.reshape(len(all_features_train), -1)) # Flatten for standardization
print("Data standardized.")
# Normalize the data (min-max scaling)
print("Normalizing the data...")
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X_standardized)
print("Data normalized.")
# Handle NaN values
print("Handling NaN values...")
imputer = SimpleImputer(strategy='mean')
X_cleaned = imputer.fit_transform(X_normalized)
print("NaN values handled.")
# Final feature set and labels
X = X_cleaned
Y = Labels
# -------------------------------------------
# Perform feature selection
# -------------------------------------------
k = min(1000, X.shape[1])  # Number of top features, capped at the available feature count
selector = SelectKBest(chi2, k=k)  # chi2 requires non-negative inputs; min-max scaling guarantees that
X = selector.fit_transform(X, Y)
# Display the indices of the selected features
selected_indices = selector.get_support(indices=True)
print(f"Selected feature indices: {selected_indices}")
# -------------------------------------------
# X_features contains the flattened extracted features
print(f"Feature extraction completed. X_features shape: {X_features.shape}")
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)  # stratify keeps the imbalanced class proportions in both splits
# XGBoost Classifier
xgb_classifier = XGBClassifier(eval_metric='mlogloss')  # use_label_encoder is deprecated in recent XGBoost versions and no longer needed
print("Training the XGBoost classifier...")
xgb_classifier.fit(X_train, Y_train)
print("Training completed.")
# Predict probabilities and labels
print("Making predictions...")
y_proba_XGB = xgb_classifier.predict_proba(X_test)
y_pred_XGB = xgb_classifier.predict(X_test)
# Confusion Matrix and Basic Elements: TP, FP, FN, TN
# Confusion Matrix and metrics for multiclass problem
cm = confusion_matrix(Y_test, y_pred_XGB)
print(f"Confusion Matrix:\n{cm}")
# For multiclass, the per-class counts come straight from the confusion matrix
TP = np.diag(cm)                # True Positives: the diagonal elements
FP = cm.sum(axis=0) - TP        # False Positives: column sums minus the diagonal
FN = cm.sum(axis=1) - TP        # False Negatives: row sums minus the diagonal
TN = cm.sum() - (FP + FN + TP)  # True Negatives: everything else
# Sum them up to get overall values if needed
TP_sum = TP.sum()
FP_sum = FP.sum()
FN_sum = FN.sum()
TN_sum = TN.sum()
print(f"True Positives (TP): {TP_sum}, False Positives (FP): {FP_sum}, False Negatives (FN): {FN_sum}, True Negatives (TN): {TN_sum}")
# Metrics Calculation
print("Calculating metrics...")
# 1. ROC-AUC (for Multiclass)
roc_auc_XGB = roc_auc_score(Y_test, y_proba_XGB, multi_class='ovr')
print(f"XGBoost ROC-AUC: {roc_auc_XGB:.4f}")
# 2. Precision-Recall AUC
# average_precision_score expects binarized labels in the multiclass case
Y_test_bin = label_binarize(Y_test, classes=np.unique(Y))
pr_auc_XGB = average_precision_score(Y_test_bin, y_proba_XGB, average='macro')
print(f"XGBoost Precision-Recall AUC: {pr_auc_XGB:.4f}")
# 3. Log Loss (Cross-Entropy Loss)
log_loss_XGB = log_loss(Y_test, y_proba_XGB)
print(f"XGBoost Log Loss: {log_loss_XGB:.4f}")
# 4. Cohen's Kappa
cohen_kappa_XGB = cohen_kappa_score(Y_test, y_pred_XGB)
print(f"XGBoost Cohen's Kappa: {cohen_kappa_XGB:.4f}")
# 5. Matthews Correlation Coefficient (MCC)
mcc_XGB = matthews_corrcoef(Y_test, y_pred_XGB)
print(f"XGBoost MCC: {mcc_XGB:.4f}")
# 6. Precision
precision_XGB = precision_score(Y_test, y_pred_XGB, average='macro')
print(f"XGBoost Precision: {precision_XGB:.4f}")
# 7. Recall (Sensitivity)
recall_XGB = recall_score(Y_test, y_pred_XGB, average='macro')
print(f"XGBoost Recall (Sensitivity): {recall_XGB:.4f}")
# 8. Unweighted Average Recall (UAR): identical by definition to macro-averaged recall
uar_XGB = recall_score(Y_test, y_pred_XGB, average='macro')
print(f"XGBoost Unweighted Average Recall (UAR): {uar_XGB:.4f}")
# 9. Specificity (True Negative Rate), computed per class from the confusion-matrix counts
specificity_XGB = TN / (TN + FP)
print(f"XGBoost Per-Class Specificity: {np.round(specificity_XGB, 4)}")
# Calculate and print average specificity
avg_specificity_XGB = np.mean(specificity_XGB)
print(f"XGBoost Average Specificity (True Negative Rate): {avg_specificity_XGB:.4f}")
# 10. F1-Score
f1_XGB = f1_score(Y_test, y_pred_XGB, average='macro')
print(f"XGBoost F1-Score: {f1_XGB:.4f}")
# 11. Accuracy
accuracy_XGB = accuracy_score(Y_test, y_pred_XGB)
print(f"XGBoost Accuracy: {accuracy_XGB:.4f}")
# 12. Balanced Accuracy
balanced_accuracy_XGB = balanced_accuracy_score(Y_test, y_pred_XGB)
print(f"XGBoost Balanced Accuracy: {balanced_accuracy_XGB:.4f}")
# 13. Hamming Loss
hamming_loss_XGB = hamming_loss(Y_test, y_pred_XGB)
print(f"XGBoost Hamming Loss: {hamming_loss_XGB:.4f}")
# 14. Jaccard Index (Intersection over Union)
jaccard_XGB = jaccard_score(Y_test, y_pred_XGB, average='macro')
print(f"XGBoost Jaccard Index: {jaccard_XGB:.4f}")
# 15. Classification Report
class_report_XGB = classification_report(Y_test, y_pred_XGB)
print(f"Classification Report:\n{class_report_XGB}")
# Print completion message
end_time = time.time()
print(f"Pre-processing, training, and metrics calculation completed in {end_time - start_time:.2f} seconds.")