diff --git a/assets/rf_confusion_matrix.png b/assets/rf_confusion_matrix.png
index e3a88a5..4f5ea3d 100644
Binary files a/assets/rf_confusion_matrix.png and b/assets/rf_confusion_matrix.png differ
diff --git a/src/musicNet/main.py b/src/musicNet/main.py
index 34d6e2c..184036f 100644
--- a/src/musicNet/main.py
+++ b/src/musicNet/main.py
@@ -7,6 +7,7 @@
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score
+from sklearn.metrics import roc_auc_score
import xgboost as xgb
path = 'src/musicNet/processed_data'
@@ -28,7 +29,7 @@
labels = ['Bach', 'Beethoven', 'Brahms', 'Mozart', 'Schubert']
-dt_clf = DecisionTreeClassifier(random_state=42)
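+# Cap tree depth (previously unbounded) to limit overfitting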
+dt_clf = DecisionTreeClassifier(max_depth=10, random_state=42)
dt_clf.fit(X_train, y_train)
y_pred = dt_clf.predict(X_test)
training_accuracy = dt_clf.score(X_train, y_train)
@@ -73,9 +74,19 @@
plt.show()
plt.close()
-dt_clf = DecisionTreeClassifier(random_state=42)
+dt_clf = DecisionTreeClassifier(max_depth=10, random_state=42)
dt_clf.fit(X_train, y_train)
ypred = dt_clf.predict(X_test)
+training_accuracy = dt_clf.score(X_train, y_train)
+accuracy = dt_clf.score(X_test, y_test)
+print("Decision Tree Classifier")
+print(f"Training Accuracy: {training_accuracy}")
+print(f"Test Accuracy: {accuracy}")
+print(f"Test F1-Score: {f1_score(y_test, ypred, average='weighted')}")
+ypred_proba = dt_clf.predict_proba(X_test)
+print(f"Test 1v1 AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovo')}")
+print(f"Test 1vRest AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovr')}\n")
+print(f"Tree Depth: {dt_clf.get_depth()}")
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
@@ -90,11 +101,14 @@
rf_clf.fit(X_train, y_train)
training_accuracy = rf_clf.score(X_train, y_train)
accuracy = rf_clf.score(X_test, y_test)
-y_pred = rf_clf.predict(X_test)
+ypred = rf_clf.predict(X_test)
print("Random Forest Classifier")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
-print(f"Test F1-Score{f1_score(y_test, y_pred, average='weighted')}")
+print(f"Test F1-Score{f1_score(y_test, ypred, average='weighted')}")
+ypred_proba = rf_clf.predict_proba(X_test)
+print(f"Test 1v1 AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovo')}")
+print(f"Test 1vRest AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovr')}\n")
max_depth = 0
for tree in rf_clf.estimators_:
if max_depth < tree.get_depth():
@@ -121,7 +135,10 @@
print("XGBoost Classifier - 20 estimators, max_depth of 15, learning rate of 0.8, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
-print(f"Test F1-Score{f1_score(y_test, y_pred, average='weighted')}\n")
+print(f"Test F1-Score{f1_score(y_test, ypred, average='weighted')}")
+ypred_proba = bst.predict_proba(X_test)
+print(f"Test 1v1 AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovo')}")
+print(f"Test 1vRest AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovr')}\n")
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
@@ -194,7 +211,10 @@
print("XGBoost Classifier - 1000 estimators, max_depth of 15, learning rate of 0.8, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
-print(f"Test F1-Score{f1_score(y_test, y_pred, average='weighted')}\n")
+print(f"Test F1-Score{f1_score(y_test, ypred, average='weighted')}")
+ypred_proba = xgb_clf.predict_proba(X_test)
+print(f"Test 1v1 AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovo')}")
+print(f"Test 1vRest AUC-Score: {roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovr')}\n")
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
diff --git a/tabs/final_report.md b/tabs/final_report.md
index ce220c9..144ffc4 100644
--- a/tabs/final_report.md
+++ b/tabs/final_report.md
@@ -202,12 +202,14 @@ We fit our decision tree with the cost complexity hyperparameter described [prev
-A note on F1-Score: For this section, we use a weighted average F1 score since this is a multi-class classification task and we believe this method of aggregated pairwise F1-scores is best for our imbalanced dataset.
+**A note on F1-Score and AUC:** For this section, we use a weighted average F1-score and a weighted average Area Under the receiver operating characteristic Curve (AUC). We weight these scores because of the class imbalance in this dataset. The F1-score is the harmonic mean of precision and recall, so it acts as an aggregate metric for both. Because it is defined for the binary case of true/false positives/negatives, each class has its own F1-score; these per-class values are then aggregated by a weighted average into the single value reported below. The AUC is an aggregate measurement of true and false positive rates derived from the ROC plot, which plots the true positive rate (TPR) against the false positive rate (FPR) at each threshold setting. Like the F1-score, it is a binary classification statistic, so each class has its own AUC score, which is aggregated into a single reported AUC. We use both the 1 vs Rest and 1 vs 1 methods: 1 vs Rest divides the data into the class being measured (positives) and all remaining classes (negatives), while 1 vs 1 makes pairwise comparisons between each pair of classes as positives and negatives. Both metrics are highly regarded for measuring classification performance and tend to be more informative than accuracy alone, especially on imbalanced datasets such as this one [[5.]](#references), [[6.]](#references).
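+
+As a minimal sketch of how these scores are produced (the same scikit-learn calls used in `main.py`; `clf` here stands in for any of the fitted classifiers):
+
+```python
+from sklearn.metrics import f1_score, roc_auc_score
+
+ypred = clf.predict(X_test)              # hard class labels for the F1-score
+ypred_proba = clf.predict_proba(X_test)  # per-class probabilities for AUC
+
+# 'weighted' averages the per-class scores, weighting each class by its support
+f1 = f1_score(y_test, ypred, average='weighted')
+auc_ovo = roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovo')
+auc_ovr = roc_auc_score(y_test, ypred_proba, average='weighted', multi_class='ovr')
+```
+
+The weighted average matters here because an unweighted (macro) average would let the sparsest composer classes dominate the reported score.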
Decision Tree Classifier Results:
- Training Accuracy: 1.0
- Test Accuracy: 0.6458333333333334
- Test F1-Score: 0.6475694444444445
+- Test 1v1 AUC-Score: 0.7684729549963925
+- Test 1vRest AUC-Score: 0.7462224419541492
We can see the model does actually quite well for how little training data there is and how poorly the data is distributed. This landmark shows that our processing algorithm for the MIDI is effective to at least some extent in distinguishing certain composers from others.
@@ -221,8 +223,10 @@ Random Forest Classifier Results:
- Training Accuracy: 1.0
- Test Accuracy: 0.8541666666666666
- Test F1-Score: 0.8519282808470453
+- Test 1v1 AUC-Score: 0.9701817279942282
+- Test 1vRest AUC-Score: 0.9668985078283857
-We can see that random forests drastically improve classification results. Since random forests are highly interpretable and cost efficient we would opt for this model over other less interpretable and cost ineffecitve models. This idea is showcased in the subsequent section with the introduction of gradient-boosted trees.
+We can see that random forests drastically improve classification results. Since random forests are highly interpretable and cost-efficient, we would opt for this model over other less interpretable and cost-ineffective models. This idea is showcased in the subsequent section with the introduction of gradient-boosted trees.
#### Gradient-Boosted Trees
@@ -255,7 +259,9 @@ Model 1 Training Table:
XGBoost Model 1 Results:
- Training Accuracy: 0.9652777777777778
- Test Accuracy: 0.8541666666666666
-- Test F1-Score: 0.8519282808470453
+- Test F1-Score: 0.7749568668046929
+- Test 1v1 AUC-Score: 0.9282258973665223
+- Test 1vRest AUC-Score: 0.936447444831591
@@ -288,7 +294,9 @@ Model 2 Training Table:
XGBoost Model 2 Results:
- Training Accuracy: 0.9930555555555556
- Test Accuracy: 0.8541666666666666
-- Test F1-Score: 0.8519282808470453
+- Test F1-Score: 0.7664231763068972
+- Test 1v1 AUC-Score: 0.9095452290764792
+- Test 1vRest AUC-Score: 0.910000647424428
@@ -360,3 +368,8 @@ Link to Gantt Chart: [Gantt Chart](https://gtvault-my.sharepoint.com/:x:/g/perso
[3.] Pál, T., & Várkonyi, D.T. (2020). Comparison of Dimensionality Reduction Techniques on Audio Signals. Conference on Theory and Practice of Information Technologies.
+[4.] Gupta, S. (2021). GTZAN-Genre Classification-Deep Learning-Val-92.4%. Kaggle. https://www.kaggle.com/code/imsparsh/gtzan-genre-classification-deep-learning-val-92-4
+
+[5.] Ferrer, L. (2022). Analysis and Comparison of Classification Metrics. arXiv. https://arxiv.org/pdf/2209.05355.pdf
+
+[6.] Brownlee, J. (2021, April 30). Tour of Evaluation Metrics for Imbalanced Classification. MachineLearningMastery.com. https://machinelearningmastery.com/tour-of-evaluation-metrics-for-imbalanced-classification/