Merge pull request #52 from abarton51/austin
work
abarton51 authored Dec 5, 2023
2 parents dd8ea8c + 429c068 commit 5610c66
Showing 11 changed files with 2,355 additions and 470 deletions.
Binary file added assets/cc_accuracy_path.png
Binary file added assets/dt_cc_path.png
Binary file modified assets/dt_confusion_matrix.png
Binary file modified assets/rf_confusion_mat.png
Binary file added assets/xgboost_model1_confusion_matrix.png
Binary file added assets/xgboost_model2_confusion_matrix.png
Binary file added assets/xgboost_model3_confusion_matrix.png
146 changes: 110 additions & 36 deletions src/musicNet/main.py
@@ -6,6 +6,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score
import xgboost as xgb

path = 'src/musicNet/processed_data'
@@ -28,18 +29,58 @@
labels = ['Bach', 'Beethoven', 'Brahms', 'Mozart', 'Schubert']

dt_clf = DecisionTreeClassifier(random_state=42)

dt_clf.fit(X_train, y_train)
y_pred = dt_clf.predict(X_test)
training_accuracy = dt_clf.score(X_train, y_train)
accuracy = dt_clf.score(X_test, y_test)
print("Decision Tree Classifier")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
print(f"Test F1-Score: {f1_score(y_test, y_pred, average='weighted')}\n")
print(f"Tree depth: {dt_clf.get_depth()}")

path = dt_clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities
fig, ax = plt.subplots()
ax.plot(ccp_alphas[:-1], impurities[:-1], marker="o", drawstyle="steps-post")
ax.set_xlabel("Effective alpha")
ax.set_ylabel("Total impurity of leaves")
ax.set_title("Total Impurity vs Effective alpha for training set")
plt.show()
plt.close()

dt_clfs1 = []
for ccp_alpha in ccp_alphas:
    dt_clf = DecisionTreeClassifier(random_state=42, ccp_alpha=ccp_alpha)
    dt_clf.fit(X_train, y_train)
    dt_clfs1.append(dt_clf)
print(
    "Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
        dt_clfs1[-1].tree_.node_count, ccp_alphas[-1]
    )
)

train_scores1 = [dt_clf.score(X_train, y_train) for dt_clf in dt_clfs1]
test_scores1 = [dt_clf.score(X_test, y_test) for dt_clf in dt_clfs1]

fig, ax = plt.subplots()
ax.set_xlabel("Alpha")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy vs Alpha for training and testing sets")
ax.plot(ccp_alphas, train_scores1, marker="o", label="train", drawstyle="steps-post")
ax.plot(ccp_alphas, test_scores1, marker="o", label="test", drawstyle="steps-post")
ax.legend()
plt.show()
plt.close()
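# Optional sketch: one way to use the sweep above is to refit with the ccp_alpha
# that maximizes held-out accuracy. This selection rule is a simple illustrative
# heuristic (tuning directly on the test set), not a choice made by this project.
best_alpha = ccp_alphas[test_scores1.index(max(test_scores1))]
pruned_dt = DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha)
pruned_dt.fit(X_train, y_train)
print(f"Pruned tree (alpha={best_alpha:.5f}) test accuracy: {pruned_dt.score(X_test, y_test):.4f}")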

dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
ypred = dt_clf.predict(X_test)

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("Decision Tree Classifier - Confusion Matrix")
plt.show()
plt.close()

@@ -49,82 +90,115 @@
rf_clf.fit(X_train, y_train)
training_accuracy = rf_clf.score(X_train, y_train)
accuracy = rf_clf.score(X_test, y_test)
y_pred = rf_clf.predict(X_test)
print("Random Forest Classifier")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
print(f"Test F1-Score{f1_score(y_test, y_pred, average='weighted')}")
max_depth = 0
for tree in rf_clf.estimators_:
    if max_depth < tree.get_depth():
        max_depth = tree.get_depth()
print(f"Maximum depth of Random Forest: {max_depth}\n")

confusion_mat = confusion_matrix(y_test, y_pred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("Random Forest Classifier - Confusion Matrix")
plt.show()
plt.close()
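# Optional sketch: inspect which inputs the forest leans on most via
# feature_importances_. Feature names are not available in this script,
# so indices into the processed feature matrix are printed instead.
importances = rf_clf.feature_importances_
top10 = sorted(range(len(importances)), key=lambda i: importances[i], reverse=True)[:10]
for i in top10:
    print(f"Feature {i}: importance {importances[i]:.4f}")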

# ------------- XGBoost ----------------
# Training model 1

bst = xgb.XGBClassifier(n_estimators=20, max_depth=15, learning_rate=0.8, objective='multi:softmax', verbosity=2, subsample=0.25)
# fit model
bst.fit(X_train, y_train, verbose=True)
# make predictions
ypred = bst.predict(X_test)
training_accuracy = bst.score(X_train, y_train)
test_accuracy = bst.score(X_test, y_test)

print("XGBoost Classifier - 20 estimators, max_depth of 15, learning rate of 0.8, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
print(f"Test F1-Score{f1_score(y_test, y_pred, average='weighted')}\n")

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 1 - Confusion Matrix")
plt.show()
plt.close()
# Model 1 again, via the native xgboost API, to get a per-round table of training results

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

param = {'max_depth': 15, 'eta': 0.8, 'objective': 'multi:softmax'}
param['nthread'] = 4
param['num_class'] = 5
param['subsample'] = 0.25
param['eval_metric'] = ['auc', 'merror']
evallist = [(dtrain, 'train'), (dtest, 'eval')]

num_round = 20
bst = xgb.train(param, dtrain, num_round, evals=evallist, early_stopping_rounds=20)
bst.save_model('src\\musicNet\\saved_models\\bt\\austin1.model')
bst.dump_model('src\\musicNet\\saved_models\\bt\\dump.raw.txt')

ypred = bst.predict(dtest)
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 1 - Confusion Matrix")
plt.show()
plt.close()
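# Optional sketch: with early stopping and an eval set, the trained Booster
# records the best round it found; getattr guards against xgboost versions
# where the attribute is not set.
best_round = getattr(bst, "best_iteration", None)
if best_round is not None:
    print(f"Best boosting round on the eval set: {best_round}")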

# Training model 2

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

dtrain.save_binary('src/musicNet/data/xgboost/train.buffer')

param = {'max_depth': 10, 'eta': 1, 'objective': 'multi:softmax'}
param['nthread'] = 4
param['subsample'] = 0.25
param['num_class'] = 5
param['eval_metric'] = ['auc', 'merror']
evallist = [(dtrain, 'train'), (dtest, 'eval')]

num_round = 10000
bst = xgb.train(param, dtrain, num_round, evals=evallist, early_stopping_rounds=100)
bst.save_model('src\\musicNet\\saved_models\\bt\\austin1.model')
# dump model
bst.dump_model('src\\musicNet\\saved_models\\bt\\dump.raw.txt')
# dump model with feature map
#bst.dump_model('src/musicNet/saved_models/bt/dump.raw.txt', 'src/musicNet/saved_models/bt/featmap.txt')
#xgb.plot_importance(bst)
#xgb.plot_tree(bst, num_trees=2)
#xgb.to_graphviz(bst, num_trees=2)
ypred = bst.predict(dtest)
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()

plt.title("XGBoost Classifier - Model 2 - Confusion Matrix")
plt.show()
plt.close()

# Repackage model 2 so we can make actual predictions

xgb_clf = xgb.XGBClassifier(**param)
xgb_clf._Booster = bst  # note: calling fit() below retrains the sklearn wrapper from scratch

xgb_clf.fit(X_train, y_train, verbose=True)
# make predictions
ypred = xgb_clf.predict(X_test)
training_accuracy = xgb_clf.score(X_train, y_train)
test_accuracy = xgb_clf.score(X_test, y_test)

print("XGBoost Classifier - Model 3 - max_depth of 10, learning rate of 1, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1-Score: {f1_score(y_test, ypred, average='weighted')}\n")

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 3 - Confusion Matrix")
plt.show()
plt.close()
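# Optional sketch: a per-composer breakdown of the final model's predictions,
# assuming all five composers appear in y_test so target_names lines up.
from sklearn.metrics import classification_report
print(classification_report(y_test, ypred, target_names=labels))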
Binary file modified src/musicNet/saved_models/bt/austin1.model