Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

31 output model architecture to json for increased readability #36

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 9 additions & 30 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,53 +1,34 @@
---
repos:
# isort for jupyter notebooks
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.3.1
# Formats import order
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: nbqa-isort
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files"]

# Code formatter for both python files and jupyter notebooks
#Code formatter for both python files and jupyter notebooks
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black-jupyter
- id: black
language_version: python3.10

# code for converting jupyter notebooks to python scripts
- repo: https://github.com/mwouts/jupytext
rev: v1.14.0 # CURRENT_TAG/COMMIT_HASH
hooks:
- id: jupytext
args: [--from, notebooks///ipynb, --to, "scripts///py:percent", --sync, --pipe, black]

# Formats import order
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files"]

# remove unused imports
- repo: https://github.com/hadialqattan/pycln.git
rev: v2.1.3
hooks:
- id: pycln


# Code formatter for both python files and jupyter notebooks
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black-jupyter
- id: black
language_version: python3.10

# isort for jupyter notebooks
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.3.1
hooks:
- id: nbqa-isort

# additional hooks found within the pre-commit lib
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
Expand All @@ -61,5 +42,3 @@ repos:
- --autofix
- --indent=4
- --no-sort-keys


2 changes: 1 addition & 1 deletion MLP_Model/MLP_utils/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ LEARNING_RATE_MAX = 0.1
OPTIMIZER_LIST = [ "Adam", "RMSprop", "SGD" ]
METRIC = "loss"
DIRECTION = "minimize"
MODEL_NAME = "LPS_10_vs_DMSO"
MODEL_NAME = "LPS_10_vs_LPS_100"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirming: whenever you train a new model, do you have to change this model name? And the config.toml file name never changes?

I wonder if the file name should also change, in case the parameters change per model. Oh, but maybe these parameters won't change? (It would still likely be good to keep a record of the models that you've tested)

One final point - is the model name arbitrary? In other words, it doesn't specify which treatments to include, it is simply an identifier?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an issue, and I am opening one for it now. I will address it in a new PR. I will need to make my code more dynamic so that it parses the config to decide which treatments to test, and also make a separate config for each model. As of now, the selected treatments are hardcoded into the notebook.

35 changes: 16 additions & 19 deletions MLP_Model/MLP_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
These are helper functions meant to be called in a separate notebook or script
"""

import ast
import json
import pathlib
from pathlib import Path
from typing import Tuple
Expand All @@ -21,7 +21,7 @@
ModelTypeError,
OptimizationMetricError,
TrainingValidationTestingSplitError,
yDataTypeError,
YDataTypeError,
)
from MLP_utils.parameters import Parameters
from sklearn.metrics import (
Expand Down Expand Up @@ -370,7 +370,7 @@ def train_n_validate(
elif params.MODEL_TYPE == "Regression":
pass
else:
raise yDataTypeError
raise YDataTypeError

X_train_batch, y_train_batch = X_train_batch.to(
params.DEVICE
Expand Down Expand Up @@ -426,7 +426,7 @@ def train_n_validate(
elif params.MODEL_TYPE == "Regression":
pass
else:
raise yDataTypeError
raise YDataTypeError

X_valid_batch, y_valid_batch = X_valid_batch.to(
params.DEVICE
Expand Down Expand Up @@ -673,26 +673,26 @@ def extract_best_trial_params(

if MLP_params.MODEL_TYPE == "Multi_Class":
with open(
f"../../trained_models/architectures/Multi_Class/Multi_Class_{model_name}.txt",
f"../../trained_models/architectures/Multi_Class/Multi_Class_{model_name}.json",
"w",
) as f:
f.write(str(param_dict))
json.dump(param_dict, f, indent=4)
f.close()

elif MLP_params.MODEL_TYPE == "Binary_Classification":
with open(
f"../../trained_models/architectures/Binary_Classification/Binary_Classification_{model_name}.txt",
f"../../trained_models/architectures/Binary_Classification/Binary_Classification_{model_name}.json",
"w",
) as f:
f.write(str(param_dict))
json.dump(param_dict, f, indent=4)
f.close()

elif MLP_params.MODEL_TYPE == "Regression":
with open(
f"../../trained_models/architectures/Regression/Regression_{model_name}.txt",
f"../../trained_models/architectures/Regression/Regression_{model_name}.json",
"w",
) as f:
f.write(str(param_dict))
json.dump(param_dict, f, indent=4)
f.close()

else:
Expand Down Expand Up @@ -727,28 +727,25 @@ def optimized_model_create(
# load in model architecture from saved model architecture
if params.MODEL_TYPE == "Multi_Class":
with open(
f"../../trained_models/architectures/Multi_Class/Multi_Class_{model_name}.txt",
f"../../trained_models/architectures/Multi_Class/Multi_Class_{model_name}.json",
"r",
) as f:
parameter_dict = ast.literal_eval(f.read())
parameter_dict = json.load(f)
f.close()

elif params.MODEL_TYPE == "Binary_Classification":
with open(
f"../../trained_models/architectures/Binary_Classification/Binary_Classification_{model_name}.txt",
f"../../trained_models/architectures/Binary_Classification/Binary_Classification_{model_name}.json",
"r",
) as f:
parameter_dict = ast.literal_eval(f.read())
parameter_dict = json.load(f)
f.close()

elif params.MODEL_TYPE == "Regression":
with open(
f"../../trained_models/architectures/Regression/Regression_{model_name}.txt",
f"../../trained_models/architectures/Regression/Regression_{model_name}.json",
"r",
) as f:
parameter_dict = ast.literal_eval(f.read())
parameter_dict = json.load(f)
f.close()

else:
raise ModelTypeError

Expand Down
259 changes: 136 additions & 123 deletions MLP_Model/model_testing/notebooks/binary_classification_testing.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from sklearn.model_selection import train_test_split

sys.path.append("..")
sys.path.append("../..")
from MLP_utils.parameters import Parameters
from MLP_utils.utils import (
Dataset_formatter,
Expand All @@ -55,20 +55,20 @@
un_nest,
)

sys.path.append("../..")
sys.path.append("../../..")
from utils.utils import df_stats

# %%
# Import Data
# set data file path under pathlib path for multi-system use
file_path = Path(
"../../../Extracted_Features_(CSV_files)/SHSY5Y_preprocessed_df_sc_norm.parquet"
"../../../../Extracted_Features_(CSV_files)/SHSY5Y_preprocessed_df_sc_norm.parquet"
)

df = pq.read_table(file_path).to_pandas()

# %%
data = Path("../MLP_utils/config.toml")
data = Path("../../MLP_utils/config.toml")
config = toml.load(data)
params = Parameters()
params = parameter_set(params, config)
Expand Down
Loading