From 811fb7b3639ad0175be19ab776b6502ffef4726f Mon Sep 17 00:00:00 2001
From: Teo Bucci
Date: Mon, 25 Mar 2024 22:17:10 +0100
Subject: [PATCH] Run formats

---
 hawk/analysis/metrics.py        | 10 +++++-----
 hawk/analysis/pcmci_tools.py    |  2 +-
 hawk/analysis/postprocessing.py | 12 ++++++------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hawk/analysis/metrics.py b/hawk/analysis/metrics.py
index 788a29e..fe56201 100644
--- a/hawk/analysis/metrics.py
+++ b/hawk/analysis/metrics.py
@@ -6,8 +6,8 @@ from sklearn.model_selection import BaseCrossValidator, cross_val_score
 
 
 inputs_names_lags_doc = """
-:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags. 
-    For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods, 
+:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags.
+    For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods,
     and 'feature2' by 1 period.
 """
 
@@ -23,7 +23,7 @@ def prepare_data_with_lags(
 ) -> Tuple[pd.DataFrame, pd.Series]:
     f"""
     Prepares data for regression by generating lagged features for specified variables and targets.
-    
+
     :param df: The pandas DataFrame containing the time series data.
     {inputs_names_lags_doc}
     {target_name_doc}
@@ -72,7 +72,7 @@ def regression_analysis(
     f"""
     Performs regression analysis with support for either cross-validation or a train-test split,
     based on the arguments provided.
-    
+
     {inputs_names_lags_doc}
     {target_name_doc}
     :param df: DataFrame for cross-validation mode. If specified, cv_scheme must also be provided.
@@ -87,7 +87,7 @@
     train_test_mode = bool(df_train is not None and df_test is not None)
     if not (cross_val_mode ^ train_test_mode):
         raise ValueError(
-            "Specify either cross-validation with 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
+            "Specify either a 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
        )
 
     if cross_val_mode:
diff --git a/hawk/analysis/pcmci_tools.py b/hawk/analysis/pcmci_tools.py
index 67357c4..6ce7a7c 100644
--- a/hawk/analysis/pcmci_tools.py
+++ b/hawk/analysis/pcmci_tools.py
@@ -9,7 +9,7 @@ def get_connected_variables(graph: np.ndarray, var_names: list[str]) -> list[str
     The target is assumed to be the last variable.
     The connection is considered of any type: from, to, or undefined.
 
-    :param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci(), array of shape [N, N, tau_max+1]
+    :param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci()
     :param var_names: the names of the variables
     """
 
diff --git a/hawk/analysis/postprocessing.py b/hawk/analysis/postprocessing.py
index 009e150..8f6037f 100644
--- a/hawk/analysis/postprocessing.py
+++ b/hawk/analysis/postprocessing.py
@@ -401,8 +401,8 @@ def run_postprocessing_tefs_wrapper(
     features_columns = dataframe["full"].drop(columns=target_columns).columns
 
     # --------------------- Select features using threshold (conservative) ---------------------
-    # selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"])
-    # n_features_selected_with_threshold = len(selected_features_names_with_threshold)
+    # selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"]) # noqa
+    # n_features_selected_with_threshold = len(selected_features_names_with_threshold) # noqa
 
     # --------------------- Compute test R2 for each number of features ---------------------
     test_r2_train_test = []
@@ -510,13 +510,13 @@
     # fig, ax = plt.subplots(figsize=(10, 5))
     # ax.plot(test_r2_cv.mean(axis=1), marker="o", label="Cross-validation")
     # maxima = np.where(test_r2_cv.mean(axis=1) == test_r2_cv.mean(axis=1).max())[0]
-    # ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10)
-    # ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10)
+    # ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10) # noqa
+    # ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10) # noqa
 
     # # plot confidence interval bands from cross-validation based on mean and standard deviation (90% confidence)
     # alpha = 0.1
     # quantile = scipy.stats.norm.ppf(1 - alpha / 2)
-    # ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3)
+    # ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3) # noqa
     # ax.set_xlabel("Number of features")
     # ax.set_ylabel("Test $R^2$")
 
@@ -528,7 +528,7 @@
     # else:
     # threshold_text = simulation["params"]["threshold"]
 
-    # title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$"
+    # title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$" # noqa
     # ax.set_title(title_text)
     # ax.legend()
     # if num_total_features < 30:
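
Usage note: the hunk at metrics.py line 87 distinguishes two mutually exclusive calling modes for regression_analysis. Below is a minimal sketch of both modes, assuming the keyword names taken from the docstrings and error string in this patch (inputs_names_lags, target_name, df, cv_scheme, df_train, df_test); the import path, the example data, the split sizes, and the returned score variables are illustrative assumptions, not a confirmed API.

# Hypothetical usage sketch -- keyword names come from the docstrings in this patch;
# the import path and return values are assumptions, not a confirmed API.
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit

from hawk.analysis.metrics import regression_analysis  # assumed import path

rng = np.random.default_rng(0)
df = pd.DataFrame(
    {
        "feature1": rng.normal(size=100),
        "feature2": rng.normal(size=100),
        "target": rng.normal(size=100),
    }
)

# 'feature1' lagged by 1 and 2 periods, 'feature2' by 1 period (format from the docstring).
inputs_names_lags = {"feature1": [1, 2], "feature2": [1]}

# Cross-validation mode: pass 'df' together with 'cv_scheme'.
score_cv = regression_analysis(
    inputs_names_lags=inputs_names_lags,
    target_name="target",
    df=df,
    cv_scheme=TimeSeriesSplit(n_splits=5),
)

# Train-test mode: pass 'df_train' and 'df_test' instead. Supplying both modes
# at once triggers the ValueError whose message is shortened in this patch.
score_test = regression_analysis(
    inputs_names_lags=inputs_names_lags,
    target_name="target",
    df_train=df.iloc[:80],
    df_test=df.iloc[80:],
)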