Skip to content

Commit

Permalink
Run formats
Browse files (browse the repository at this point in the history)
  • Loading branch information
teobucci committed Mar 25, 2024
1 parent 78de313 commit 811fb7b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
10 changes: 5 additions & 5 deletions hawk/analysis/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from sklearn.model_selection import BaseCrossValidator, cross_val_score

inputs_names_lags_doc = """
:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags.
For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods,
:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags.
For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods,
and 'feature2' by 1 period.
"""

Expand All @@ -23,7 +23,7 @@ def prepare_data_with_lags(
) -> Tuple[pd.DataFrame, pd.Series]:
f"""
Prepares data for regression by generating lagged features for specified variables and targets.
:param df: The pandas DataFrame containing the time series data.
{inputs_names_lags_doc}
{target_name_doc}
Expand Down Expand Up @@ -72,7 +72,7 @@ def regression_analysis(
f"""
Performs regression analysis with support for either cross-validation or a train-test split,
based on the arguments provided.
{inputs_names_lags_doc}
{target_name_doc}
:param df: DataFrame for cross-validation mode. If specified, cv_scheme must also be provided.
Expand All @@ -87,7 +87,7 @@ def regression_analysis(
train_test_mode = bool(df_train is not None and df_test is not None)
if not (cross_val_mode ^ train_test_mode):
raise ValueError(
"Specify either cross-validation with 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
"Specify either a 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
)

if cross_val_mode:
Expand Down
2 changes: 1 addition & 1 deletion hawk/analysis/pcmci_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def get_connected_variables(graph: np.ndarray, var_names: list[str]) -> list[str
The target is assumed to be the last variable.
A connection of any type is considered: from, to, or undefined.
:param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci(), array of shape [N, N, tau_max+1]
:param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci()
:param var_names: the names of the variables
"""

Expand Down
12 changes: 6 additions & 6 deletions hawk/analysis/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,8 @@ def run_postprocessing_tefs_wrapper(
features_columns = dataframe["full"].drop(columns=target_columns).columns

# --------------------- Select features using threshold (conservative) ---------------------
# selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"])
# n_features_selected_with_threshold = len(selected_features_names_with_threshold)
# selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"]) # noqa
# n_features_selected_with_threshold = len(selected_features_names_with_threshold) # noqa

# --------------------- Compute test R2 for each number of features ---------------------
test_r2_train_test = []
Expand Down Expand Up @@ -510,13 +510,13 @@ def run_postprocessing_tefs_wrapper(
# fig, ax = plt.subplots(figsize=(10, 5))
# ax.plot(test_r2_cv.mean(axis=1), marker="o", label="Cross-validation")
# maxima = np.where(test_r2_cv.mean(axis=1) == test_r2_cv.mean(axis=1).max())[0]
# ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10)
# ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10)
# ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10) # noqa
# ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10) # noqa

# # plot confidence interval bands from cross-validation based on mean and standard deviation (90% confidence)
# alpha = 0.1
# quantile = scipy.stats.norm.ppf(1 - alpha / 2)
# ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3)
# ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3) # noqa

# ax.set_xlabel("Number of features")
# ax.set_ylabel("Test $R^2$")
Expand All @@ -528,7 +528,7 @@ def run_postprocessing_tefs_wrapper(
# else:
# threshold_text = simulation["params"]["threshold"]

# title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$"
# title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$" # noqa
# ax.set_title(title_text)
# ax.legend()
# if num_total_features < 30:
Expand Down

0 comments on commit 811fb7b

Please sign in to comment.