Run formats

climateintelligence · Mar 25, 2024 · 811fb7b
1 parent 78de313
commit 811fb7b
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 12 deletions.
diff --git a/hawk/analysis/metrics.py b/hawk/analysis/metrics.py
@@ -6,8 +6,8 @@
 from sklearn.model_selection import BaseCrossValidator, cross_val_score
 
 inputs_names_lags_doc = """
-:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags. 
-    For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods, 
+:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags.
+    For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods,
     and 'feature2' by 1 period.
 """
 
@@ -23,7 +23,7 @@ def prepare_data_with_lags(
 ) -> Tuple[pd.DataFrame, pd.Series]:
     f"""
     Prepares data for regression by generating lagged features for specified variables and targets.
-    
+
     :param df: The pandas DataFrame containing the time series data.
     {inputs_names_lags_doc}
     {target_name_doc}
@@ -72,7 +72,7 @@ def regression_analysis(
     f"""
     Performs regression analysis with support for either cross-validation or a train-test split,
     based on the arguments provided.
-    
+
     {inputs_names_lags_doc}
     {target_name_doc}
     :param df: DataFrame for cross-validation mode. If specified, cv_scheme must also be provided.
@@ -87,7 +87,7 @@ def regression_analysis(
     train_test_mode = bool(df_train is not None and df_test is not None)
     if not (cross_val_mode ^ train_test_mode):
         raise ValueError(
-            "Specify either cross-validation with 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
+            "Specify either a 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
         )
 
     if cross_val_mode:

diff --git a/hawk/analysis/pcmci_tools.py b/hawk/analysis/pcmci_tools.py
@@ -9,7 +9,7 @@ def get_connected_variables(graph: np.ndarray, var_names: list[str]) -> list[str
     The target is assumed to be the last variable.
     The connection is considered of any type: from, to, or undefined.
 
-    :param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci(), array of shape [N, N, tau_max+1]
+    :param graph: the graph of the PCMCI algorithm, i.e. what's returned by PCMCI.run_pcmci()
     :param var_names: the names of the variables
     """
 

diff --git a/hawk/analysis/postprocessing.py b/hawk/analysis/postprocessing.py
@@ -401,8 +401,8 @@ def run_postprocessing_tefs_wrapper(
         features_columns = dataframe["full"].drop(columns=target_columns).columns
 
         # --------------------- Select features using threshold (conservative) ---------------------
-        # selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"])
-        # n_features_selected_with_threshold = len(selected_features_names_with_threshold)
+        # selected_features_names_with_threshold = simulation["results"].select_features(simulation["params"]["threshold"]) # noqa
+        # n_features_selected_with_threshold = len(selected_features_names_with_threshold) # noqa
 
         # --------------------- Compute test R2 for each number of features ---------------------
         test_r2_train_test = []
@@ -510,13 +510,13 @@ def run_postprocessing_tefs_wrapper(
     # fig, ax = plt.subplots(figsize=(10, 5))
     # ax.plot(test_r2_cv.mean(axis=1), marker="o", label="Cross-validation")
     # maxima = np.where(test_r2_cv.mean(axis=1) == test_r2_cv.mean(axis=1).max())[0]
-    # ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10)
-    # ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10)
+    # ax.plot(maxima, test_r2_cv.mean(axis=1)[maxima], marker="o", color="red", linestyle="None", label="Maximum", markersize=10) # noqa
+    # ax.plot(n_features_selected_with_threshold, test_r2_cv.mean(axis=1)[n_features_selected_with_threshold], marker="o", color="green", linestyle="None", label="TEFS (conservative)", markersize=10) # noqa
 
     # # plot confidence interval bands from cross-validation based on mean and standard deviation (90% confidence)
     # alpha = 0.1
     # quantile = scipy.stats.norm.ppf(1 - alpha / 2)
-    # ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3)
+    # ax.fill_between(range(test_r2_cv.shape[0]), test_r2_cv.mean(axis=1) - test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), test_r2_cv.mean(axis=1) + test_r2_cv.std(axis=1) * quantile / np.sqrt(test_r2_cv.shape[1]), alpha=0.3) # noqa
 
     # ax.set_xlabel("Number of features")
     # ax.set_ylabel("Test $R^2$")
@@ -528,7 +528,7 @@ def run_postprocessing_tefs_wrapper(
     # else:
     #     threshold_text = simulation["params"]["threshold"]
 
-    # title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$"
+    # title_text = f"TEFS on basin {basin_name.upper()} with dataset {dataset_name}\n[lagfeatures $={simulation['params']['lagfeatures']}$, lagtarget $={simulation['params']['lagtarget']}$, direction = {simulation['params']['direction']}, threshold $={threshold_text}]$" # noqa
     # ax.set_title(title_text)
     # ax.legend()
     # if num_total_features < 30: