From a9282b452e1d531e1a0bd06781f5f43f53a749aa Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 14:42:14 +0200
Subject: [PATCH 1/8] Remove encoding in pkl format

---
 hawk/processes/wps_causal.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hawk/processes/wps_causal.py b/hawk/processes/wps_causal.py
index c411275..35f5cc5 100644
--- a/hawk/processes/wps_causal.py
+++ b/hawk/processes/wps_causal.py
@@ -10,7 +10,7 @@
 
 FORMAT_PNG = Format("image/png", extension=".png", encoding="base64")
 FORMAT_PDF = Format("application/pdf", extension=".pdf", encoding="utf-8")
-FORMAT_PICKLE = Format("application/octet-stream", extension=".pkl", encoding="utf-8")
+FORMAT_PICKLE = Format("application/octet-stream", extension=".pkl")
 
 
 class Causal(Process):

From ee6305e6d9ec5da3a529029b16a0113f60b397ec Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 14:43:40 +0200
Subject: [PATCH 2/8] Avoid duplicate file name

---
 hawk/analysis/postprocessing.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hawk/analysis/postprocessing.py b/hawk/analysis/postprocessing.py
index 8928932..985038a 100644
--- a/hawk/analysis/postprocessing.py
+++ b/hawk/analysis/postprocessing.py
@@ -257,7 +257,7 @@ def run_postprocessing_pcmci(
     target_file_plots = {}
     for image_format in image_formats:
         target_file_plot = os.path.join(
-            destination_path, "algorithm_results", "pcmci", f"feature_presence.{image_format}"
+            destination_path, "algorithm_results", "pcmci", f"feature_presence_pcmci.{image_format}"
         )
         os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
         plt.savefig(target_file_plot, bbox_inches="tight")
@@ -384,7 +384,9 @@ def run_postprocessing_tefs(
     )
     target_file_plots = {}
     for image_format in image_formats:
-        target_file_plot = os.path.join(destination_path, "algorithm_results", "te", f"feature_presence.{image_format}")
+        target_file_plot = os.path.join(
+            destination_path, "algorithm_results", "te", f"feature_presence_tefs.{image_format}"
+        )
         os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
         plt.savefig(target_file_plot, bbox_inches="tight")
         target_file_plots[image_format] = target_file_plot

From 678f83532d92cf30c49c2925f2f6f992b333b936 Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 14:47:52 +0200
Subject: [PATCH 3/8] Use library's metrics

---
 hawk/analysis/main.py           |   2 +-
 hawk/analysis/metrics.py        | 124 --------------------------------
 hawk/analysis/postprocessing.py |   2 +-
 3 files changed, 2 insertions(+), 126 deletions(-)
 delete mode 100644 hawk/analysis/metrics.py

diff --git a/hawk/analysis/main.py b/hawk/analysis/main.py
index ac423ae..2d51add 100644
--- a/hawk/analysis/main.py
+++ b/hawk/analysis/main.py
@@ -2,11 +2,11 @@
 import os
 
 import pandas as pd
+from tefs.metrics import regression_analysis
 from tigramite.independence_tests.cmiknn import CMIknn
 from tigramite.independence_tests.parcorr import ParCorr
 
 from .file_management import save_to_pkl_file
-from .metrics import regression_analysis
 from .pcmci_tools import initialize_tigramite_df
 from .postprocessing import (
     run_postprocessing_pcmci,
diff --git a/hawk/analysis/metrics.py b/hawk/analysis/metrics.py
deleted file mode 100644
index 066f0c6..0000000
--- a/hawk/analysis/metrics.py
+++ /dev/null
@@ -1,124 +0,0 @@
-from typing import Any, Dict, Optional, Tuple
-
-import pandas as pd
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import r2_score
-from sklearn.model_selection import BaseCrossValidator, cross_val_score
-
-inputs_names_lags_doc = """
-:param inputs_names_lags: A dictionary mapping input feature names to their corresponding list of lags.
-    For example, {'feature1': [1, 2], 'feature2': [1]} indicates 'feature1' should be lagged by 1 and 2 periods,
-    and 'feature2' by 1 period.
-"""
-
-target_name_doc = """
-:param target_name: The name of the target variable in the DataFrame.
-"""
-
-
-def prepare_data_with_lags(
-    df: pd.DataFrame,
-    inputs_names_lags: Dict[str, list[int]],
-    target_name: str,
-) -> Tuple[pd.DataFrame, pd.Series]:
-    f"""
-    Prepares data for regression by generating lagged features for specified variables and targets.
-
-    :param df: The pandas DataFrame containing the time series data.
-    {inputs_names_lags_doc}
-    {target_name_doc}
-    :return: A tuple containing the lagged features DataFrame and the target variable Series.
-    """
-
-    required_columns = set([*inputs_names_lags.keys(), target_name])
-    if not required_columns.issubset(set(df.columns)):
-        raise ValueError(
-            "DataFrame 'df' must contain all the columns specified in 'features_names' and 'targets_names'."
-        )
-
-    for lags in inputs_names_lags.values():
-        if lags and min(lags) < 0:
-            raise ValueError("Lag for independent variables must be a non-negative integer.")
-
-    # Initialize a list to hold all DataFrame chunks
-    lagged_chunks = []
-
-    # Generate lagged inputs for the independent variables
-    for input, lags in inputs_names_lags.items():
-        for lag in lags:
-            lagged_chunk = df[input].shift(lag).to_frame(f"{input}_t-{lag}")
-            lagged_chunks.append(lagged_chunk)
-
-    # Adding target column
-    lagged_chunks.append(df[target_name].to_frame(target_name))
-
-    # Concatenate chunks
-    df_lagged = pd.concat(lagged_chunks, axis=1)
-
-    # Dropping rows with NaN values caused by shifting
-    df_lagged = df_lagged.dropna()
-
-    return df_lagged.drop(columns=target_name), df_lagged[target_name]
-
-
-def regression_analysis(
-    inputs_names_lags: Dict[str, list[int]],
-    target_name: str,
-    df: Optional[pd.DataFrame] = None,
-    cv_scheme: Optional[BaseCrossValidator] = None,
-    df_train: Optional[pd.DataFrame] = None,
-    df_test: Optional[pd.DataFrame] = None,
-) -> Any:
-    f"""
-    Performs regression analysis with support for either cross-validation or a train-test split,
-    based on the arguments provided.
-
-    {inputs_names_lags_doc}
-    {target_name_doc}
-    :param df: DataFrame for cross-validation mode. If specified, cv_scheme must also be provided.
-    :param cv_scheme: Cross-validator object for cross-validation mode. If specified, df must also be provided.
-    :param df_train: Training DataFrame for train-test split mode. Required if df_test is provided.
-    :param df_test: Testing DataFrame for train-test split mode. Requires df_train to be specified.
-    :return: Cross-validated scores or R-squared scores from train-test evaluation.
-    """
-
-    # Check that exactly one mode is specified
-    cross_val_mode = bool(df is not None and cv_scheme is not None)
-    train_test_mode = bool(df_train is not None and df_test is not None)
-    if not (cross_val_mode ^ train_test_mode):
-        raise ValueError(
-            "Specify either a 'cv_scheme' and 'df', or a train-test split with 'df_train' and 'df_test', not both."
-        )
-
-    if cross_val_mode:
-        if df is None or cv_scheme is None:
-            raise ValueError("Both 'df' and 'cv_scheme' must be specified for cross-validation mode.")
-
-        X, y = prepare_data_with_lags(
-            df,
-            inputs_names_lags,
-            target_name,
-        )
-
-        model = LinearRegression()
-        return cross_val_score(model, X, y, cv=cv_scheme)
-
-    elif train_test_mode:
-        if df_train is None or df_test is None:
-            raise ValueError("Both 'df_train' and 'df_test' must be specified for train-test split mode.")
-
-        X_train, y_train = prepare_data_with_lags(
-            df_train,
-            inputs_names_lags,
-            target_name,
-        )
-
-        X_test, y_test = prepare_data_with_lags(
-            df_test,
-            inputs_names_lags,
-            target_name,
-        )
-
-        model = LinearRegression().fit(X_train, y_train)
-        y_pred = model.predict(X_test)
-        return r2_score(y_test, y_pred)
diff --git a/hawk/analysis/postprocessing.py b/hawk/analysis/postprocessing.py
index 985038a..bff5452 100644
--- a/hawk/analysis/postprocessing.py
+++ b/hawk/analysis/postprocessing.py
@@ -6,10 +6,10 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
+from tefs.metrics import regression_analysis
 
 # from tigramite import plotting as tp
 from .file_management import save_to_pkl_file
-from .metrics import regression_analysis
 from .pcmci_tools import get_connected_variables
 
 

From 8dd1f08fb836490632c492cbecded4724fda013c Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 15:04:34 +0200
Subject: [PATCH 4/8] Change interactive defaults

---
 hawk/processes/simulation_interactive.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/hawk/processes/simulation_interactive.py b/hawk/processes/simulation_interactive.py
index 2d3bd68..e833b84 100644
--- a/hawk/processes/simulation_interactive.py
+++ b/hawk/processes/simulation_interactive.py
@@ -1,9 +1,5 @@
 from birdy import WPSClient
 
-train_file_path = "Emiliani1_train.csv"
-test_file_path = "Emiliani1_test.csv"
-target_column_name = "cyclostationary_mean_rr_4w_1"
-
 # ----------------- WPS -----------------
 
 wps = WPSClient("http://localhost:5002/wps", verify=False)
@@ -11,14 +7,14 @@
 
 # Input some data for the causal process
 resp = wps.causal(
-    dataset_train=open(train_file_path),
-    dataset_test=open(test_file_path),
-    target_column_name=target_column_name,
+    dataset_train="https://raw.githubusercontent.com/climateintelligence/hawk/main/hawk/demo/Ticino_train.csv",
+    dataset_test="https://raw.githubusercontent.com/climateintelligence/hawk/main/hawk/demo/Ticino_test.csv",
+    target_column_name="target",
     pcmci_test_choice="ParCorr",
-    pcmci_max_lag="0",
-    tefs_direction="both",
-    tefs_use_contemporary_features="Yes",
-    tefs_max_lag_features="1",
+    pcmci_max_lag="1",
+    tefs_direction="forward",
+    tefs_use_contemporary_features=True,
+    tefs_max_lag_features="2",
     tefs_max_lag_target="1",
 )
 

From d715d137293747da5e0d33afa858bac7b5e0088d Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 15:05:05 +0200
Subject: [PATCH 5/8] Change test defaults

---
 tests/test_causal_analysis.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_causal_analysis.py b/tests/test_causal_analysis.py
index a9a0ae2..64948a4 100644
--- a/tests/test_causal_analysis.py
+++ b/tests/test_causal_analysis.py
@@ -34,8 +34,8 @@ def test_causal_analysis():
     df_test = pd.read_csv("hawk/demo/Ticino_test.csv", header=0)
     target_column_name = "target"
     pcmci_test_choice = "ParCorr"
-    pcmci_max_lag = 0
-    tefs_direction = "forward"
+    pcmci_max_lag = 2
+    tefs_direction = "both"
     tefs_use_contemporary_features = True
     tefs_max_lag_features = 1
     tefs_max_lag_target = 1

From 207e586840296eda79ae971cff11a95dea2ddd02 Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 15:05:52 +0200
Subject: [PATCH 6/8] Fix typo and default

---
 hawk/processes/wps_causal.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hawk/processes/wps_causal.py b/hawk/processes/wps_causal.py
index 35f5cc5..70b733d 100644
--- a/hawk/processes/wps_causal.py
+++ b/hawk/processes/wps_causal.py
@@ -79,8 +79,8 @@ def __init__(self):
                 "tefs_use_contemporary_features",
                 "TEFS Use Contemporary Features",
                 data_type="boolean",
-                abstract="Choose whether to use comtemporary features in the TEFS algorithm.",
-                default="Yes",
+                abstract="Choose whether to use contemporary features in the TEFS algorithm.",
+                default=True,
             ),
             LiteralInput(
                 "tefs_max_lag_features",

From 9930bbec99ea71b62767d5be864ff87f4ee67f16 Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 15:06:04 +0200
Subject: [PATCH 7/8] Add defaults

---
 hawk/processes/wps_causal.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hawk/processes/wps_causal.py b/hawk/processes/wps_causal.py
index 70b733d..4f549ae 100644
--- a/hawk/processes/wps_causal.py
+++ b/hawk/processes/wps_causal.py
@@ -39,12 +39,14 @@ def __init__(self):
                 "Target Column Name",
                 data_type="string",
                 abstract="Please enter the case-specific name of the target variable in the dataframe.",
+                default="target",
             ),
             LiteralInput(
                 "pcmci_test_choice",
                 "PCMCI Test Choice",
                 data_type="string",
                 abstract="Choose the independence test to be used in PCMCI.",
+                default="ParCorr",
                 allowed_values=[
                     "ParCorr",
                     "CMIknn",
@@ -55,6 +57,7 @@ def __init__(self):
                 "PCMCI Max Lag",
                 data_type="string",
                 abstract="Choose the maximum lag to test used in PCMCI.",
+                default="1",
                 allowed_values=[
                     "0",
                     "1",
@@ -69,6 +72,7 @@ def __init__(self):
                 "TEFS Direction",
                 data_type="string",
                 abstract="Choose the direction of the TEFS algorithm.",
+                default="both",
                 allowed_values=[
                     "forward",
                     "backward",
@@ -87,6 +91,7 @@ def __init__(self):
                 "TEFS Max Lag Features",
                 data_type="string",
                 abstract="Choose the maximum lag of the features in the TEFS algorithm.",
+                default="1",
                 allowed_values=[
                     "no_lag",
                     "1",
@@ -101,6 +106,7 @@ def __init__(self):
                 "TEFS Max Lag Target",
                 data_type="string",
                 abstract="Choose the maximum lag of the target in the TEFS algorithm.",
+                default="1",
                 allowed_values=[
                     "1",
                     "2",

From 81599c3d38369d2905d694bf94ad574c24591477 Mon Sep 17 00:00:00 2001
From: Teo Bucci <teobucci8@gmail.com>
Date: Sat, 13 Apr 2024 15:08:17 +0200
Subject: [PATCH 8/8] Pin tefs version to 0.3.1

---
 environment.yml  | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index d34d498..a78fab3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
 - psutil
 - birdy
 #- tigramite
-#- tefs
+#- tefs==0.3.1
 - pandas
 - scikit-learn
 - numpy
diff --git a/requirements.txt b/requirements.txt
index f89c934..2bcfac0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ psutil
 pywps>=4.5.1,<4.6
 birdhouse-birdy
 tigramite>=5.2.5.1
-tefs
+tefs==0.3.1
 pandas
 scikit-learn
 numpy
\ No newline at end of file