From d53dcdbe16f0a8428b8375c42fa71a010749243c Mon Sep 17 00:00:00 2001
From: Dobson
Date: Tue, 12 Mar 2024 17:44:05 +0000
Subject: [PATCH 1/4] First attempt - need to expand tests

---
 swmmanywhere/metric_utilities.py | 91 +++++++++++++++++++++++++++-----
 tests/test_metric_utilities.py   | 69 +++++++++++++++++++++++-
 2 files changed, 145 insertions(+), 15 deletions(-)

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 289e51db..3d02cd17 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -136,6 +136,32 @@ def nse(y: np.ndarray,
     """Calculate Nash-Sutcliffe efficiency (NSE)."""
     return 1 - np.sum((y - yhat)**2) / np.sum((y - np.mean(y))**2)
 
+def nodes_to_subs(G: nx.Graph,
+                  subs: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """Nodes to subcatchments.
+
+    Classify the nodes of the graph to the subcatchments of the subs dataframe.
+
+    Args:
+        G (nx.Graph): The graph.
+        subs (gpd.GeoDataFrame): The subcatchments.
+
+    Returns:
+        gpd.GeoDataFrame: A dataframe from the nodes and data, and the
+            subcatchment information, distinguished by the column 'sub_id'.
+    """
+    nodes_df = pd.DataFrame([{**{'id' :x}, **d} for x,d in G.nodes(data=True)])
+    nodes_joined = (
+        gpd.GeoDataFrame(nodes_df,
+                         geometry=gpd.points_from_xy(nodes_df.x,
+                                                     nodes_df.y),
+                         crs = G.graph['crs'])
+        .sjoin(subs.rename(columns = {'id' : 'sub_id'}),
+               how="inner",
+               predicate="within")
+    )
+    return nodes_joined
+
 def best_outlet_match(synthetic_G: nx.Graph,
                       real_subs: gpd.GeoDataFrame) -> tuple[nx.Graph,int]:
     """Best outlet match.
@@ -152,19 +178,8 @@ def best_outlet_match(synthetic_G: nx.Graph,
         most nodes within the real_subs.
         int: The id of the outlet.
     """
-    # Identify which nodes fall within real_subs
-    nodes_df = pd.DataFrame([d for x,d in synthetic_G.nodes(data=True)],
-                            index = synthetic_G.nodes)
-    nodes_joined = (
-        gpd.GeoDataFrame(nodes_df,
-                         geometry=gpd.points_from_xy(nodes_df.x,
-                                                     nodes_df.y),
-                         crs = synthetic_G.graph['crs'])
-        .sjoin(real_subs,
-               how="right",
-               predicate="within")
-    )
-
+    nodes_joined = nodes_to_subs(synthetic_G, real_subs)
+
     # Select the most common outlet
     outlet = nodes_joined.outlet.value_counts().idxmax()
 
@@ -404,4 +419,52 @@ def outlet_nse_flooding(synthetic_G: nx.Graph,
                         real_results,
                         'flooding',
                         list(sg_syn.nodes),
-                        list(sg_real.nodes))
\ No newline at end of file
+                        list(sg_real.nodes))
+
+@metrics.register
+def subcatchment_nse_flooding(synthetic_G: nx.Graph,
+                              real_G: nx.Graph,
+                              synthetic_results: pd.DataFrame,
+                              real_results: pd.DataFrame,
+                              real_subs: gpd.GeoDataFrame,
+                              **kwargs) -> float:
+    """Subcatchment NSE flooding.
+
+    Classify synthetic nodes to real subcatchments and calculate the NSE of
+    flooding over time for each subcatchment. The metric produced is the median
+    NSE across all subcatchments.
+ """ + synthetic_joined = nodes_to_subs(synthetic_G, real_subs) + real_joined = nodes_to_subs(real_G, real_subs) + + # Extract data + real_results = extract_var(real_results, 'flooding') + synthetic_results = extract_var(synthetic_results, 'flooding') + + # Align data + synthetic_results = pd.merge(synthetic_results, + synthetic_joined[['id','sub_id']], + left_on='object', + right_on = 'id') + real_results = pd.merge(real_results, + real_joined[['id','sub_id']], + left_on='object', + right_on = 'id') + + results = pd.merge(real_results[['date','sub_id','value']], + synthetic_results[['date','sub_id','value']], + on = ['date','sub_id'], + suffixes = ('_real', '_sim') + ) + + val = ( + results + .groupby(['date','sub_id']) + .sum() + .reset_index() + .groupby('sub_id') + .apply(lambda x: nse(x.value_real, x.value_sim)) + .median() + ) + return val + diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index 5918f06c..4d330ce6 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -295,4 +295,71 @@ def test_netcomp(): G_ = load_graph(Path(__file__).parent / 'test_data' / 'street_graph.json') val_ = getattr(mu.metrics, func)(synthetic_G = G_, real_G = G) - assert np.isclose(val, val_), func \ No newline at end of file + assert np.isclose(val, val_), func + +def test_subcatchment_nse_flooding(): + """Test the outlet_nse_flow metric.""" + # Load data + G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json') + subs = get_subs() + + # Mock results + results = pd.DataFrame([{'object' : 4253560, + 'variable' : 'flow', + 'value' : 10, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 4253560, + 'variable' : 'flow', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 1696030874, + 'variable' : 'flooding', + 'value' : 4.5, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 770549936, + 'variable' : 'flooding', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 107736, + 'variable' : 'flooding', + 'value' : 10, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 1696030874, + 'variable' : 'flooding', + 'value' : 0, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 770549936, + 'variable' : 'flooding', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107736, + 'variable' : 'flooding', + 'value' : 15, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}]) + + # Calculate NSE (perfect results) + val = mu.metrics.subcatchment_nse_flooding(synthetic_G = G, + real_G = G, + synthetic_results = results, + real_results = results, + real_subs = subs) + assert val == 1.0 + + # Calculate NSE (remapped node) + + G_ = G.copy() + # Create a mapping from the old name to the new name + mapping = {1696030874: 'new_name'} + + # Rename the node + G_ = nx.relabel_nodes(G_, mapping) + + results_ = results.copy() + results_.object = results_.object.replace(mapping) + + val = mu.metrics.subcatchment_nse_flooding(synthetic_G = G_, + synthetic_results = results_, + real_G = G, + real_results = results, + real_subs = subs) + assert val == 1.0 \ No newline at end of file From 86ca65801d03e177dd53405af3f9c7efb268724b Mon Sep 17 00:00:00 2001 From: Dobson Date: Wed, 13 Mar 2024 11:59:24 +0000 Subject: [PATCH 2/4] Update subcatchment_nse_flooding - Update test - Functionalise --- swmmanywhere/metric_utilities.py | 103 +++++++++++++++++++++---------- tests/test_metric_utilities.py | 21 ++++++- 2 files changed, 89 

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 3d02cd17..4fa70949 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -136,6 +136,33 @@ def nse(y: np.ndarray,
     """Calculate Nash-Sutcliffe efficiency (NSE)."""
     return 1 - np.sum((y - yhat)**2) / np.sum((y - np.mean(y))**2)
 
+def median_nse_by_group(results: pd.DataFrame,
+                        gb_key: str) -> float:
+    """Median NSE by group.
+
+    Calculate the median Nash-Sutcliffe efficiency (NSE) of a variable over time
+    for each group in the results dataframe, and return the median of these
+    values.
+
+    Args:
+        results (pd.DataFrame): The results dataframe.
+        gb_key (str): The column to group by.
+
+    Returns:
+        float: The median NSE.
+    """
+    val = (
+        results
+        .groupby(['date',gb_key])
+        .sum()
+        .reset_index()
+        .groupby(gb_key)
+        .apply(lambda x: nse(x.value_real, x.value_sim))
+        .median()
+    )
+    return val
+
+
 def nodes_to_subs(G: nx.Graph,
                   subs: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """Nodes to subcatchments.
@@ -250,6 +277,40 @@ def edge_betweenness_centrality(G: nx.Graph,
             bt_c[n] += v
     return bt_c
 
+def align_by_subcatchment(var,
+                          synthetic_results: pd.DataFrame,
+                          real_results: pd.DataFrame,
+                          real_subs: gpd.GeoDataFrame,
+                          synthetic_G: nx.Graph,
+                          real_G: nx.Graph) -> pd.DataFrame:
+    """Align by subcatchment.
+
+    Align synthetic and real results by subcatchment and return the results.
+    """
+    synthetic_joined = nodes_to_subs(synthetic_G, real_subs)
+    real_joined = nodes_to_subs(real_G, real_subs)
+
+    # Extract data
+    real_results = extract_var(real_results, var)
+    synthetic_results = extract_var(synthetic_results, var)
+
+    # Align data
+    synthetic_results = pd.merge(synthetic_results,
+                                 synthetic_joined[['id','sub_id']],
+                                 left_on='object',
+                                 right_on = 'id')
+    real_results = pd.merge(real_results,
+                            real_joined[['id','sub_id']],
+                            left_on='object',
+                            right_on = 'id')
+
+    results = pd.merge(real_results[['date','sub_id','value']],
+                       synthetic_results[['date','sub_id','value']],
+                       on = ['date','sub_id'],
+                       suffixes = ('_real', '_sim')
+                       )
+    return results
+
 @metrics.register
 def nc_deltacon0(synthetic_G: nx.Graph,
                  real_G: nx.Graph,
@@ -421,6 +482,8 @@ def outlet_nse_flooding(synthetic_G: nx.Graph,
                         list(sg_syn.nodes),
                         list(sg_real.nodes))
 
+
+
 @metrics.register
 def subcatchment_nse_flooding(synthetic_G: nx.Graph,
                               real_G: nx.Graph,
                               synthetic_results: pd.DataFrame,
                               real_results: pd.DataFrame,
                               real_subs: gpd.GeoDataFrame,
                               **kwargs) -> float:
     """Subcatchment NSE flooding.
@@ -434,37 +497,11 @@ def subcatchment_nse_flooding(synthetic_G: nx.Graph,
     flooding over time for each subcatchment. The metric produced is the median
     NSE across all subcatchments.
""" - synthetic_joined = nodes_to_subs(synthetic_G, real_subs) - real_joined = nodes_to_subs(real_G, real_subs) - - # Extract data - real_results = extract_var(real_results, 'flooding') - synthetic_results = extract_var(synthetic_results, 'flooding') - - # Align data - synthetic_results = pd.merge(synthetic_results, - synthetic_joined[['id','sub_id']], - left_on='object', - right_on = 'id') - real_results = pd.merge(real_results, - real_joined[['id','sub_id']], - left_on='object', - right_on = 'id') - - results = pd.merge(real_results[['date','sub_id','value']], - synthetic_results[['date','sub_id','value']], - on = ['date','sub_id'], - suffixes = ('_real', '_sim') - ) - - val = ( - results - .groupby(['date','sub_id']) - .sum() - .reset_index() - .groupby('sub_id') - .apply(lambda x: nse(x.value_real, x.value_sim)) - .median() - ) - return val + results = align_by_subcatchment('flooding', + synthetic_results = synthetic_results, + real_results = real_results, + real_subs = real_subs, + synthetic_G = synthetic_G, + real_G = real_G) + return median_nse_by_group(results, 'sub_id') \ No newline at end of file diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index 4d330ce6..60350e0f 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -324,7 +324,15 @@ def test_subcatchment_nse_flooding(): 'variable' : 'flooding', 'value' : 10, 'date' : pd.to_datetime('2021-01-01 00:00:00')}, - {'object' : 1696030874, + {'object' : 107733, + 'variable' : 'flooding', + 'value' : 1, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 107737, + 'variable' : 'flooding', + 'value' : 2, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 1696030874, 'variable' : 'flooding', 'value' : 0, 'date' : pd.to_datetime('2021-01-01 00:00:05')}, @@ -335,6 +343,14 @@ def test_subcatchment_nse_flooding(): {'object' : 107736, 'variable' : 'flooding', 'value' : 15, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107733, + 'variable' : 'flooding', + 'value' : 2, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107737, + 'variable' : 'flooding', + 'value' : 2, 'date' : pd.to_datetime('2021-01-01 00:00:05')}]) # Calculate NSE (perfect results) @@ -349,7 +365,8 @@ def test_subcatchment_nse_flooding(): G_ = G.copy() # Create a mapping from the old name to the new name - mapping = {1696030874: 'new_name'} + mapping = {1696030874: 'new_name', + 107737 : 'new_name2'} # Rename the node G_ = nx.relabel_nodes(G_, mapping) From e7b30417ede5c4066488d6e784bb45c16c13ce7e Mon Sep 17 00:00:00 2001 From: Dobson Date: Mon, 18 Mar 2024 09:21:56 +0000 Subject: [PATCH 3/4] Update metric_utilities.py --- swmmanywhere/metric_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py index 4fa70949..10ea285d 100644 --- a/swmmanywhere/metric_utilities.py +++ b/swmmanywhere/metric_utilities.py @@ -177,7 +177,7 @@ def nodes_to_subs(G: nx.Graph, gpd.GeoDataFrame: A dataframe from the nodes and data, and the subcatchment information, distinguished by the column 'sub_id'. 
""" - nodes_df = pd.DataFrame([{**{'id' :x}, **d} for x,d in G.nodes(data=True)]) + nodes_df = pd.DataFrame([{'id' :x, **d} for x,d in G.nodes(data=True)]) nodes_joined = ( gpd.GeoDataFrame(nodes_df, geometry=gpd.points_from_xy(nodes_df.x, From ef5454192af0bfe3b12c5767007f242263d1b635 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 09:26:37 +0000 Subject: [PATCH 4/4] Bump certifi from 2023.11.17 to 2024.2.2 Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.11.17 to 2024.2.2. - [Commits](https://github.com/certifi/python-certifi/compare/2023.11.17...2024.02.02) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- dev-requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 852b7e5a..3f098c98 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -25,7 +25,7 @@ build==1.0.3 # via pip-tools cdsapi==0.6.1 # via swmmanywhere (pyproject.toml) -certifi==2023.11.17 +certifi==2024.2.2 # via # fiona # netcdf4 diff --git a/requirements.txt b/requirements.txt index 32ffe117..769bf687 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ attrs==23.2.0 # referencing cdsapi==0.6.1 # via swmmanywhere (pyproject.toml) -certifi==2023.11.17 +certifi==2024.2.2 # via # fiona # netcdf4