From f4b9dba7008abbb5f932d5ef59261cf114cf251c Mon Sep 17 00:00:00 2001
From: Dobson <bdobson@ic.ac.uk>
Date: Wed, 28 Feb 2024 17:34:54 +0000
Subject: [PATCH 1/3] Add example template for metric implementation

---
 swmmanywhere/metric_utilities.py | 107 +++++++++++++++++++++++++++++++
 tests/test_metric_utilities.py   |  49 ++++++++++++++
 2 files changed, 156 insertions(+)
 create mode 100644 swmmanywhere/metric_utilities.py
 create mode 100644 tests/test_metric_utilities.py

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
new file mode 100644
index 00000000..a01c4842
--- /dev/null
+++ b/swmmanywhere/metric_utilities.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+"""Created 2023-12-20.
+
+@author: Barnaby Dobson
+"""
+from abc import ABC, abstractmethod
+from typing import Callable
+
+import geopandas as gpd
+import networkx as nx
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+
+class BaseMetric(ABC):
+    """Base metric class."""
+    @abstractmethod
+    def __call__(self, 
+                 *args,
+                 **kwargs) -> float:
+        """Run the evaluated metric."""
+        return 0
+
+class MetricRegistry(dict): 
+    """Registry object.""" 
+    
+    def register(self, cls):
+        """Register a metric."""
+        if cls.__name__ in self:
+            raise ValueError(f"{cls.__name__} already in the metric registry!")
+
+        self[cls.__name__] = cls()
+        return cls
+
+    def __getattr__(self, name):
+        """Get a metric from the graphfcn dict."""
+        try:
+            return self[name]
+        except KeyError:
+            raise AttributeError(f"{name} NOT in the metric registry!") from None
+        
+
+metrics = MetricRegistry()
+
+def register_metric(cls) -> Callable:
+    """Register a metric.
+
+    Args:
+        cls (Callable): A class that inherits from BaseMetric
+
+    Returns:
+        cls (Callable): The same class
+    """
+    metrics.register(cls)
+    return cls
+
+def extract_var(df: pd.DataFrame,
+                     var: str) -> pd.DataFrame:
+    """Extract var from a dataframe."""
+    df_ = df.loc[df.variable == var].copy()  # avoid SettingWithCopyWarning
+    df_['duration'] = (df_.date - \
+                        df_.date.min()).dt.total_seconds()
+    return df_
+
+@register_metric
+class bias_flood_depth(BaseMetric):
+    """Bias flood depth."""
+    def __call__(self, 
+                 synthetic_results: pd.DataFrame,
+                 real_results: pd.DataFrame,
+                 synthetic_subs: gpd.GeoDataFrame,
+                 real_subs: gpd.GeoDataFrame,
+                 *args,
+                 **kwargs) -> float:
+        """Run the evaluated metric."""
+        
+        def _f(x):
+            return np.trapz(x.value,x.duration)
+
+        syn_flooding = extract_var(synthetic_results,
+                                    'flooding').groupby('object').apply(_f)
+        syn_area = synthetic_subs.impervious_area.sum()
+        syn_tot = syn_flooding.sum() / syn_area
+
+        real_flooding = extract_var(real_results,
+                                    'flooding').groupby('object').apply(_f)
+        real_area = real_subs.impervious_area.sum()
+        real_tot = real_flooding.sum() / real_area
+
+        return (syn_tot - real_tot) / real_tot
+
+@register_metric
+class kstest_betweenness(BaseMetric):
+    """KS two sided of betweenness distribution."""
+    def __call__(self, 
+                 synthetic_G: nx.Graph,
+                 real_G: nx.Graph,
+                 *args,
+                 **kwargs) -> float:
+        """Run the evaluated metric."""
+        syn_betweenness = nx.betweenness_centrality(synthetic_G)
+        real_betweenness = nx.betweenness_centrality(real_G)
+
+        #TODO does it make more sense to use statistic or pvalue?
+        return stats.ks_2samp(list(syn_betweenness.values()),
+                              list(real_betweenness.values())).statistic
\ No newline at end of file
diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py
new file mode 100644
index 00000000..5cb599ab
--- /dev/null
+++ b/tests/test_metric_utilities.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+
+import pandas as pd
+
+from swmmanywhere.graph_utilities import load_graph
+from swmmanywhere.metric_utilities import metrics as sm
+
+
+def test_bias_flood_depth():
+    """Test the bias_flood_depth metric."""
+    # Create synthetic and real data
+    synthetic_results = pd.DataFrame({
+        'object': ['obj1', 'obj1','obj2','obj2'],
+        'value': [10, 20, 5, 2],
+        'variable': 'flooding',
+        'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00',
+                                 '2021-01-01 00:00:00','2021-01-01 00:05:00'])
+    })
+    real_results = pd.DataFrame({
+        'object': ['obj1', 'obj1','obj2','obj2'],
+        'value': [15, 25, 10, 20],
+        'variable': 'flooding',
+        'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00',
+                                 '2021-01-01 00:00:00','2021-01-01 00:05:00'])
+    })
+    synthetic_subs = pd.DataFrame({
+        'impervious_area': [100, 200],
+    })
+    real_subs = pd.DataFrame({
+        'impervious_area': [150, 250],
+    })
+
+    # Run the metric
+    val = sm.bias_flood_depth(synthetic_results = synthetic_results, 
+                              real_results = real_results,
+                              synthetic_subs = synthetic_subs,
+                              real_subs = real_subs)
+    assert val == -0.29523809523809524
+
+def test_kstest_betweenness():
+    """Test the kstest_betweenness metric."""
+    G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
+    val = sm.kstest_betweenness(synthetic_G = G, real_G = G)
+    assert val == 0.0
+
+    G_ = G.copy()
+    G_.remove_node(list(G.nodes)[0])
+    val = sm.kstest_betweenness(synthetic_G = G_, real_G = G)
+    assert val == 0.286231884057971
\ No newline at end of file

From 9a1e235d19651d1cc1a119a4ab54f328f0a71bdd Mon Sep 17 00:00:00 2001
From: Dobson <bdobson@ic.ac.uk>
Date: Tue, 5 Mar 2024 14:32:09 +0000
Subject: [PATCH 2/3] Update metric_utilities.py

---
 swmmanywhere/metric_utilities.py | 66 ++++++++++++++------------------
 1 file changed, 28 insertions(+), 38 deletions(-)

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index a01c4842..e2f9498d 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -3,7 +3,7 @@
 
 @author: Barnaby Dobson
 """
-from abc import ABC, abstractmethod
+from inspect import signature
 from typing import Callable
 
 import geopandas as gpd
@@ -13,25 +13,33 @@
 from scipy import stats
 
 
-class BaseMetric(ABC):
-    """Base metric class."""
-    @abstractmethod
-    def __call__(self, 
-                 *args,
-                 **kwargs) -> float:
-        """Run the evaluated metric."""
-        return 0
-
 class MetricRegistry(dict): 
     """Registry object.""" 
     
-    def register(self, cls):
+    def register(self, func: Callable) -> Callable:
         """Register a metric."""
-        if cls.__name__ in self:
-            raise ValueError(f"{cls.__name__} already in the metric registry!")
-
-        self[cls.__name__] = cls()
-        return cls
+        if func.__name__ in self:
+            raise ValueError(f"{func.__name__} already in the metric registry!")
+
+        allowable_params = {"synthetic_results": pd.DataFrame,
+                            "real_results": pd.DataFrame,
+                            "synthetic_subs": gpd.GeoDataFrame,
+                            "real_subs": gpd.GeoDataFrame,
+                            "synthetic_G": nx.Graph,
+                            "real_G": nx.Graph}
+
+        sig = signature(func)  # check each parameter's name and annotation
+        for param, obj in sig.parameters.items():
+            if param == 'kwargs':
+                continue
+            if param not in allowable_params:
+                raise ValueError(f"{param} of {func.__name__} not allowed.")
+            if obj.annotation != allowable_params[param]:
+                raise ValueError(f"{param} of {func.__name__} should be of "
+                                 f"type {allowable_params[param]}, not "
+                                 f"{obj.annotation}.")
+        self[func.__name__] = func
+        return func
 
     def __getattr__(self, name):
         """Get a metric from the graphfcn dict."""
@@ -43,18 +51,6 @@ def __getattr__(self, name):
 
 metrics = MetricRegistry()
 
-def register_metric(cls) -> Callable:
-    """Register a metric.
-
-    Args:
-        cls (Callable): A class that inherits from BaseMetric
-
-    Returns:
-        cls (Callable): The same class
-    """
-    metrics.register(cls)
-    return cls
-
 def extract_var(df: pd.DataFrame,
                      var: str) -> pd.DataFrame:
     """Extract var from a dataframe."""
@@ -63,15 +59,12 @@ def extract_var(df: pd.DataFrame,
                         df_.date.min()).dt.total_seconds()
     return df_
 
-@register_metric
-class bias_flood_depth(BaseMetric):
-    """Bias flood depth."""
-    def __call__(self, 
+@metrics.register
+def bias_flood_depth(
                  synthetic_results: pd.DataFrame,
                  real_results: pd.DataFrame,
                  synthetic_subs: gpd.GeoDataFrame,
                  real_subs: gpd.GeoDataFrame,
-                 *args,
                  **kwargs) -> float:
         """Run the evaluated metric."""
         
@@ -90,13 +83,10 @@ def _f(x):
 
         return (syn_tot - real_tot) / real_tot
 
-@register_metric
-class kstest_betweenness(BaseMetric):
-    """KS two sided of betweenness distribution."""
-    def __call__(self, 
+@metrics.register
+def kstest_betweenness( 
                  synthetic_G: nx.Graph,
                  real_G: nx.Graph,
-                 *args,
                  **kwargs) -> float:
         """Run the evaluated metric."""
         syn_betweenness = nx.betweenness_centrality(synthetic_G)

From c79fd6b607944ea62279860e06c9e74d7737a252 Mon Sep 17 00:00:00 2001
From: Dobson <bdobson@ic.ac.uk>
Date: Wed, 6 Mar 2024 11:22:52 +0000
Subject: [PATCH 3/3] Update test_metric_utilities.py

---
 tests/test_metric_utilities.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py
index 5cb599ab..a4739029 100644
--- a/tests/test_metric_utilities.py
+++ b/tests/test_metric_utilities.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+import numpy as np
 import pandas as pd
 
 from swmmanywhere.graph_utilities import load_graph
@@ -35,7 +36,7 @@ def test_bias_flood_depth():
                               real_results = real_results,
                               synthetic_subs = synthetic_subs,
                               real_subs = real_subs)
-    assert val == -0.29523809523809524
+    assert np.isclose(val, -0.29523809523809524)
 
 def test_kstest_betweenness():
     """Test the kstest_betweenness metric."""
@@ -46,4 +47,4 @@ def test_kstest_betweenness():
     G_ = G.copy()
     G_.remove_node(list(G.nodes)[0])
     val = sm.kstest_betweenness(synthetic_G = G_, real_G = G)
-    assert val == 0.286231884057971
\ No newline at end of file
+    assert np.isclose(val, 0.286231884057971)
\ No newline at end of file