From f4b9dba7008abbb5f932d5ef59261cf114cf251c Mon Sep 17 00:00:00 2001 From: Dobson Date: Wed, 28 Feb 2024 17:34:54 +0000 Subject: [PATCH 1/3] Add example template for metric implementation --- swmmanywhere/metric_utilities.py | 107 +++++++++++++++++++++++++++++++ tests/test_metric_utilities.py | 49 ++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 swmmanywhere/metric_utilities.py create mode 100644 tests/test_metric_utilities.py diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py new file mode 100644 index 00000000..a01c4842 --- /dev/null +++ b/swmmanywhere/metric_utilities.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +"""Created 2023-12-20. + +@author: Barnaby Dobson +""" +from abc import ABC, abstractmethod +from typing import Callable + +import geopandas as gpd +import networkx as nx +import numpy as np +import pandas as pd +from scipy import stats + + +class BaseMetric(ABC): + """Base metric class.""" + @abstractmethod + def __call__(self, + *args, + **kwargs) -> float: + """Run the evaluated metric.""" + return 0 + +class MetricRegistry(dict): + """Registry object.""" + + def register(self, cls): + """Register a metric.""" + if cls.__name__ in self: + raise ValueError(f"{cls.__name__} already in the metric registry!") + + self[cls.__name__] = cls() + return cls + + def __getattr__(self, name): + """Get a metric from the graphfcn dict.""" + try: + return self[name] + except KeyError: + raise AttributeError(f"{name} NOT in the metric registry!") + + +metrics = MetricRegistry() + +def register_metric(cls) -> Callable: + """Register a metric. 
+ + Args: + cls (Callable): A class that inherits from BaseMetric + + Returns: + cls (Callable): The same class + """ + metrics.register(cls) + return cls + +def extract_var(df: pd.DataFrame, + var: str) -> pd.DataFrame: + """Extract var from a dataframe.""" + df_ = df.loc[df.variable == var] + df_['duration'] = (df_.date - \ + df_.date.min()).dt.total_seconds() + return df_ + +@register_metric +class bias_flood_depth(BaseMetric): + """Bias flood depth.""" + def __call__(self, + synthetic_results: pd.DataFrame, + real_results: pd.DataFrame, + synthetic_subs: gpd.GeoDataFrame, + real_subs: gpd.GeoDataFrame, + *args, + **kwargs) -> float: + """Run the evaluated metric.""" + + def _f(x): + return np.trapz(x.value,x.duration) + + syn_flooding = extract_var(synthetic_results, + 'flooding').groupby('object').apply(_f) + syn_area = synthetic_subs.impervious_area.sum() + syn_tot = syn_flooding.sum() / syn_area + + real_flooding = extract_var(real_results, + 'flooding').groupby('object').apply(_f) + real_area = real_subs.impervious_area.sum() + real_tot = real_flooding.sum() / real_area + + return (syn_tot - real_tot) / real_tot + +@register_metric +class kstest_betweenness(BaseMetric): + """KS two sided of betweenness distribution.""" + def __call__(self, + synthetic_G: nx.Graph, + real_G: nx.Graph, + *args, + **kwargs) -> float: + """Run the evaluated metric.""" + syn_betweenness = nx.betweenness_centrality(synthetic_G) + real_betweenness = nx.betweenness_centrality(real_G) + + #TODO does it make more sense to use statistic or pvalue? 
+ return stats.ks_2samp(list(syn_betweenness.values()), + list(real_betweenness.values())).statistic \ No newline at end of file diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py new file mode 100644 index 00000000..5cb599ab --- /dev/null +++ b/tests/test_metric_utilities.py @@ -0,0 +1,49 @@ +from pathlib import Path + +import pandas as pd + +from swmmanywhere.graph_utilities import load_graph +from swmmanywhere.metric_utilities import metrics as sm + + +def test_bias_flood_depth(): + """Test the bias_flood_depth metric.""" + # Create synthetic and real data + synthetic_results = pd.DataFrame({ + 'object': ['obj1', 'obj1','obj2','obj2'], + 'value': [10, 20, 5, 2], + 'variable': 'flooding', + 'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00', + '2021-01-01 00:00:00','2021-01-01 00:05:00']) + }) + real_results = pd.DataFrame({ + 'object': ['obj1', 'obj1','obj2','obj2'], + 'value': [15, 25, 10, 20], + 'variable': 'flooding', + 'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00', + '2021-01-01 00:00:00','2021-01-01 00:05:00']) + }) + synthetic_subs = pd.DataFrame({ + 'impervious_area': [100, 200], + }) + real_subs = pd.DataFrame({ + 'impervious_area': [150, 250], + }) + + # Run the metric + val = sm.bias_flood_depth(synthetic_results = synthetic_results, + real_results = real_results, + synthetic_subs = synthetic_subs, + real_subs = real_subs) + assert val == -0.29523809523809524 + +def test_kstest_betweenness(): + """Test the kstest_betweenness metric.""" + G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json') + val = sm.kstest_betweenness(synthetic_G = G, real_G = G) + assert val == 0.0 + + G_ = G.copy() + G_.remove_node(list(G.nodes)[0]) + val = sm.kstest_betweenness(synthetic_G = G_, real_G = G) + assert val == 0.286231884057971 \ No newline at end of file From 9a1e235d19651d1cc1a119a4ab54f328f0a71bdd Mon Sep 17 00:00:00 2001 From: Dobson Date: Tue, 5 Mar 2024 14:32:09 +0000 
Subject: [PATCH 2/3] Update metric_utilities.py --- swmmanywhere/metric_utilities.py | 66 ++++++++++++++------------------ 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py index a01c4842..e2f9498d 100644 --- a/swmmanywhere/metric_utilities.py +++ b/swmmanywhere/metric_utilities.py @@ -3,7 +3,7 @@ @author: Barnaby Dobson """ -from abc import ABC, abstractmethod +from inspect import signature from typing import Callable import geopandas as gpd @@ -13,25 +13,33 @@ from scipy import stats -class BaseMetric(ABC): - """Base metric class.""" - @abstractmethod - def __call__(self, - *args, - **kwargs) -> float: - """Run the evaluated metric.""" - return 0 - class MetricRegistry(dict): """Registry object.""" - def register(self, cls): + def register(self, func: Callable) -> Callable: """Register a metric.""" - if cls.__name__ in self: - raise ValueError(f"{cls.__name__} already in the metric registry!") - - self[cls.__name__] = cls() - return cls + if func.__name__ in self: + raise ValueError(f"{func.__name__} already in the metric registry!") + + allowable_params = {"synthetic_results": pd.DataFrame, + "real_results": pd.DataFrame, + "synthetic_subs": gpd.GeoDataFrame, + "real_subs": gpd.GeoDataFrame, + "synthetic_G": nx.Graph, + "real_G": nx.Graph} + + sig = signature(func) + for param, obj in sig.parameters.items(): + if param == 'kwargs': + continue + if param not in allowable_params.keys(): + raise ValueError(f"{param} of {func.__name__} not allowed.") + if obj.annotation != allowable_params[param]: + raise ValueError(f"""{param} of {func.__name__} should be of + type {allowable_params[param]}, not + {obj.__class__}.""") + self[func.__name__] = func + return func def __getattr__(self, name): """Get a metric from the graphfcn dict.""" @@ -43,18 +51,6 @@ def __getattr__(self, name): metrics = MetricRegistry() -def register_metric(cls) -> Callable: - """Register a metric. 
- - Args: - cls (Callable): A class that inherits from BaseMetric - - Returns: - cls (Callable): The same class - """ - metrics.register(cls) - return cls - def extract_var(df: pd.DataFrame, var: str) -> pd.DataFrame: """Extract var from a dataframe.""" @@ -63,15 +59,12 @@ def extract_var(df: pd.DataFrame, df_.date.min()).dt.total_seconds() return df_ -@register_metric -class bias_flood_depth(BaseMetric): - """Bias flood depth.""" - def __call__(self, +@metrics.register +def bias_flood_depth( synthetic_results: pd.DataFrame, real_results: pd.DataFrame, synthetic_subs: gpd.GeoDataFrame, real_subs: gpd.GeoDataFrame, - *args, **kwargs) -> float: """Run the evaluated metric.""" @@ -90,13 +83,10 @@ def _f(x): return (syn_tot - real_tot) / real_tot -@register_metric -class kstest_betweenness(BaseMetric): - """KS two sided of betweenness distribution.""" - def __call__(self, +@metrics.register +def kstest_betweenness( synthetic_G: nx.Graph, real_G: nx.Graph, - *args, **kwargs) -> float: """Run the evaluated metric.""" syn_betweenness = nx.betweenness_centrality(synthetic_G) From c79fd6b607944ea62279860e06c9e74d7737a252 Mon Sep 17 00:00:00 2001 From: Dobson Date: Wed, 6 Mar 2024 11:22:52 +0000 Subject: [PATCH 3/3] Update test_metric_utilities.py --- tests/test_metric_utilities.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index 5cb599ab..a4739029 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -1,5 +1,6 @@ from pathlib import Path +import numpy as np import pandas as pd from swmmanywhere.graph_utilities import load_graph @@ -35,7 +36,7 @@ def test_bias_flood_depth(): real_results = real_results, synthetic_subs = synthetic_subs, real_subs = real_subs) - assert val == -0.29523809523809524 + assert np.isclose(val, -0.29523809523809524) def test_kstest_betweenness(): """Test the kstest_betweenness metric.""" @@ -46,4 +47,4 @@ def 
test_kstest_betweenness(): G_ = G.copy() G_.remove_node(list(G.nodes)[0]) val = sm.kstest_betweenness(synthetic_G = G_, real_G = G) - assert val == 0.286231884057971 \ No newline at end of file + assert np.isclose(val, 0.286231884057971) \ No newline at end of file