From d53dcdbe16f0a8428b8375c42fa71a010749243c Mon Sep 17 00:00:00 2001
From: Dobson
Date: Tue, 12 Mar 2024 17:44:05 +0000
Subject: [PATCH 1/4] First attempt - need to expand tests

---
 swmmanywhere/metric_utilities.py | 91 +++++++++++++++++++++++++++-----
 tests/test_metric_utilities.py   | 69 +++++++++++++++++++++++-
 2 files changed, 145 insertions(+), 15 deletions(-)

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 289e51db..3d02cd17 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -136,6 +136,32 @@ def nse(y: np.ndarray,
     """Calculate Nash-Sutcliffe efficiency (NSE)."""
     return 1 - np.sum((y - yhat)**2) / np.sum((y - np.mean(y))**2)
 
+def nodes_to_subs(G: nx.Graph,
+                  subs: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """Nodes to subcatchments.
+
+    Classify the nodes of the graph to the subcatchments of the subs dataframe.
+
+    Args:
+        G (nx.Graph): The graph.
+        subs (gpd.GeoDataFrame): The subcatchments.
+
+    Returns:
+        gpd.GeoDataFrame: A dataframe from the nodes and data, and the
+            subcatchment information, distinguished by the column 'sub_id'.
+    """
+    nodes_df = pd.DataFrame([{**{'id' :x}, **d} for x,d in G.nodes(data=True)])
+    nodes_joined = (
+        gpd.GeoDataFrame(nodes_df,
+                         geometry=gpd.points_from_xy(nodes_df.x,
+                                                     nodes_df.y),
+                         crs = G.graph['crs'])
+        .sjoin(subs.rename(columns = {'id' : 'sub_id'}),
+               how="inner",
+               predicate="within")
+    )
+    return nodes_joined
+
 def best_outlet_match(synthetic_G: nx.Graph,
                       real_subs: gpd.GeoDataFrame) -> tuple[nx.Graph,int]:
     """Best outlet match.
@@ -152,19 +178,8 @@ def best_outlet_match(synthetic_G: nx.Graph,
         most nodes within the real_subs.
         int: The id of the outlet.
     """
-    # Identify which nodes fall within real_subs
-    nodes_df = pd.DataFrame([d for x,d in synthetic_G.nodes(data=True)],
-                            index = synthetic_G.nodes)
-    nodes_joined = (
-        gpd.GeoDataFrame(nodes_df,
-                         geometry=gpd.points_from_xy(nodes_df.x,
-                                                     nodes_df.y),
-                         crs = synthetic_G.graph['crs'])
-        .sjoin(real_subs,
-               how="right",
-               predicate="within")
-    )
-
+    nodes_joined = nodes_to_subs(synthetic_G, real_subs)
+
     # Select the most common outlet
     outlet = nodes_joined.outlet.value_counts().idxmax()
 
@@ -404,4 +419,52 @@ def outlet_nse_flooding(synthetic_G: nx.Graph,
                         real_results,
                         'flooding',
                         list(sg_syn.nodes),
-                        list(sg_real.nodes))
\ No newline at end of file
+                        list(sg_real.nodes))
+
+@metrics.register
+def subcatchment_nse_flooding(synthetic_G: nx.Graph,
+                              real_G: nx.Graph,
+                              synthetic_results: pd.DataFrame,
+                              real_results: pd.DataFrame,
+                              real_subs: gpd.GeoDataFrame,
+                              **kwargs) -> float:
+    """Subcatchment NSE flooding.
+
+    Classify synthetic nodes to real subcatchments and calculate the NSE of
+    flooding over time for each subcatchment. The metric produced is the median
+    NSE across all subcatchments.
+ """ + synthetic_joined = nodes_to_subs(synthetic_G, real_subs) + real_joined = nodes_to_subs(real_G, real_subs) + + # Extract data + real_results = extract_var(real_results, 'flooding') + synthetic_results = extract_var(synthetic_results, 'flooding') + + # Align data + synthetic_results = pd.merge(synthetic_results, + synthetic_joined[['id','sub_id']], + left_on='object', + right_on = 'id') + real_results = pd.merge(real_results, + real_joined[['id','sub_id']], + left_on='object', + right_on = 'id') + + results = pd.merge(real_results[['date','sub_id','value']], + synthetic_results[['date','sub_id','value']], + on = ['date','sub_id'], + suffixes = ('_real', '_sim') + ) + + val = ( + results + .groupby(['date','sub_id']) + .sum() + .reset_index() + .groupby('sub_id') + .apply(lambda x: nse(x.value_real, x.value_sim)) + .median() + ) + return val + diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index 5918f06c..4d330ce6 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -295,4 +295,71 @@ def test_netcomp(): G_ = load_graph(Path(__file__).parent / 'test_data' / 'street_graph.json') val_ = getattr(mu.metrics, func)(synthetic_G = G_, real_G = G) - assert np.isclose(val, val_), func \ No newline at end of file + assert np.isclose(val, val_), func + +def test_subcatchment_nse_flooding(): + """Test the outlet_nse_flow metric.""" + # Load data + G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json') + subs = get_subs() + + # Mock results + results = pd.DataFrame([{'object' : 4253560, + 'variable' : 'flow', + 'value' : 10, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 4253560, + 'variable' : 'flow', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 1696030874, + 'variable' : 'flooding', + 'value' : 4.5, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 770549936, + 'variable' : 'flooding', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 107736, + 'variable' : 'flooding', + 'value' : 10, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 1696030874, + 'variable' : 'flooding', + 'value' : 0, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 770549936, + 'variable' : 'flooding', + 'value' : 5, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107736, + 'variable' : 'flooding', + 'value' : 15, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}]) + + # Calculate NSE (perfect results) + val = mu.metrics.subcatchment_nse_flooding(synthetic_G = G, + real_G = G, + synthetic_results = results, + real_results = results, + real_subs = subs) + assert val == 1.0 + + # Calculate NSE (remapped node) + + G_ = G.copy() + # Create a mapping from the old name to the new name + mapping = {1696030874: 'new_name'} + + # Rename the node + G_ = nx.relabel_nodes(G_, mapping) + + results_ = results.copy() + results_.object = results_.object.replace(mapping) + + val = mu.metrics.subcatchment_nse_flooding(synthetic_G = G_, + synthetic_results = results_, + real_G = G, + real_results = results, + real_subs = subs) + assert val == 1.0 \ No newline at end of file From 86ca65801d03e177dd53405af3f9c7efb268724b Mon Sep 17 00:00:00 2001 From: Dobson Date: Wed, 13 Mar 2024 11:59:24 +0000 Subject: [PATCH 2/4] Update subcatchment_nse_flooding - Update test - Functionalise --- swmmanywhere/metric_utilities.py | 103 +++++++++++++++++++++---------- tests/test_metric_utilities.py | 21 ++++++- 2 files changed, 89 

diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 3d02cd17..4fa70949 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -136,6 +136,33 @@ def nse(y: np.ndarray,
     """Calculate Nash-Sutcliffe efficiency (NSE)."""
     return 1 - np.sum((y - yhat)**2) / np.sum((y - np.mean(y))**2)
 
+def median_nse_by_group(results: pd.DataFrame,
+                        gb_key: str) -> float:
+    """Median NSE by group.
+
+    Calculate the median Nash-Sutcliffe efficiency (NSE) of a variable over time
+    for each group in the results dataframe, and return the median of these
+    values.
+
+    Args:
+        results (pd.DataFrame): The results dataframe.
+        gb_key (str): The column to group by.
+
+    Returns:
+        float: The median NSE.
+    """
+    val = (
+        results
+        .groupby(['date',gb_key])
+        .sum()
+        .reset_index()
+        .groupby(gb_key)
+        .apply(lambda x: nse(x.value_real, x.value_sim))
+        .median()
+    )
+    return val
+
+
 def nodes_to_subs(G: nx.Graph,
                   subs: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """Nodes to subcatchments.
@@ -250,6 +277,40 @@ def edge_betweenness_centrality(G: nx.Graph,
             bt_c[n] += v
     return bt_c
 
+def align_by_subcatchment(var,
+                          synthetic_results: pd.DataFrame,
+                          real_results: pd.DataFrame,
+                          real_subs: gpd.GeoDataFrame,
+                          synthetic_G: nx.Graph,
+                          real_G: nx.Graph) -> pd.DataFrame:
+    """Align by subcatchment.
+
+    Align synthetic and real results by subcatchment and return the results.
+    """
+    synthetic_joined = nodes_to_subs(synthetic_G, real_subs)
+    real_joined = nodes_to_subs(real_G, real_subs)
+
+    # Extract data
+    real_results = extract_var(real_results, var)
+    synthetic_results = extract_var(synthetic_results, var)
+
+    # Align data
+    synthetic_results = pd.merge(synthetic_results,
+                                 synthetic_joined[['id','sub_id']],
+                                 left_on='object',
+                                 right_on = 'id')
+    real_results = pd.merge(real_results,
+                            real_joined[['id','sub_id']],
+                            left_on='object',
+                            right_on = 'id')
+
+    results = pd.merge(real_results[['date','sub_id','value']],
+                       synthetic_results[['date','sub_id','value']],
+                       on = ['date','sub_id'],
+                       suffixes = ('_real', '_sim')
+                       )
+    return results
+
 @metrics.register
 def nc_deltacon0(synthetic_G: nx.Graph,
                  real_G: nx.Graph,
@@ -421,6 +482,8 @@ def outlet_nse_flooding(synthetic_G: nx.Graph,
                         list(sg_syn.nodes),
                         list(sg_real.nodes))
 
+
+
 @metrics.register
 def subcatchment_nse_flooding(synthetic_G: nx.Graph,
                               real_G: nx.Graph,
                               synthetic_results: pd.DataFrame,
                               real_results: pd.DataFrame,
                               real_subs: gpd.GeoDataFrame,
                               **kwargs) -> float:
     """Subcatchment NSE flooding.
@@ -434,37 +497,11 @@ def subcatchment_nse_flooding(synthetic_G: nx.Graph,
     flooding over time for each subcatchment. The metric produced is the median
     NSE across all subcatchments.
""" - synthetic_joined = nodes_to_subs(synthetic_G, real_subs) - real_joined = nodes_to_subs(real_G, real_subs) - - # Extract data - real_results = extract_var(real_results, 'flooding') - synthetic_results = extract_var(synthetic_results, 'flooding') - - # Align data - synthetic_results = pd.merge(synthetic_results, - synthetic_joined[['id','sub_id']], - left_on='object', - right_on = 'id') - real_results = pd.merge(real_results, - real_joined[['id','sub_id']], - left_on='object', - right_on = 'id') - - results = pd.merge(real_results[['date','sub_id','value']], - synthetic_results[['date','sub_id','value']], - on = ['date','sub_id'], - suffixes = ('_real', '_sim') - ) - - val = ( - results - .groupby(['date','sub_id']) - .sum() - .reset_index() - .groupby('sub_id') - .apply(lambda x: nse(x.value_real, x.value_sim)) - .median() - ) - return val + results = align_by_subcatchment('flooding', + synthetic_results = synthetic_results, + real_results = real_results, + real_subs = real_subs, + synthetic_G = synthetic_G, + real_G = real_G) + return median_nse_by_group(results, 'sub_id') \ No newline at end of file diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index 4d330ce6..60350e0f 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -324,7 +324,15 @@ def test_subcatchment_nse_flooding(): 'variable' : 'flooding', 'value' : 10, 'date' : pd.to_datetime('2021-01-01 00:00:00')}, - {'object' : 1696030874, + {'object' : 107733, + 'variable' : 'flooding', + 'value' : 1, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 107737, + 'variable' : 'flooding', + 'value' : 2, + 'date' : pd.to_datetime('2021-01-01 00:00:00')}, + {'object' : 1696030874, 'variable' : 'flooding', 'value' : 0, 'date' : pd.to_datetime('2021-01-01 00:00:05')}, @@ -335,6 +343,14 @@ def test_subcatchment_nse_flooding(): {'object' : 107736, 'variable' : 'flooding', 'value' : 15, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107733, + 'variable' : 'flooding', + 'value' : 2, + 'date' : pd.to_datetime('2021-01-01 00:00:05')}, + {'object' : 107737, + 'variable' : 'flooding', + 'value' : 2, 'date' : pd.to_datetime('2021-01-01 00:00:05')}]) # Calculate NSE (perfect results) @@ -349,7 +365,8 @@ def test_subcatchment_nse_flooding(): G_ = G.copy() # Create a mapping from the old name to the new name - mapping = {1696030874: 'new_name'} + mapping = {1696030874: 'new_name', + 107737 : 'new_name2'} # Rename the node G_ = nx.relabel_nodes(G_, mapping) From e7b30417ede5c4066488d6e784bb45c16c13ce7e Mon Sep 17 00:00:00 2001 From: Dobson Date: Mon, 18 Mar 2024 09:21:56 +0000 Subject: [PATCH 3/4] Update metric_utilities.py --- swmmanywhere/metric_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py index 4fa70949..10ea285d 100644 --- a/swmmanywhere/metric_utilities.py +++ b/swmmanywhere/metric_utilities.py @@ -177,7 +177,7 @@ def nodes_to_subs(G: nx.Graph, gpd.GeoDataFrame: A dataframe from the nodes and data, and the subcatchment information, distinguished by the column 'sub_id'. 
""" - nodes_df = pd.DataFrame([{**{'id' :x}, **d} for x,d in G.nodes(data=True)]) + nodes_df = pd.DataFrame([{'id' :x, **d} for x,d in G.nodes(data=True)]) nodes_joined = ( gpd.GeoDataFrame(nodes_df, geometry=gpd.points_from_xy(nodes_df.x, From ef5454192af0bfe3b12c5767007f242263d1b635 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 09:26:37 +0000 Subject: [PATCH 4/4] Bump certifi from 2023.11.17 to 2024.2.2 Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.11.17 to 2024.2.2. - [Commits](https://github.com/certifi/python-certifi/compare/2023.11.17...2024.02.02) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- dev-requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 852b7e5a..3f098c98 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -25,7 +25,7 @@ build==1.0.3 # via pip-tools cdsapi==0.6.1 # via swmmanywhere (pyproject.toml) -certifi==2023.11.17 +certifi==2024.2.2 # via # fiona # netcdf4 diff --git a/requirements.txt b/requirements.txt index 32ffe117..769bf687 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ attrs==23.2.0 # referencing cdsapi==0.6.1 # via swmmanywhere (pyproject.toml) -certifi==2023.11.17 +certifi==2024.2.2 # via # fiona # netcdf4