Skip to content

Commit

Permalink
Implement outlet matching
Browse files Browse the repository at this point in the history
-Update and test KStest
  • Loading branch information
Dobson committed Feb 29, 2024
1 parent f4b9dba commit 44f304f
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 11 deletions.
82 changes: 79 additions & 3 deletions swmmanywhere/metric_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,88 @@ class kstest_betweenness(BaseMetric):
def __call__(self,
synthetic_G: nx.Graph,
real_G: nx.Graph,
real_subs: gpd.GeoDataFrame,
real_results: pd.DataFrame,
*args,
**kwargs) -> float:
"""Run the evaluated metric."""
syn_betweenness = nx.betweenness_centrality(synthetic_G)
real_betweenness = nx.betweenness_centrality(real_G)
# Identify synthetic outlet and subgraph
sg_syn, _ = best_outlet_match(synthetic_G, real_subs)

# Identify real outlet and subgraph
sg_real, _ = dominant_outlet(real_G, real_results)

syn_betweenness = nx.betweenness_centrality(sg_syn)
real_betweenness = nx.betweenness_centrality(sg_real)

#TODO does it make more sense to use statistic or pvalue?
return stats.ks_2samp(list(syn_betweenness.values()),
list(real_betweenness.values())).statistic
list(real_betweenness.values())).statistic

def best_outlet_match(synthetic_G: nx.Graph,
real_subs: gpd.GeoDataFrame) -> tuple[nx.Graph,int]:
"""Best outlet match.
Identify the outlet with the most nodes within the real_subs and return the
subgraph of the synthetic graph of nodes that drain to that outlet.
Args:
synthetic_G (nx.Graph): The synthetic graph.
real_subs (gpd.GeoDataFrame): The real subcatchments.
Returns:
nx.Graph: The subgraph of the synthetic graph for the outlet with the
most nodes within the real_subs.
int: The id of the outlet.
"""
# Identify which nodes fall within real_subs
nodes_df = pd.DataFrame([d for x,d in synthetic_G.nodes(data=True)],
index = synthetic_G.nodes)
nodes_gdf = gpd.GeoDataFrame(nodes_df,
geometry=gpd.points_from_xy(nodes_df.x,
nodes_df.y),
crs = synthetic_G.graph['crs'])
nodes_joined = nodes_gdf.sjoin(real_subs,
how="right",
predicate="intersects")

# Select the most common outlet
outlet = nodes_joined.outlet.value_counts().idxmax()

# Subselect the matching graph
outlet_nodes = [n for n, d in synthetic_G.nodes(data=True)
if d['outlet'] == outlet]
return synthetic_G.subgraph(outlet_nodes), outlet

def dominant_outlet(G: nx.Graph,
results: pd.DataFrame) -> tuple[nx.Graph,int]:
"""Dominant outlet.
Identify the outlet with highest flow along it and return the
subgraph of the graph of nodes that drain to that outlet.
Args:
G (nx.Graph): The graph.
results (pd.DataFrame): The results, which include a 'flow' and 'id'
column.
Returns:
nx.Graph: The subgraph of nodes/arcs that the reach max flow outlet
int: The id of the outlet.
"""
# Identify outlets of the graph
outlets = [n for n, outdegree in G.out_degree()
if outdegree == 0]
outlet_arcs = [d['id'] for u,v,d in G.edges(data=True)
if v in outlets]

# Identify the outlet with the highest flow
outlet_flows = results.loc[(results.variable == 'flow') &
(results.object.isin(outlet_arcs))]
max_outlet_arc = outlet_flows.groupby('object').value.mean().idxmax()
max_outlet = [v for u,v,d in G.edges(data=True)
if d['id'] == max_outlet_arc][0]

# Subselect the matching graph
sg = G.subgraph(nx.ancestors(G, max_outlet) | {max_outlet})
return sg, max_outlet
77 changes: 69 additions & 8 deletions tests/test_metric_utilities.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from pathlib import Path

import geopandas as gpd
import networkx as nx
import pandas as pd
import shapely

from swmmanywhere import metric_utilities as mu
from swmmanywhere.graph_utilities import load_graph
from swmmanywhere.metric_utilities import metrics as sm


def test_bias_flood_depth():
Expand Down Expand Up @@ -31,19 +34,77 @@ def test_bias_flood_depth():
})

# Run the metric
val = sm.bias_flood_depth(synthetic_results = synthetic_results,
val = mu.metrics.bias_flood_depth(synthetic_results = synthetic_results,
real_results = real_results,
synthetic_subs = synthetic_subs,
real_subs = real_subs)
assert val == -0.29523809523809524

def test_kstest_betweenness():
"""Test the kstest_betweenness metric."""
def test_best_outlet_match():
"""Test the best_outlet_match and ks_betweenness."""
G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
val = sm.kstest_betweenness(synthetic_G = G, real_G = G)
subs = [shapely.Polygon([(700262, 5709928),
(700262, 5709883),
(700351, 5709883),
(700351, 5709906),
(700306, 5709906),
(700306, 5709928),
(700262, 5709928)]),
shapely.Polygon([(700306, 5709928),
(700284, 5709928),
(700284, 5709950),
(700374, 5709950),
(700374, 5709906),
(700351, 5709906),
(700306, 5709906),
(700306, 5709928)]),
shapely.Polygon([(700351, 5709883),
(700351, 5709906),
(700374, 5709906),
(700374, 5709883),
(700396, 5709883),
(700396, 5709816),
(700329, 5709816),
(700329, 5709838),
(700329, 5709883),
(700351, 5709883)])]

subs = gpd.GeoDataFrame(data = {'id' : [107733,
1696030874,
6277683849]
},
geometry = subs,
crs = G.graph['crs'])

sg, outlet = mu.best_outlet_match(synthetic_G = G,
real_subs = subs)
outlets = nx.get_node_attributes(sg, 'outlet')
assert len(set(outlets.values())) == 1
assert outlet == 12354833

results = pd.DataFrame([{'object' : 4253560,
'variable' : 'flow',
'value' : 10},
{'object' : '',
'variable' : 'flow',
'value' : 5}])

val = mu.metrics.kstest_betweenness(synthetic_G = G,
real_G = G,
real_subs = subs,
real_results = results)

assert val == 0.0

# Move the outlet up one node
G_ = G.copy()
G_.remove_node(list(G.nodes)[0])
val = sm.kstest_betweenness(synthetic_G = G_, real_G = G)
assert val == 0.286231884057971
nx.set_node_attributes(G_,
list(G_.in_edges(outlet))[0][0],
'outlet')
G_.remove_node(outlet)

val = mu.metrics.kstest_betweenness(synthetic_G = G_,
real_G = G,
real_subs = subs,
real_results = results)
assert val == 0.4

0 comments on commit 44f304f

Please sign in to comment.