From 13ffd18ce8793eee511c122f8262732e8c0d2d8e Mon Sep 17 00:00:00 2001
From: Dobson <bdobson@ic.ac.uk>
Date: Thu, 21 Mar 2024 13:52:42 +0000
Subject: [PATCH 1/2] Create debug_interpolation.py

---
 swmmanywhere/misc/debug_interpolation.py | 51 ++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 swmmanywhere/misc/debug_interpolation.py

diff --git a/swmmanywhere/misc/debug_interpolation.py b/swmmanywhere/misc/debug_interpolation.py
new file mode 100644
index 00000000..db8ff920
--- /dev/null
+++ b/swmmanywhere/misc/debug_interpolation.py
@@ -0,0 +1,51 @@
+"""Test how well SWMM simulations line up."""
+from pathlib import Path
+
+import geopandas as gpd
+import pandas as pd
+
+from swmmanywhere import metric_utilities as mu
+from swmmanywhere.graph_utilities import load_graph
+
+base_dir = Path(r'C:\Users\bdobson\Documents\data\swmmanywhere\cranbrook')
+real_results = pd.read_parquet(base_dir / 'real' / 'real_results.parquet')
+synthetic_results = pd.read_parquet(base_dir / 'bbox_1' / 'model_1' /\
+                                     'results.parquet')
+synthetic_G = load_graph(base_dir / 'bbox_1' / 'model_1' / 'assign_id_graph.json')
+real_G = load_graph(base_dir / 'real' / 'graph.json')
+real_subs = gpd.read_file(base_dir / 'real' / 'subcatchments.geojson')
+
+# [from mu.outlet_nse_flow]
+# Collect the ids of edges that end at the chosen synthetic and real outlets
+_, syn_outlet = mu.best_outlet_match(synthetic_G, real_subs)
+syn_ids = [d['id'] for u,v,d in synthetic_G.edges(data=True)
+            if v == syn_outlet]
+_, real_outlet =  mu.dominant_outlet(real_G, real_results)
+real_ids = [d['id'] for u,v,d in real_G.edges(data=True)
+            if v == real_outlet]
+
+variable = 'flow'  # variable name handed to mu.extract_var below
+
+# [from mu.align_calc_nse]
+# Parse the 'date' columns as datetimes in both result tables
+synthetic_results['date'] = pd.to_datetime(synthetic_results['date'])
+real_results['date'] = pd.to_datetime(real_results['date'])
+
+# Extract the variable, keep only the outlet arcs, and sum the values per date
+syn_data = mu.extract_var(synthetic_results, variable)
+syn_data = syn_data.loc[syn_data.id.isin(syn_ids)]
+syn_data = syn_data.groupby('date').value.sum()
+
+real_data = mu.extract_var(real_results, variable)
+real_data = real_data.loc[real_data.id.isin(real_ids)]
+real_data = real_data.groupby('date').value.sum()
+
+# Outer-join on the date index so a date missing from either side becomes NaN
+df = pd.merge(syn_data, 
+                real_data, 
+                left_index = True,
+                right_index = True,
+                suffixes=('_syn', '_real'), 
+                how='outer').sort_index()
+
+print(str(df.dropna().shape[0] / df.shape[0] * 100))

From f2dd454155ebe17bb5033dafdeacadd485e0088d Mon Sep 17 00:00:00 2001
From: Dobson <bdobson@ic.ac.uk>
Date: Thu, 21 Mar 2024 14:10:59 +0000
Subject: [PATCH 2/2] Update debug_interpolation.py

---
 swmmanywhere/misc/debug_interpolation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/swmmanywhere/misc/debug_interpolation.py b/swmmanywhere/misc/debug_interpolation.py
index db8ff920..2db96b0d 100644
--- a/swmmanywhere/misc/debug_interpolation.py
+++ b/swmmanywhere/misc/debug_interpolation.py
@@ -49,3 +49,5 @@
                 how='outer').sort_index()
 
 print(str(df.dropna().shape[0] / df.shape[0] * 100))
+# We find that syn_data and real_data align perfectly only 0.2% of the time.
+# Thus, the interpolation in align_calc_nse is certainly needed.
\ No newline at end of file