Skip to content

Commit

Permalink
Update preprocessing.py
Browse files Browse the repository at this point in the history
Refactor run_downloads to separate out each dataset.
  • Loading branch information
Dobson committed Mar 1, 2024
1 parent 92970e8 commit 7020df3
Showing 1 changed file with 92 additions and 66 deletions.
158 changes: 92 additions & 66 deletions swmmanywhere/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,96 +153,122 @@ def write_df(df: pd.DataFrame | gpd.GeoDataFrame,
else:
df.to_json(fid)

def prepare_precipitation(bbox: tuple[float, float, float, float],
                          addresses: parameters.FilePaths,
                          api_keys: dict[str, str],
                          target_crs: str,
                          source_crs: str = 'EPSG:4326'):
    """Download precipitation for the bounding box and store it reprojected.

    Does nothing when ``addresses.precipitation`` already exists on disk.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box passed through
            to ``prepare_data.download_precipitation``.
        addresses (FilePaths): Provides ``precipitation``, the output path.
        api_keys (dict): Must contain ``'cds_username'`` and ``'cds_api_key'``.
        target_crs (str): CRS the downloaded data is reprojected to.
        source_crs (str): CRS the downloaded data is assumed to be in.
            Defaults to ``'EPSG:4326'``.
    """
    destination = addresses.precipitation
    if destination.exists():
        # Already prepared on a previous run; avoid downloading again.
        return

    print(f'downloading precipitation to {destination}')
    raw = prepare_data.download_precipitation(bbox,
                                              api_keys['cds_username'],
                                              api_keys['cds_api_key'])
    # The index is flattened before reprojection (presumably so coordinate
    # levels become ordinary columns -- confirm against reproject_df).
    reprojected = go.reproject_df(raw.reset_index(), source_crs, target_crs)
    write_df(reprojected, destination)

def prepare_elvation(bbox: tuple[float, float, float, float],
                     addresses: parameters.FilePaths,
                     api_keys: dict[str, str],
                     target_crs: str):
    """Download elevation for the bounding box and store it reprojected.

    Does nothing when ``addresses.elevation`` already exists on disk.

    NOTE: the function name is misspelled ("elvation"); it is left unchanged
    here because other code calls it by this name.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box passed through
            to ``prepare_data.download_elevation``.
        addresses (FilePaths): Provides ``elevation``, the output path.
        api_keys (dict): Must contain ``'nasadem_key'``.
        target_crs (str): CRS the raster is reprojected to.
    """
    destination = addresses.elevation
    if destination.exists():
        return

    print(f'downloading elevation to {destination}')
    # The raw download only lives in a scratch directory; the reprojection
    # must therefore happen before the directory is cleaned up.
    with tempfile.TemporaryDirectory() as scratch_dir:
        raw_raster = Path(scratch_dir) / 'elevation.tif'
        prepare_data.download_elevation(raw_raster,
                                        bbox,
                                        api_keys['nasadem_key'])
        go.reproject_raster(target_crs, raw_raster, destination)

def prepare_building(bbox: tuple[float, float, float, float],
                     addresses: parameters.FilePaths,
                     target_crs: str):
    """Download building data, clip it to the bounding box and reproject it.

    Does nothing when ``addresses.building`` already exists. The national
    file at ``addresses.national_building`` is only downloaded when it is
    missing, so it can be reused across bounding boxes.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box; indices 0/2
            bound the first ``.cx`` axis and indices 1/3 the second.
        addresses (FilePaths): Provides ``national_building`` (input/cache)
            and ``building`` (output) paths.
        target_crs (str): CRS the clipped buildings are reprojected to.
    """
    clipped_path = addresses.building
    if clipped_path.exists():
        return

    national_path = addresses.national_building
    if not national_path.exists():
        print(f'downloading buildings to {national_path}')
        prepare_data.download_buildings(national_path, bbox[0], bbox[1])

    print(f'trimming buildings to {clipped_path}')
    # Coordinate-based slice keeps only geometries intersecting the bbox.
    clipped = gpd.read_parquet(national_path).cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]  # type: ignore
    write_df(clipped.to_crs(target_crs), clipped_path)

def prepare_street(bbox: tuple[float, float, float, float],
                   addresses: parameters.FilePaths,
                   target_crs: str,
                   source_crs: str = 'EPSG:4326'):
    """Download the street graph for the bounding box and save it reprojected.

    Does nothing when ``addresses.street`` already exists on disk.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box passed through
            to ``prepare_data.download_street``.
        addresses (FilePaths): Provides ``street``, the output path.
        target_crs (str): CRS the graph is reprojected to.
        source_crs (str): CRS the downloaded graph is assumed to be in.
            Defaults to ``'EPSG:4326'``.
    """
    destination = addresses.street
    if destination.exists():
        return

    print(f'downloading street network to {destination}')
    raw_graph = prepare_data.download_street(bbox)
    projected_graph = go.reproject_graph(raw_graph, source_crs, target_crs)
    gu.save_graph(projected_graph, destination)

def prepare_river(bbox: tuple[float, float, float, float],
                  addresses: parameters.FilePaths,
                  target_crs: str,
                  source_crs: str = 'EPSG:4326'):
    """Download the river graph for the bounding box and save it reprojected.

    Does nothing when ``addresses.river`` already exists on disk.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box passed through
            to ``prepare_data.download_river``.
        addresses (FilePaths): Provides ``river``, the output path.
        target_crs (str): CRS the graph is reprojected to.
        source_crs (str): CRS the downloaded graph is assumed to be in.
            Defaults to ``'EPSG:4326'``.
    """
    destination = addresses.river
    if destination.exists():
        return

    print(f'downloading river network to {destination}')
    raw_graph = prepare_data.download_river(bbox)
    projected_graph = go.reproject_graph(raw_graph, source_crs, target_crs)
    gu.save_graph(projected_graph, destination)

def run_downloads(bbox: tuple[float, float, float, float],
                  addresses: parameters.FilePaths,
                  api_keys: dict[str, str]):
    """Run the data downloads.

    Run the precipitation, elevation, building, street and river network
    downloads. If the data already exists, do not download it again. Reprojects
    data to the UTM zone.

    Each dataset is handled by its own ``prepare_*`` function, which checks
    for an existing output file itself; the precipitation, street and river
    helpers assume the downloaded data is in EPSG:4326 by default.

    Args:
        bbox (tuple[float, float, float, float]): Bounding box coordinates in
            the format (minx, miny, maxx, maxy) in EPSG:4326.
        addresses (FilePaths): Class containing the addresses of the directories.
        api_keys (dict): Dictionary containing the API keys.
    """
    # The UTM zone is derived from the bbox's (minx, miny) corner.
    target_crs = go.get_utm_epsg(bbox[0], bbox[1])

    # Download precipitation data
    # TODO precipitation dates..?
    prepare_precipitation(bbox, addresses, api_keys, target_crs)

    # Download elevation data
    prepare_elvation(bbox, addresses, api_keys, target_crs)

    # Download building data (national file, then trimmed to bbox)
    prepare_building(bbox, addresses, target_crs)

    # Download street network data
    prepare_street(bbox, addresses, target_crs)

    # Download river network data
    prepare_river(bbox, addresses, target_crs)

def create_starting_graph(addresses: parameters.FilePaths):
"""Create the starting graph.
Expand Down

0 comments on commit 7020df3

Please sign in to comment.