-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Aggregate all dims simultaneously in conservative (#51)
* dot product over all dims simultaneously * add benchmarking against xesmf * add changelog * add output_chunks arg, match input chunks otherwise * update changelog * add notebook to docs
- Loading branch information
Showing
6 changed files
with
424 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Performance of `xesmf` vs `xarray-regrid`\n", | ||
"\n", | ||
"Compare the two conservative methods using a moderately-sized synthetic dask dataset of about 4GB." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import dask.array as da\n", | ||
"import xarray as xr\n", | ||
"import xesmf\n", | ||
"\n", | ||
"import xarray_regrid\n", | ||
"\n", | ||
"bounds = dict(south=-90, north=90, west=-180, east=180)\n", | ||
"\n", | ||
"source = xarray_regrid.Grid(\n", | ||
" resolution_lat=0.25,\n", | ||
" resolution_lon=0.25,\n", | ||
" **bounds,\n", | ||
").create_regridding_dataset()\n", | ||
"\n", | ||
"target = xarray_regrid.Grid(\n", | ||
" resolution_lat=1,\n", | ||
" resolution_lon=1,\n", | ||
" **bounds,\n", | ||
").create_regridding_dataset()\n", | ||
"\n", | ||
"\n", | ||
"def source_data(source, chunks, n_times=1000):\n", | ||
" data = da.random.random(\n", | ||
" size=(n_times, source.latitude.size, source.longitude.size),\n", | ||
" chunks=chunks,\n", | ||
" ).astype(\"float32\")\n", | ||
"\n", | ||
" data = xr.DataArray(\n", | ||
" data,\n", | ||
" dims=[\"time\", \"latitude\", \"longitude\"],\n", | ||
" coords={\n", | ||
" \"time\": xr.date_range(\"2000-01-01\", periods=n_times, freq=\"D\"),\n", | ||
" \"latitude\": source.latitude,\n", | ||
" \"longitude\": source.longitude,\n", | ||
" }\n", | ||
" )\n", | ||
"\n", | ||
" return data\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Chunking\n", | ||
"\n", | ||
"Test \"pancake\" (chunked in time) and \"churro\" (chunked in space) chunks of different sizes. The \"small\" versions are about 4 MB, and the \"large\" are about 100 MB." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"chunk_schemes = {\n", | ||
" \"pancake_small\": (1, -1, -1),\n", | ||
" \"pancake_large\": (25, -1, -1),\n", | ||
" \"churro_small\": (-1, 32, 32),\n", | ||
" \"churro_large\": (-1, 160, 160),\n", | ||
"}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xesmf/backend.py:56: UserWarning: Latitude is outside of [-90, 90]\n", | ||
" warnings.warn('Latitude is outside of [-90, 90]')\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xesmf/backend.py:56: UserWarning: Latitude is outside of [-90, 90]\n", | ||
" warnings.warn('Latitude is outside of [-90, 90]')\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# For larger grids, generating weights is quite expensive\n", | ||
"xesmf_regridder = xesmf.Regridder(source, target, \"conservative\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Timings\n", | ||
"\n", | ||
"Run timings for different chunkings schemes and with NaN skipping enabled and disabled, across both libraries. Compare the ratio of `xesmf / xarray-regrid` to see the speedup factor of using this library." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 21, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 72.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (32, 32).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 72.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (32, 32).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 72.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (32, 32).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 72.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (32, 32).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 6.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (160, 160).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 6.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (160, 160).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 6.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (160, 160).\n", | ||
" result_var = func(*data_vars)\n", | ||
"/home/slevang/miniconda3/envs/xarray-regrid/lib/python3.12/site-packages/xarray/core/computation.py:320: PerformanceWarning: Regridding is increasing the number of chunks by a factor of 6.0, you might want to specify sizes in `output_chunks` in the regridder call. Default behaviour is to preserve the chunk sizes from the input (160, 160).\n", | ||
" result_var = func(*data_vars)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import time\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"pd.options.display.precision = 1\n", | ||
"\n", | ||
"\n", | ||
"def do_regrid(data, target, skipna):\n", | ||
" data.regrid.conservative(target, skipna=skipna).compute()\n", | ||
"\n", | ||
"\n", | ||
"def do_xesmf(data, target, skipna):\n", | ||
" xesmf_regridder(data, skipna=skipna).compute()\n", | ||
"\n", | ||
"\n", | ||
"def timing_grid(func, repeats=2):\n", | ||
" times = pd.DataFrame(\n", | ||
" index=chunk_schemes.keys(),\n", | ||
" columns=[\"skipna=False\", \"skipna=True\"],\n", | ||
" )\n", | ||
" for name, chunks in chunk_schemes.items():\n", | ||
" data = source_data(source, chunks)\n", | ||
" for skipna in [False, True]:\n", | ||
" execution_times = []\n", | ||
" for _ in range(repeats):\n", | ||
" start = time.perf_counter()\n", | ||
" func(data, target, skipna)\n", | ||
" end = time.perf_counter()\n", | ||
" execution_times.append(end - start)\n", | ||
" # Sometimes the first execution is a little slower\n", | ||
" times.loc[name, f\"skipna={skipna}\"] = min(execution_times)\n", | ||
"\n", | ||
" return times\n", | ||
"\n", | ||
"\n", | ||
"regrid_times = timing_grid(do_regrid)\n", | ||
"xesmf_times = timing_grid(do_xesmf)\n", | ||
"ratio = xesmf_times / regrid_times\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Results\n", | ||
"\n", | ||
"With current implementations, `xesmf` is slightly faster for large pancake-style chunks. `xarray-regrid` is much faster for small chunks, especially churro-style.\n", | ||
"\n", | ||
"These tests were run on an 8-core Intel i7 Ubuntu desktop:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>skipna=False</th>\n", | ||
" <th>skipna=True</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>pancake_small</th>\n", | ||
" <td>3.7</td>\n", | ||
" <td>7.2</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>pancake_large</th>\n", | ||
" <td>0.6</td>\n", | ||
" <td>1.1</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>churro_small</th>\n", | ||
" <td>14.2</td>\n", | ||
" <td>16.9</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>churro_large</th>\n", | ||
" <td>1.8</td>\n", | ||
" <td>2.4</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" skipna=False skipna=True\n", | ||
"pancake_small 3.7 7.2\n", | ||
"pancake_large 0.6 1.1\n", | ||
"churro_small 14.2 16.9\n", | ||
"churro_large 1.8 2.4" | ||
] | ||
}, | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"ratio" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "xarray-regrid", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.