From a260f8f9906a133edb1f5502e0c6f1e90d4ac066 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 12 Apr 2024 18:12:21 +1200 Subject: [PATCH] :pencil2: Edit URLs to shapefiles from UNOSAT (#138) * :pencil2: Edit URL to Mount Talakmau landslide shapefile Old CERN link seems to be unavailable. New link from https://unosat.org/products/3064 points to https://unosat.org/static/unosat_filesystem/3064/LS20220308IDN_SHP.zip instead. * :boom: Change vector segmentation mask tutorial to Northern Kelantan The link to the 20191215 Johor flood water shapefile is no longer available, so switching to 20170104 flood shapefile over the Narathiwat Province in Thailand and Northern Kelantan State in Malaysia. Using an internet archive link this time instead of the UNOSAT link, not only to future-proof against future link rot, but also because the server hosting https://unosat.org/static/unosat_filesystem/2460/FL20170106THA_SHP.zip does not support range requests. Increased chip size from 128x128 to 512x512 to reduce number of batches. * :white_check_mark: Update bbox coords in test_pystac_client_item_search Minor changes to the bounding box coordinates of the returned STAC item. * :rotating_light: Bump GitHub Actions workflows to fix deprecations Updated actions/checkout and actions/setup-python to silence deprecated warning on Node.js 16 (see https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/). * :pushpin: Pin to Python 3.11.8 to avoid dask.dataframe TypeError Xref https://github.com/dask/dask/issues/11038 * :bug: Replace UNOSAT link with Wayback Machine permalink The UNOSAT link doesn't support range requests, resulting in a `DataSourceError: Range downloading not supported by this server!` error. Using the Internet Archive's permalink instead. 
* :pushpin: Temporarily pin dask to <2024.3.0 on readthedocs build The latest dask=2024.4.1 version installed via readthedocs has a bug with `import datashader` raising `ModuleNotFoundError: No module named 'dask_expr'`. Xref https://github.com/holoviz/datashader/issues/1319 * :pushpin: Pin to dask=2024.2.1 The less than sign at c24338a4e63836f28c482ba050572be1226f0115 didn't work on the readthedocs build. --- .github/workflows/ci-tests.yml | 10 +++--- .github/workflows/publish-to-pypi.yml | 6 ++-- docs/.readthedocs.yaml | 6 +++- docs/stacking.md | 2 +- docs/vector-segmentation-masks.md | 36 +++++++++---------- zen3geo/tests/test_datapipes_pystac_client.py | 6 ++-- 6 files changed, 35 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 9ebc87c..558c2d5 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.10", "3.11"] + python-version: ["3.8", "3.10", "3.11.8"] os: [ubuntu-22.04] # Is it a draft Pull Request (true or false)? 
isDraft: @@ -29,7 +29,7 @@ jobs: exclude: - python-version: '3.8' isDraft: true - - python-version: '3.11' + - python-version: '3.11.8' isDraft: true # Only install optional packages on Ubuntu-22.04/Python 3.10 and 3.11 include: @@ -37,17 +37,17 @@ jobs: python-version: '3.10' extra-packages: '--extras "raster spatial stac vector"' - os: 'ubuntu-22.04' - python-version: '3.11' + python-version: '3.11.8' extra-packages: '--extras "raster spatial stac vector"' steps: # Checkout current git repository - name: Checkout - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 # Install Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0 # v4.6.1 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 5fbeeb4..90839f9 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -30,15 +30,15 @@ jobs: steps: - name: Checkout - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: # fetch all history so that poetry-dynamic-versioning works fetch-depth: 0 - name: Set up Python 3.11 - uses: actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0 # v4.6.1 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: - python-version: '3.11' + python-version: '3.11.8' - name: Install Poetry and dynamic-versioning plugin run: | diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 7f9d045..fc52a85 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -23,7 +23,11 @@ build: # see https://github.com/gjoseph92/stackstac/pull/208 # Need to wait for 
rasterio/GDAL to support float16 # see https://gdal.org/api/raster_c_api.html#_CPPv412GDALDataType - - "pip install stackstac==0.4.4" + # Pin dask==2024.2.1 (i.e. older than 2024.3.0) to prevent + # ModuleNotFoundError: No module named 'dask_expr' + # ImportError: Dask dataframe requirements are not installed + # https://github.com/holoviz/datashader/issues/1319 + - "pip install stackstac==0.4.4 dask==2024.2.1" # Optional but recommended, declare the Python requirements required # to build your documentation diff --git a/docs/stacking.md b/docs/stacking.md index fadee6d..1afa5d4 100644 --- a/docs/stacking.md +++ b/docs/stacking.md @@ -197,7 +197,7 @@ we'll first load the digitized landslide polygons from a vector file 📁 using ```{code-cell} # https://gdal.org/user/virtual_file_systems.html#vsizip-zip-archives -shape_url = "/vsizip/vsicurl/https://unosat-maps.web.cern.ch/ID/LS20220308IDN/LS20220308IDN_SHP.zip/LS20220308IDN_SHP/S2_20220304_LandslideExtent_MountTalakmau.shp" +shape_url = "/vsizip/vsicurl/https://web.archive.org/web/20240202034335/https://unosat.org/static/unosat_filesystem/3064/LS20220308IDN_SHP.zip/LS20220308IDN_SHP/S2_20220304_LandslideExtent_MountTalakmau.shp" dp_shapes = torchdata.datapipes.iter.IterableWrapper(iterable=[shape_url]) dp_pyogrio = dp_shapes.read_from_pyogrio() diff --git a/docs/vector-segmentation-masks.md b/docs/vector-segmentation-masks.md index 922d219..daaca69 100644 --- a/docs/vector-segmentation-masks.md +++ b/docs/vector-segmentation-masks.md @@ -43,21 +43,21 @@ import zen3geo ## 0️⃣ Find cloud-hosted raster and vector data ⛳ -In this case study, we'll look at the flood water extent over Johor, -Malaysia 🇲🇾 on 15 Dec 2019 that were digitized by 🇺🇳 UNITAR-UNOSAT's rapid -mapping service over Synthetic Aperture Radar (SAR) 🛰️ images. Specifically, -we'll be using the 🇪🇺 Sentinel-1 Ground Range Detected (GRD) product's VV -polarization channel. 
+In this case study, we'll look at the flood water extent over the Narathiwat Province +in Thailand 🇹🇭 and the Northern Kelantan State in Malaysia 🇲🇾 on 04 Jan 2017 that was +digitized by 🇺🇳 UNITAR-UNOSAT's rapid mapping service over Synthetic Aperture Radar +(SAR) 🛰️ images. Specifically, we'll be using the 🇪🇺 Sentinel-1 Ground Range Detected +(GRD) product's VV polarization channel. 🔗 Links: -- https://www.unitar.org/maps/unosat-rapid-mapping-service -- https://unitar.org/maps/countries -- [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=103.6637%2C2.1494&z=8.49&v=2&d=sentinel-1-grd&s=false%3A%3A100%3A%3Atrue&ae=0&m=cql%3Afc3d85b6ab43d3e8ebe168da0206f2cf&r=VV%2C+VH+False-color+composite) +- https://www.unitar.org/maps +- https://unitar.org/maps/all-maps +- [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=102.7555%2C5.7222&z=7.92&v=2&d=sentinel-1-grd&m=cql%3Afdba821238c1a390e7c75d7ced805b2e&r=VV%2C+VH+False-color+composite&s=false%3A%3A100%3A%3Atrue&sr=desc&ae=0) To start, let's get the 🛰️ satellite scene we'll be using for this tutorial. ```{code-cell} -item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20191215T224757_20191215T224822_030365_037955" +item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20170104T225443_20170104T225512_014688_017E5D" # Load the individual item metadata and sign the assets item = pystac.Item.from_file(item_url) @@ -65,10 +65,10 @@ signed_item = planetary_computer.sign(item) signed_item ``` -This is how the Sentinel-1 🩻 image looks like over Johor in Peninsular -Malaysia on 15 Dec 2019. +This is what the Sentinel-1 🩻 image looks like over Southern Thailand / Northern +Peninsular Malaysia on 04 Jan 2017. 
-![Sentinel-1 GRD image over Johor, Malaysia on 20191215](https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1A_IW_GRDH_1SDV_20191215T224757_20191215T224822_030365_037955&assets=vv&assets=vh&expression=vv%3Bvh%3Bvv%2Fvh&rescale=0%2C600&rescale=0%2C270&rescale=0%2C9&asset_as_band=True&tile_format=png&format=png) +![Sentinel-1 GRD image over Southern Thailand and Northern Peninsular Malaysia on 20170104](https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1A_IW_GRDH_1SDV_20170104T225443_20170104T225512_014688_017E5D&assets=vv&assets=vh&expression=vv%3Bvh%3Bvv%2Fvh&rescale=0%2C600&rescale=0%2C270&rescale=0%2C9&asset_as_band=True&tile_format=png&format=png) ### Load and reproject image data 🔄 @@ -164,7 +164,7 @@ of the analysis extent areas we'll be working on later. it = iter(dp_decibel_image) dataarray = next(it) -da_clip = dataarray.rio.clip_box(minx=371483, miny=190459, maxx=409684, maxy=229474) +da_clip = dataarray.rio.clip_box(minx=125718, miny=523574, maxx=326665, maxy=722189) da_clip.isel(band=0).plot.imshow(figsize=(11.5, 9), cmap="Blues_r", vmin=18, vmax=26) ``` @@ -182,12 +182,12 @@ We'll be converting these vector polygons to 🌈 raster masks later. 
🔗 Links: - https://github.com/UNITAR-UNOSAT/UNOSAT-AI-Based-Rapid-Mapping-Service -- [Humanitarian Data Exchange link to polygon dataset](https://data.humdata.org/dataset/waters-extents-as-of-15-december-2019-over-kota-tinggi-and-mersing-district-johor-state-of) +- [UNOSAT link to polygon dataset](https://unosat.org/products/2460) - [Disaster Risk Monitoring Using Satellite Imagery online course](https://courses.nvidia.com/courses/course-v1:DLI+S-ES-01+V1) ```{code-cell} # https://gdal.org/user/virtual_file_systems.html#vsizip-zip-archives -shape_url = "/vsizip/vsicurl/https://unosat-maps.web.cern.ch/MY/FL20191217MYS/FL20191217MYS_SHP.zip/ST1_20191215_WaterExtent_Johor_AOI2.shp" +shape_url = "/vsizip/vsicurl/https://web.archive.org/web/20240411214446/https://unosat.org/static/unosat_filesystem/2460/FL20170106THA_SHP.zip/ST20170104_SatelliteDetectedWaterAndSaturatedSoil.shp" ``` This is a shapefile containing 🔷 polygons of the mapped water extent. Let's @@ -419,16 +419,16 @@ plt.show() ### Slice into chips and turn into tensors 🗡️ -To cut 🔪 the {py:class}`xarray.Dataset` into 128x128 sized chips, we'll use +To cut 🔪 the {py:class}`xarray.Dataset` into 512x512 sized chips, we'll use {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name: `slice_with_xbatcher`). Refer to {doc}`./chipping` if you need a 🧑‍🎓 refresher. ```{code-cell} -dp_xbatcher = dp_dataset.slice_with_xbatcher(input_dims={"y": 128, "x": 128}) +dp_xbatcher = dp_dataset.slice_with_xbatcher(input_dims={"y": 512, "x": 512}) dp_xbatcher ``` -Next step is to convert the 128x128 chips into a {py:class}`torch.Tensor` via +The next step is to convert the 512x512 chips into a {py:class}`torch.Tensor` via {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`). The 🛰️ Sentinel-1 image and 💧 water mask will be split out at this point too. 
diff --git a/zen3geo/tests/test_datapipes_pystac_client.py b/zen3geo/tests/test_datapipes_pystac_client.py index d9abe0f..3214d79 100644 --- a/zen3geo/tests/test_datapipes_pystac_client.py +++ b/zen3geo/tests/test_datapipes_pystac_client.py @@ -42,9 +42,9 @@ def test_pystac_client_item_search(): assert stac_item.bbox == [ 149.965907628116, - -35.199398016548095, - 152.10531016837078, - -32.972806586656844, + -35.199398016548116, + 152.1053101683708, + -32.97280658665687, ] assert stac_item.datetime.isoformat() == "2001-07-02T00:00:00+00:00" assert stac_item.geometry["type"] == "Polygon"