From 171829f22d2c7646fcce85d1766a617d094b3fa6 Mon Sep 17 00:00:00 2001 From: Michele Thornton Date: Mon, 18 Nov 2024 20:19:43 -0500 Subject: [PATCH] minor edit and executed cells --- .../NotebookBasics/SMCE_Basics.ipynb | 258 ++++++++++++++++-- 1 file changed, 235 insertions(+), 23 deletions(-) diff --git a/book/tutorials/NotebookBasics/SMCE_Basics.ipynb b/book/tutorials/NotebookBasics/SMCE_Basics.ipynb index f2d4824..015943a 100644 --- a/book/tutorials/NotebookBasics/SMCE_Basics.ipynb +++ b/book/tutorials/NotebookBasics/SMCE_Basics.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "source": [ "## Overview\n", - "This tutorial will explore BioSCape Science Managed Cloud Environment (SMCE) including how to access and explore using Python methods." + "This tutorial will explore the BioSCape Science Managed Cloud Environment (SMCE) including how to access and explore Amazon Simple Storage Service (S3) using open-source Python tools." ] }, { @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "63e38fc4-363c-4701-ac33-a9e8bc98cf05", "metadata": {}, "outputs": [], @@ -74,10 +74,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "e9d5ecaf-04b1-4ad5-b39d-a55b471bcdc2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/AVNG',\n", + " 'bioscape-data/BioSCapeVegPolys2023_10_18',\n", + " 'bioscape-data/BioSCapeVegPolys2023_10_18.geoparquet',\n", + " 'bioscape-data/LVIS',\n", + " 'bioscape-data/PRISM',\n", + " 'bioscape-data/bioscape_avng.yaml']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Use S3Fs to list the BioSCape data on the BioSCape SMCE S3 storaage\n", "files = s3.ls('bioscape-data/')\n", @@ -111,10 +127,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "689c7069-7bb0-4fa9-a500-b8ac629bc1ec", "metadata": {}, - "outputs": [], + "outputs": [ +
{ + "data": { + "text/plain": [ + "['bioscape-data/AVNG/',\n", + " 'bioscape-data/AVNG/ang20231022t092801',\n", + " 'bioscape-data/AVNG/ang20231022t094938',\n", + " 'bioscape-data/AVNG/ang20231022t101052',\n", + " 'bioscape-data/AVNG/ang20231022t103357',\n", + " 'bioscape-data/AVNG/ang20231022t105533',\n", + " 'bioscape-data/AVNG/ang20231022t111800',\n", + " 'bioscape-data/AVNG/ang20231022t113923',\n", + " 'bioscape-data/AVNG/ang20231022t120313',\n", + " 'bioscape-data/AVNG/ang20231022t122317']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "AVNG_flightlines = s3.ls('bioscape-data/AVNG/')\n", "AVNG_flightlines[:10]" @@ -122,10 +158,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6b12d72d-f2e7-428a-9e2a-a9daeaa8d0de", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "394" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "AVNGfl_count = len(AVNG_flightlines)\n", "AVNGfl_count" @@ -133,10 +180,69 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "df46a7e3-5846-4548-9b57-d068f223db8e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_001',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_002',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_003',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_004',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_005',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_006',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_007',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_008',\n", + " 
'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_009',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_010',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_011',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_012',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_013',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_014',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_015',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_016',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_017',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_018',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_019',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_020',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_021',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_022',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_023',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_024',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_025',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_026',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_027',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_028',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_029',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_030',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_031',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_032',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_033',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_034',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_035',\n", + " 
'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_036',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_037',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_038',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_039',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_040',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_041',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_042',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_043',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_044',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_045',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_046',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_047',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_048']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# looking into the ang20231022t092801 folder\n", "AVNG_scenes = s3.ls('bioscape-data/AVNG/ang20231022t092801')\n", @@ -145,10 +251,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c20fbbd2-b932-446d-8981-baae543bca79", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_LOC',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_LOC.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_LOC_ORT',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_LOC_ORT.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_OBS',\n", 
+ " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_OBS.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_OBS_ORT',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L1B_ORT_main_46dd9a4a_OBS_ORT.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_RFL_ORT',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_RFL_ORT.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_RFL_ORT.json',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_RFL_ORT_QL.tif',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_UNC_ORT',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_UNC_ORT.hdr',\n", + " 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_RFL_UNC_COMBINED_ORT.json']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Explore the AVNG folder that holds an AVNG scene's data\n", "AVNG_scene_data = s3.ls('bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000')\n", @@ -181,15 +312,34 @@ "source": [ "#### Open a file from an S3 Bucket - S3Fs\n", "- Calling `open()` on a **`S3FileSystem`** (typically using a context manager) provides an S3File for read or write access to a particular key.\n", - "- can be used with other projects that consume the file interface like `gzip` or `pandas`." + "- `open` can be used with other projects that consume the file interface like `gzip` or `pandas`." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8cd9871f-6ae7-4698-b2f6-8b3c50c71fd6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ENVI\n", + "description = {\n", + "L2A Analytyical per-pixel surface retrieval}\n", + "samples = 719\n", + "lines = 615\n", + "bands = 425\n", + "header offset = 0\n", + "file type = ENVI Standard\n", + "data type = 4\n", + "interleave = bil\n", + "byte order = 0\n", + "map info = {UTM, 1, 1, 290290.1514036929, 6352647.360537699, 6.3, 6.3, 34, South, WGS-84, units=Meters, rotation=0.0}\n" + ] + } + ], "source": [ "# Print the first 12 lines of and ENVI header file\n", "hdr_link = 'bioscape-data/AVNG/ang20231022t092801/ang20231022t092801_000/ang20231022t092801_000_L2A_OE_main_27577724_RFL_ORT.hdr'\n", @@ -209,10 +359,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "daebaac1-6bdd-448b-8b4e-073767a75d12", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/PRISM/L2']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "PRISM_flightlines = s3.ls('bioscape-data/PRISM')\n", "PRISM_flightlines" @@ -229,10 +390,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "0c9dc196-ece2-4b02-ae12-cad2db9460db", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/PRISM/L2/prm20231022t141344_rfl_ort',\n", + " 'bioscape-data/PRISM/L2/prm20231022t141344_rfl_ort.hdr',\n", + " 'bioscape-data/PRISM/L2/prm20231025t060817_rfl_ort',\n", + " 'bioscape-data/PRISM/L2/prm20231025t060817_rfl_ort.hdr',\n", + " 'bioscape-data/PRISM/L2/prm20231025t062740_rfl_ort',\n", + " 'bioscape-data/PRISM/L2/prm20231025t062740_rfl_ort.hdr',\n", + " 'bioscape-data/PRISM/L2/prm20231025t063541_rfl_ort',\n", + " 
'bioscape-data/PRISM/L2/prm20231025t063541_rfl_ort.hdr',\n", + " 'bioscape-data/PRISM/L2/prm20231025t064655_rfl_ort',\n", + " 'bioscape-data/PRISM/L2/prm20231025t064655_rfl_ort.hdr']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "PRISM_flightlines = s3.ls('bioscape-data/PRISM/L2')\n", "PRISM_flightlines[:10]" @@ -256,10 +437,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "562510bd-3502-4c4e-b52c-c124ec042cec", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/LVIS/L1B', 'bioscape-data/LVIS/L2']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "LVIS_flightlines = s3.ls('bioscape-data/LVIS/')\n", "LVIS_flightlines" @@ -275,10 +467,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "eabb26dc-90ab-4109-b229-36cf64c70d38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_027373.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_027526.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_027815.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_027902.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_027990.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_028077.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_028551.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_028761.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_028852.TXT',\n", + " 'bioscape-data/LVIS/L2/LVISF2_BioSCape2023_1020_R2404_028939.TXT']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "LVIS_flightlines_L2 = s3.ls('bioscape-data/LVIS/L2')\n", 
"LVIS_flightlines_L2[:10]"