-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
117 additions
and
303 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,186 +1,147 @@ | ||
import requests | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import matplotlib.dates as mdates | ||
import seaborn as sns | ||
from datetime import datetime, timedelta | ||
import warnings | ||
import io | ||
import datetime | ||
import numpy as np | ||
import h5py | ||
from shapely.geometry import Polygon, Point | ||
import logging | ||
import argparse | ||
from io import BytesIO | ||
from earthaccess import Auth, DataCollections | ||
from utils.get_poly import get_huc8_polygon, simplify_polygon | ||
|
||
# Set up the plotting theme | ||
sns.set_theme(style="darkgrid") | ||
warnings.filterwarnings("ignore") | ||
# Conditionally import matplotlib | ||
import importlib.util | ||
matplotlib_spec = importlib.util.find_spec("matplotlib") | ||
matplotlib_available = matplotlib_spec is not None | ||
|
||
# Configure logging | ||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | ||
|
||
def search_smap_data(start_date, end_date, bounding_box, bearer_token, short_name="SPL2SMP_E", version="006"): | ||
def search_and_download_smap_data(date, auth): | ||
""" | ||
Searches for SMAP data using the CMR API. | ||
Search for the most recent SMAP L3 data up to the given date and download it. | ||
""" | ||
cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json" | ||
headers = { | ||
"Authorization": f"Bearer {bearer_token}" | ||
} | ||
params = { | ||
"short_name": short_name, | ||
"version": version, | ||
"temporal": f"{start_date}T00:00:00Z,{end_date}T23:59:59Z", | ||
"bounding_box": bounding_box, | ||
"page_size": 100, | ||
"page_num": 1 | ||
} | ||
collection = DataCollections.smap_l3_soil_moisture | ||
end_date = date | ||
start_date = end_date - datetime.timedelta(days=7) # Look for data up to a week before the specified date | ||
|
||
response = requests.get(cmr_url, headers=headers, params=params) | ||
logging.info(f"CMR URL: {response.url}") | ||
response.raise_for_status() | ||
return response.json() | ||
|
||
def get_smap_soil_moisture_data(granule_url, bearer_token): | ||
""" | ||
Retrieves soil moisture data from the SMAP granule URL. | ||
""" | ||
headers = { | ||
"Authorization": f"Bearer {bearer_token}" | ||
} | ||
try: | ||
granules = collection.search( | ||
temporal=(start_date, end_date), | ||
bounding_box=(-180, -90, 180, 90), # Global coverage | ||
limit=1, | ||
sort_key="-start_date" | ||
) | ||
|
||
if not granules: | ||
logging.error(f"No SMAP data found from {start_date} to {end_date}") | ||
return None | ||
|
||
granule = granules[0] | ||
logging.info(f"Found SMAP data for date: {granule.date}") | ||
|
||
# Download the data | ||
data = granule.download(auth) | ||
return BytesIO(data) | ||
|
||
response = requests.get(granule_url, headers=headers) | ||
response.raise_for_status() | ||
return response.content | ||
except Exception as e: | ||
logging.error(f"Error searching or downloading SMAP data: {e}") | ||
return None | ||
|
||
def debug_hdf5_structure(data): | ||
def extract_soil_moisture(hdf_file, polygon): | ||
""" | ||
Debug function to print the structure of the HDF5 file. | ||
Extract soil moisture data for the given polygon from the HDF file. | ||
""" | ||
with h5py.File(io.BytesIO(data), 'r') as f: | ||
def print_structure(name, obj): | ||
logging.debug(name) | ||
f.visititems(print_structure) | ||
try: | ||
with h5py.File(hdf_file, 'r') as file: | ||
soil_moisture = file['Soil_Moisture_Retrieval_Data']['soil_moisture'][:] | ||
lat = file['cell_lat'][:] | ||
lon = file['cell_lon'][:] | ||
|
||
shape = Polygon(polygon) | ||
mask = np.zeros_like(soil_moisture, dtype=bool) | ||
|
||
for i in range(soil_moisture.shape[0]): | ||
for j in range(soil_moisture.shape[1]): | ||
if shape.contains(Point(lon[j], lat[i])): | ||
mask[i, j] = True | ||
|
||
valid_data = soil_moisture[mask & (soil_moisture != -9999)] | ||
if len(valid_data) > 0: | ||
return np.mean(valid_data), soil_moisture, lat, lon, mask | ||
else: | ||
logging.warning("No valid soil moisture data found within the polygon") | ||
return None, None, None, None, None | ||
except Exception as e: | ||
logging.error(f"Error extracting soil moisture data: {e}") | ||
return None, None, None, None, None | ||
|
||
def visualize_soil_moisture(polygon, soil_moisture, lat, lon, mask, average_moisture): | ||
if not matplotlib_available: | ||
logging.error("Matplotlib is not installed. Cannot create visualization.") | ||
return | ||
|
||
def process_smap_data(data): | ||
""" | ||
Processes the SMAP data from HDF5 format into a pandas DataFrame. | ||
""" | ||
with h5py.File(io.BytesIO(data), 'r') as f: | ||
# Debug the structure of the HDF5 file | ||
logging.info("HDF5 file structure:") | ||
f.visititems(lambda name, obj: logging.info(name)) | ||
|
||
# Extract data from Soil_Moisture_Retrieval_Data group | ||
soil_moisture_data = f['Soil_Moisture_Retrieval_Data']['soil_moisture_option3'][:] | ||
latitude_data = f['Soil_Moisture_Retrieval_Data']['latitude'][:] | ||
longitude_data = f['Soil_Moisture_Retrieval_Data']['longitude'][:] | ||
date_data = f['Soil_Moisture_Retrieval_Data']['tb_time_utc'][:].astype('U') | ||
|
||
# Attempt to extract quality flag data if it exists | ||
quality_flag_data = None | ||
if 'retrieval_qual_flag_option3' in f['Soil_Moisture_Retrieval_Data']: | ||
quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag_option3'][:] | ||
else: | ||
quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag'][:] | ||
import matplotlib.pyplot as plt | ||
from matplotlib.patches import Polygon as mplPolygon | ||
|
||
df = pd.DataFrame({ | ||
'date': date_data, | ||
'latitude': latitude_data, | ||
'longitude': longitude_data, | ||
'soil_moisture': soil_moisture_data, | ||
'quality_flag': quality_flag_data | ||
}) | ||
fig, ax = plt.subplots(figsize=(10, 8)) | ||
|
||
# Filter out invalid soil moisture values and ensure recommended quality | ||
df = df[(df['soil_moisture'] != -9999.0) & (df['quality_flag'] & 0b1 == 0)] | ||
# Plot the entire soil moisture data | ||
im = ax.imshow(soil_moisture, cmap='YlGnBu', extent=[lon.min(), lon.max(), lat.min(), lat.max()], | ||
origin='lower', alpha=0.5) | ||
|
||
return df | ||
|
||
def clean_and_convert_dates(date_series): | ||
""" | ||
Cleans and converts a series of date strings to datetime objects, filtering out invalid formats. | ||
""" | ||
def try_parsing_date(text): | ||
for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"): | ||
try: | ||
return datetime.strptime(text, fmt) | ||
except ValueError: | ||
continue | ||
return None | ||
|
||
cleaned_dates = date_series.apply(try_parsing_date) | ||
return cleaned_dates.dropna() | ||
|
||
def plot_soil_moisture(df): | ||
""" | ||
Plots the soil moisture data. | ||
""" | ||
df['date'] = clean_and_convert_dates(df['date']) | ||
# Highlight the polygon | ||
poly = mplPolygon(polygon, facecolor='none', edgecolor='red', linewidth=2) | ||
ax.add_patch(poly) | ||
|
||
if df.empty: | ||
logging.info("No valid soil moisture data available to plot.") | ||
return | ||
# Set the extent to focus on the polygon | ||
min_lon, min_lat = np.min(polygon, axis=0) | ||
max_lon, max_lat = np.max(polygon, axis=0) | ||
ax.set_xlim(min_lon - 1, max_lon + 1) | ||
ax.set_ylim(min_lat - 1, max_lat + 1) | ||
|
||
plt.figure(figsize=(14, 7)) | ||
plt.scatter(df['date'], df['soil_moisture'], marker='o') | ||
plt.title('Daily Soil Moisture') | ||
plt.xlabel('Date') | ||
plt.ylabel('Soil Moisture') | ||
plt.grid(True) | ||
plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=1)) | ||
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M')) | ||
plt.gcf().autofmt_xdate() | ||
plt.show() | ||
|
||
def get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude): | ||
""" | ||
Main function to retrieve and process SMAP soil moisture data. | ||
""" | ||
bounding_box = f"{longitude-0.1},{latitude-0.1},{longitude+0.1},{latitude+0.1}" | ||
plt.colorbar(im, label='Soil Moisture') | ||
plt.title(f'Soil Moisture Map\nAverage within polygon: {average_moisture:.4f}') | ||
plt.xlabel('Longitude') | ||
plt.ylabel('Latitude') | ||
|
||
logging.info(f"Bounding Box: {bounding_box}") | ||
logging.info(f"Start Date: {start_date}") | ||
logging.info(f"End Date: {end_date}") | ||
|
||
search_results = search_smap_data(start_date, end_date, bounding_box, bearer_token) | ||
|
||
# Check if any granules are found | ||
if 'feed' in search_results and 'entry' in search_results['feed'] and len(search_results['feed']['entry']) > 0: | ||
granule_url = search_results['feed']['entry'][0]['links'][0]['href'] | ||
logging.info(f"Granule URL: {granule_url}") | ||
|
||
# Get soil moisture data from the first granule (for simplicity) | ||
soil_moisture_data = get_smap_soil_moisture_data(granule_url, bearer_token) | ||
|
||
# Debug the HDF5 file structure | ||
debug_hdf5_structure(soil_moisture_data) | ||
plt.show() | ||
|
||
# Process the data | ||
df_soil_moisture = process_smap_data(soil_moisture_data) | ||
logging.info(df_soil_moisture.head()) | ||
def main(date, lat, lon, visual): | ||
# Set up authentication | ||
auth = Auth() | ||
auth.login() | ||
|
||
# Plot the data | ||
plot_soil_moisture(df_soil_moisture) | ||
|
||
return df_soil_moisture | ||
else: | ||
logging.warning("No granules found for the given search criteria.") | ||
return pd.DataFrame() | ||
# Get and simplify the HUC8 polygon | ||
huc8_polygon = get_huc8_polygon(lat, lon) | ||
if not huc8_polygon: | ||
logging.error("Failed to retrieve HUC8 polygon") | ||
return | ||
|
||
def main(bearer_token, start_date, end_date, latitude, longitude): | ||
df_soil_moisture = get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude) | ||
if not df_soil_moisture.empty: | ||
logging.info("Soil moisture data retrieved successfully.") | ||
simplified_polygon = simplify_polygon(huc8_polygon) | ||
logging.info(f"Simplified polygon: {simplified_polygon}") | ||
|
||
hdf_file = search_and_download_smap_data(date, auth) | ||
if hdf_file: | ||
soil_moisture, full_soil_moisture, lat_data, lon_data, mask = extract_soil_moisture(hdf_file, simplified_polygon) | ||
if soil_moisture is not None: | ||
logging.info(f"Average soil moisture for the given polygon on or before {date}: {soil_moisture:.4f}") | ||
|
||
if visual: | ||
if matplotlib_available: | ||
visualize_soil_moisture(simplified_polygon, full_soil_moisture, lat_data, lon_data, mask, soil_moisture) | ||
else: | ||
logging.warning("Matplotlib is not installed. Skipping visualization.") | ||
else: | ||
logging.error("Failed to calculate soil moisture") | ||
else: | ||
logging.warning("Failed to retrieve soil moisture data.") | ||
return df_soil_moisture | ||
logging.error("Failed to find or download SMAP data") | ||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser(description='Fetch SMAP soil moisture data.') | ||
parser.add_argument('--bearer_token', type=str, required=True, help='Bearer token for authentication') | ||
parser.add_argument('--start_date', type=str, required=True, help='Start date in the format YYYY-MM-DD') | ||
parser.add_argument('--end_date', type=str, required=True, help='End date in the format YYYY-MM-DD') | ||
parser.add_argument('--latitude', type=float, required=True, help='Latitude of the location') | ||
parser.add_argument('--longitude', type=float, required=True, help='Longitude of the location') | ||
parser = argparse.ArgumentParser(description='Calculate average soil moisture for a HUC8 polygon from SMAP L3 data.') | ||
parser.add_argument('--date', type=lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').date(), required=True, help='Date in YYYY-MM-DD format') | ||
parser.add_argument('--lat', type=float, required=True, help='Latitude of the point within the desired HUC8 polygon') | ||
parser.add_argument('--lon', type=float, required=True, help='Longitude of the point within the desired HUC8 polygon') | ||
parser.add_argument('--visual', action='store_true', help='Enable matplotlib visualization') | ||
args = parser.parse_args() | ||
|
||
main(args.bearer_token, args.start_date, args.end_date, args.latitude, args.longitude) | ||
main(args.date, args.lat, args.lon, args.visual) |
Oops, something went wrong.