diff --git a/openFlowML/nasa_moisture.py b/openFlowML/nasa_moisture.py index df44a78..b15e98f 100644 --- a/openFlowML/nasa_moisture.py +++ b/openFlowML/nasa_moisture.py @@ -1,186 +1,147 @@ -import requests -import pandas as pd -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -import seaborn as sns -from datetime import datetime, timedelta -import warnings -import io +import datetime +import numpy as np import h5py +from shapely.geometry import Polygon, Point import logging import argparse +from io import BytesIO +from earthaccess import Auth, DataCollections +from utils.get_poly import get_huc8_polygon, simplify_polygon -# Set up the plotting theme -sns.set_theme(style="darkgrid") -warnings.filterwarnings("ignore") +# Conditionally import matplotlib +import importlib.util +matplotlib_spec = importlib.util.find_spec("matplotlib") +matplotlib_available = matplotlib_spec is not None # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -def search_smap_data(start_date, end_date, bounding_box, bearer_token, short_name="SPL2SMP_E", version="006"): +def search_and_download_smap_data(date, auth): """ - Searches for SMAP data using the CMR API. + Search for the most recent SMAP L3 data up to the given date and download it. """ - cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json" - headers = { - "Authorization": f"Bearer {bearer_token}" - } - params = { - "short_name": short_name, - "version": version, - "temporal": f"{start_date}T00:00:00Z,{end_date}T23:59:59Z", - "bounding_box": bounding_box, - "page_size": 100, - "page_num": 1 - } + collection = DataCollections.smap_l3_soil_moisture + end_date = date + start_date = end_date - datetime.timedelta(days=7) # Look for data up to a week before the specified date - response = requests.get(cmr_url, headers=headers, params=params) - logging.info(f"CMR URL: {response.url}") - response.raise_for_status() - return response.json() - -def get_smap_soil_moisture_data(granule_url, bearer_token): - """ - Retrieves soil moisture data from the SMAP granule URL. - """ - headers = { - "Authorization": f"Bearer {bearer_token}" - } + try: + granules = collection.search( + temporal=(start_date, end_date), + bounding_box=(-180, -90, 180, 90), # Global coverage + limit=1, + sort_key="-start_date" + ) + + if not granules: + logging.error(f"No SMAP data found from {start_date} to {end_date}") + return None + + granule = granules[0] + logging.info(f"Found SMAP data for date: {granule.date}") + + # Download the data + data = granule.download(auth) + return BytesIO(data) - response = requests.get(granule_url, headers=headers) - response.raise_for_status() - return response.content + except Exception as e: + logging.error(f"Error searching or downloading SMAP data: {e}") + return None -def debug_hdf5_structure(data): +def extract_soil_moisture(hdf_file, polygon): """ - Debug function to print the structure of the HDF5 file. + Extract soil moisture data for the given polygon from the HDF file. """ - with h5py.File(io.BytesIO(data), 'r') as f: - def print_structure(name, obj): - logging.debug(name) - f.visititems(print_structure) + try: + with h5py.File(hdf_file, 'r') as file: + soil_moisture = file['Soil_Moisture_Retrieval_Data']['soil_moisture'][:] + lat = file['cell_lat'][:] + lon = file['cell_lon'][:] + + shape = Polygon(polygon) + mask = np.zeros_like(soil_moisture, dtype=bool) + + for i in range(soil_moisture.shape[0]): + for j in range(soil_moisture.shape[1]): + if shape.contains(Point(lon[j], lat[i])): + mask[i, j] = True + + valid_data = soil_moisture[mask & (soil_moisture != -9999)] + if len(valid_data) > 0: + return np.mean(valid_data), soil_moisture, lat, lon, mask + else: + logging.warning("No valid soil moisture data found within the polygon") + return None, None, None, None, None + except Exception as e: + logging.error(f"Error extracting soil moisture data: {e}") + return None, None, None, None, None + +def visualize_soil_moisture(polygon, soil_moisture, lat, lon, mask, average_moisture): + if not matplotlib_available: + logging.error("Matplotlib is not installed. Cannot create visualization.") + return -def process_smap_data(data): - """ - Processes the SMAP data from HDF5 format into a pandas DataFrame. - """ - with h5py.File(io.BytesIO(data), 'r') as f: - # Debug the structure of the HDF5 file - logging.info("HDF5 file structure:") - f.visititems(lambda name, obj: logging.info(name)) - - # Extract data from Soil_Moisture_Retrieval_Data group - soil_moisture_data = f['Soil_Moisture_Retrieval_Data']['soil_moisture_option3'][:] - latitude_data = f['Soil_Moisture_Retrieval_Data']['latitude'][:] - longitude_data = f['Soil_Moisture_Retrieval_Data']['longitude'][:] - date_data = f['Soil_Moisture_Retrieval_Data']['tb_time_utc'][:].astype('U') - - # Attempt to extract quality flag data if it exists - quality_flag_data = None - if 'retrieval_qual_flag_option3' in f['Soil_Moisture_Retrieval_Data']: - quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag_option3'][:] - else: - quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag'][:] + import matplotlib.pyplot as plt + from matplotlib.patches import Polygon as mplPolygon - df = pd.DataFrame({ - 'date': date_data, - 'latitude': latitude_data, - 'longitude': longitude_data, - 'soil_moisture': soil_moisture_data, - 'quality_flag': quality_flag_data - }) + fig, ax = plt.subplots(figsize=(10, 8)) - # Filter out invalid soil moisture values and ensure recommended quality - df = df[(df['soil_moisture'] != -9999.0) & (df['quality_flag'] & 0b1 == 0)] + # Plot the entire soil moisture data + im = ax.imshow(soil_moisture, cmap='YlGnBu', extent=[lon.min(), lon.max(), lat.min(), lat.max()], + origin='lower', alpha=0.5) - return df - -def clean_and_convert_dates(date_series): - """ - Cleans and converts a series of date strings to datetime objects, filtering out invalid formats. - """ - def try_parsing_date(text): - for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"): - try: - return datetime.strptime(text, fmt) - except ValueError: - continue - return None - - cleaned_dates = date_series.apply(try_parsing_date) - return cleaned_dates.dropna() - -def plot_soil_moisture(df): - """ - Plots the soil moisture data. - """ - df['date'] = clean_and_convert_dates(df['date']) + # Highlight the polygon + poly = mplPolygon(polygon, facecolor='none', edgecolor='red', linewidth=2) + ax.add_patch(poly) - if df.empty: - logging.info("No valid soil moisture data available to plot.") - return + # Set the extent to focus on the polygon + min_lon, min_lat = np.min(polygon, axis=0) + max_lon, max_lat = np.max(polygon, axis=0) + ax.set_xlim(min_lon - 1, max_lon + 1) + ax.set_ylim(min_lat - 1, max_lat + 1) - plt.figure(figsize=(14, 7)) - plt.scatter(df['date'], df['soil_moisture'], marker='o') - plt.title('Daily Soil Moisture') - plt.xlabel('Date') - plt.ylabel('Soil Moisture') - plt.grid(True) - plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=1)) - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M')) - plt.gcf().autofmt_xdate() - plt.show() - -def get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude): - """ - Main function to retrieve and process SMAP soil moisture data. - """ - bounding_box = f"{longitude-0.1},{latitude-0.1},{longitude+0.1},{latitude+0.1}" + plt.colorbar(im, label='Soil Moisture') + plt.title(f'Soil Moisture Map\nAverage within polygon: {average_moisture:.4f}') + plt.xlabel('Longitude') + plt.ylabel('Latitude') - logging.info(f"Bounding Box: {bounding_box}") - logging.info(f"Start Date: {start_date}") - logging.info(f"End Date: {end_date}") - - search_results = search_smap_data(start_date, end_date, bounding_box, bearer_token) - - # Check if any granules are found - if 'feed' in search_results and 'entry' in search_results['feed'] and len(search_results['feed']['entry']) > 0: - granule_url = search_results['feed']['entry'][0]['links'][0]['href'] - logging.info(f"Granule URL: {granule_url}") - - # Get soil moisture data from the first granule (for simplicity) - soil_moisture_data = get_smap_soil_moisture_data(granule_url, bearer_token) - - # Debug the HDF5 file structure - debug_hdf5_structure(soil_moisture_data) + plt.show() - # Process the data - df_soil_moisture = process_smap_data(soil_moisture_data) - logging.info(df_soil_moisture.head()) +def main(date, lat, lon, visual): + # Set up authentication + auth = Auth() + auth.login() - # Plot the data - plot_soil_moisture(df_soil_moisture) - - return df_soil_moisture - else: - logging.warning("No granules found for the given search criteria.") - return pd.DataFrame() + # Get and simplify the HUC8 polygon + huc8_polygon = get_huc8_polygon(lat, lon) + if not huc8_polygon: + logging.error("Failed to retrieve HUC8 polygon") + return -def main(bearer_token, start_date, end_date, latitude, longitude): - df_soil_moisture = get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude) - if not df_soil_moisture.empty: - logging.info("Soil moisture data retrieved successfully.") + simplified_polygon = simplify_polygon(huc8_polygon) + logging.info(f"Simplified polygon: {simplified_polygon}") + + hdf_file = search_and_download_smap_data(date, auth) + if hdf_file: + soil_moisture, full_soil_moisture, lat_data, lon_data, mask = extract_soil_moisture(hdf_file, simplified_polygon) + if soil_moisture is not None: + logging.info(f"Average soil moisture for the given polygon on or before {date}: {soil_moisture:.4f}") + + if visual: + if matplotlib_available: + visualize_soil_moisture(simplified_polygon, full_soil_moisture, lat_data, lon_data, mask, soil_moisture) + else: + logging.warning("Matplotlib is not installed. Skipping visualization.") + else: + logging.error("Failed to calculate soil moisture") else: - logging.warning("Failed to retrieve soil moisture data.") - return df_soil_moisture + logging.error("Failed to find or download SMAP data") if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Fetch SMAP soil moisture data.') - parser.add_argument('--bearer_token', type=str, required=True, help='Bearer token for authentication') - parser.add_argument('--start_date', type=str, required=True, help='Start date in the format YYYY-MM-DD') - parser.add_argument('--end_date', type=str, required=True, help='End date in the format YYYY-MM-DD') - parser.add_argument('--latitude', type=float, required=True, help='Latitude of the location') - parser.add_argument('--longitude', type=float, required=True, help='Longitude of the location') + parser = argparse.ArgumentParser(description='Calculate average soil moisture for a HUC8 polygon from SMAP L3 data.') + parser.add_argument('--date', type=lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').date(), required=True, help='Date in YYYY-MM-DD format') + parser.add_argument('--lat', type=float, required=True, help='Latitude of the point within the desired HUC8 polygon') + parser.add_argument('--lon', type=float, required=True, help='Longitude of the point within the desired HUC8 polygon') + parser.add_argument('--visual', action='store_true', help='Enable matplotlib visualization') args = parser.parse_args() - main(args.bearer_token, args.start_date, args.end_date, args.latitude, args.longitude) \ No newline at end of file + main(args.date, args.lat, args.lon, args.visual) \ No newline at end of file diff --git a/openFlowML/nasa_moisture2.py b/openFlowML/nasa_moisture2.py deleted file mode 100644 index b15e98f..0000000 --- a/openFlowML/nasa_moisture2.py +++ /dev/null @@ -1,147 +0,0 @@ -import datetime -import numpy as np -import h5py -from shapely.geometry import Polygon, Point -import logging -import argparse -from io import BytesIO -from earthaccess import Auth, DataCollections -from utils.get_poly import get_huc8_polygon, simplify_polygon - -# Conditionally import matplotlib -import importlib.util -matplotlib_spec = importlib.util.find_spec("matplotlib") -matplotlib_available = matplotlib_spec is not None - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -def search_and_download_smap_data(date, auth): - """ - Search for the most recent SMAP L3 data up to the given date and download it. - """ - collection = DataCollections.smap_l3_soil_moisture - end_date = date - start_date = end_date - datetime.timedelta(days=7) # Look for data up to a week before the specified date - - try: - granules = collection.search( - temporal=(start_date, end_date), - bounding_box=(-180, -90, 180, 90), # Global coverage - limit=1, - sort_key="-start_date" - ) - - if not granules: - logging.error(f"No SMAP data found from {start_date} to {end_date}") - return None - - granule = granules[0] - logging.info(f"Found SMAP data for date: {granule.date}") - - # Download the data - data = granule.download(auth) - return BytesIO(data) - - except Exception as e: - logging.error(f"Error searching or downloading SMAP data: {e}") - return None - -def extract_soil_moisture(hdf_file, polygon): - """ - Extract soil moisture data for the given polygon from the HDF file. - """ - try: - with h5py.File(hdf_file, 'r') as file: - soil_moisture = file['Soil_Moisture_Retrieval_Data']['soil_moisture'][:] - lat = file['cell_lat'][:] - lon = file['cell_lon'][:] - - shape = Polygon(polygon) - mask = np.zeros_like(soil_moisture, dtype=bool) - - for i in range(soil_moisture.shape[0]): - for j in range(soil_moisture.shape[1]): - if shape.contains(Point(lon[j], lat[i])): - mask[i, j] = True - - valid_data = soil_moisture[mask & (soil_moisture != -9999)] - if len(valid_data) > 0: - return np.mean(valid_data), soil_moisture, lat, lon, mask - else: - logging.warning("No valid soil moisture data found within the polygon") - return None, None, None, None, None - except Exception as e: - logging.error(f"Error extracting soil moisture data: {e}") - return None, None, None, None, None - -def visualize_soil_moisture(polygon, soil_moisture, lat, lon, mask, average_moisture): - if not matplotlib_available: - logging.error("Matplotlib is not installed. Cannot create visualization.") - return - - import matplotlib.pyplot as plt - from matplotlib.patches import Polygon as mplPolygon - - fig, ax = plt.subplots(figsize=(10, 8)) - - # Plot the entire soil moisture data - im = ax.imshow(soil_moisture, cmap='YlGnBu', extent=[lon.min(), lon.max(), lat.min(), lat.max()], - origin='lower', alpha=0.5) - - # Highlight the polygon - poly = mplPolygon(polygon, facecolor='none', edgecolor='red', linewidth=2) - ax.add_patch(poly) - - # Set the extent to focus on the polygon - min_lon, min_lat = np.min(polygon, axis=0) - max_lon, max_lat = np.max(polygon, axis=0) - ax.set_xlim(min_lon - 1, max_lon + 1) - ax.set_ylim(min_lat - 1, max_lat + 1) - - plt.colorbar(im, label='Soil Moisture') - plt.title(f'Soil Moisture Map\nAverage within polygon: {average_moisture:.4f}') - plt.xlabel('Longitude') - plt.ylabel('Latitude') - - plt.show() - -def main(date, lat, lon, visual): - # Set up authentication - auth = Auth() - auth.login() - - # Get and simplify the HUC8 polygon - huc8_polygon = get_huc8_polygon(lat, lon) - if not huc8_polygon: - logging.error("Failed to retrieve HUC8 polygon") - return - - simplified_polygon = simplify_polygon(huc8_polygon) - logging.info(f"Simplified polygon: {simplified_polygon}") - - hdf_file = search_and_download_smap_data(date, auth) - if hdf_file: - soil_moisture, full_soil_moisture, lat_data, lon_data, mask = extract_soil_moisture(hdf_file, simplified_polygon) - if soil_moisture is not None: - logging.info(f"Average soil moisture for the given polygon on or before {date}: {soil_moisture:.4f}") - - if visual: - if matplotlib_available: - visualize_soil_moisture(simplified_polygon, full_soil_moisture, lat_data, lon_data, mask, soil_moisture) - else: - logging.warning("Matplotlib is not installed. Skipping visualization.") - else: - logging.error("Failed to calculate soil moisture") - else: - logging.error("Failed to find or download SMAP data") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Calculate average soil moisture for a HUC8 polygon from SMAP L3 data.') - parser.add_argument('--date', type=lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').date(), required=True, help='Date in YYYY-MM-DD format') - parser.add_argument('--lat', type=float, required=True, help='Latitude of the point within the desired HUC8 polygon') - parser.add_argument('--lon', type=float, required=True, help='Longitude of the point within the desired HUC8 polygon') - parser.add_argument('--visual', action='store_true', help='Enable matplotlib visualization') - args = parser.parse_args() - - main(args.date, args.lat, args.lon, args.visual) \ No newline at end of file