Skip to content

Commit

Permalink
update nasa
Browse files Browse the repository at this point in the history
  • Loading branch information
tmart234 committed Aug 15, 2024
1 parent 7538961 commit 297b845
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 303 deletions.
273 changes: 117 additions & 156 deletions openFlowML/nasa_moisture.py
Original file line number Diff line number Diff line change
@@ -1,186 +1,147 @@
import requests
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import datetime, timedelta
import warnings
import io
import datetime
import numpy as np
import h5py
from shapely.geometry import Polygon, Point
import logging
import argparse
from io import BytesIO
from earthaccess import Auth, DataCollections
from utils.get_poly import get_huc8_polygon, simplify_polygon

# Set up the plotting theme
sns.set_theme(style="darkgrid")
warnings.filterwarnings("ignore")
# Conditionally import matplotlib
import importlib.util
matplotlib_spec = importlib.util.find_spec("matplotlib")
matplotlib_available = matplotlib_spec is not None

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def search_smap_data(start_date, end_date, bounding_box, bearer_token, short_name="SPL2SMP_E", version="006"):
def search_and_download_smap_data(date, auth):
"""
Searches for SMAP data using the CMR API.
Search for the most recent SMAP L3 data up to the given date and download it.
"""
cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json"
headers = {
"Authorization": f"Bearer {bearer_token}"
}
params = {
"short_name": short_name,
"version": version,
"temporal": f"{start_date}T00:00:00Z,{end_date}T23:59:59Z",
"bounding_box": bounding_box,
"page_size": 100,
"page_num": 1
}
collection = DataCollections.smap_l3_soil_moisture
end_date = date
start_date = end_date - datetime.timedelta(days=7) # Look for data up to a week before the specified date

response = requests.get(cmr_url, headers=headers, params=params)
logging.info(f"CMR URL: {response.url}")
response.raise_for_status()
return response.json()

def get_smap_soil_moisture_data(granule_url, bearer_token):
"""
Retrieves soil moisture data from the SMAP granule URL.
"""
headers = {
"Authorization": f"Bearer {bearer_token}"
}
try:
granules = collection.search(
temporal=(start_date, end_date),
bounding_box=(-180, -90, 180, 90), # Global coverage
limit=1,
sort_key="-start_date"
)

if not granules:
logging.error(f"No SMAP data found from {start_date} to {end_date}")
return None

granule = granules[0]
logging.info(f"Found SMAP data for date: {granule.date}")

# Download the data
data = granule.download(auth)
return BytesIO(data)

response = requests.get(granule_url, headers=headers)
response.raise_for_status()
return response.content
except Exception as e:
logging.error(f"Error searching or downloading SMAP data: {e}")
return None

def debug_hdf5_structure(data):
def extract_soil_moisture(hdf_file, polygon):
"""
Debug function to print the structure of the HDF5 file.
Extract soil moisture data for the given polygon from the HDF file.
"""
with h5py.File(io.BytesIO(data), 'r') as f:
def print_structure(name, obj):
logging.debug(name)
f.visititems(print_structure)
try:
with h5py.File(hdf_file, 'r') as file:
soil_moisture = file['Soil_Moisture_Retrieval_Data']['soil_moisture'][:]
lat = file['cell_lat'][:]
lon = file['cell_lon'][:]

shape = Polygon(polygon)
mask = np.zeros_like(soil_moisture, dtype=bool)

for i in range(soil_moisture.shape[0]):
for j in range(soil_moisture.shape[1]):
if shape.contains(Point(lon[j], lat[i])):
mask[i, j] = True

valid_data = soil_moisture[mask & (soil_moisture != -9999)]
if len(valid_data) > 0:
return np.mean(valid_data), soil_moisture, lat, lon, mask
else:
logging.warning("No valid soil moisture data found within the polygon")
return None, None, None, None, None
except Exception as e:
logging.error(f"Error extracting soil moisture data: {e}")
return None, None, None, None, None

def visualize_soil_moisture(polygon, soil_moisture, lat, lon, mask, average_moisture):
if not matplotlib_available:
logging.error("Matplotlib is not installed. Cannot create visualization.")
return

def process_smap_data(data):
"""
Processes the SMAP data from HDF5 format into a pandas DataFrame.
"""
with h5py.File(io.BytesIO(data), 'r') as f:
# Debug the structure of the HDF5 file
logging.info("HDF5 file structure:")
f.visititems(lambda name, obj: logging.info(name))

# Extract data from Soil_Moisture_Retrieval_Data group
soil_moisture_data = f['Soil_Moisture_Retrieval_Data']['soil_moisture_option3'][:]
latitude_data = f['Soil_Moisture_Retrieval_Data']['latitude'][:]
longitude_data = f['Soil_Moisture_Retrieval_Data']['longitude'][:]
date_data = f['Soil_Moisture_Retrieval_Data']['tb_time_utc'][:].astype('U')

# Attempt to extract quality flag data if it exists
quality_flag_data = None
if 'retrieval_qual_flag_option3' in f['Soil_Moisture_Retrieval_Data']:
quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag_option3'][:]
else:
quality_flag_data = f['Soil_Moisture_Retrieval_Data']['retrieval_qual_flag'][:]
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon as mplPolygon

df = pd.DataFrame({
'date': date_data,
'latitude': latitude_data,
'longitude': longitude_data,
'soil_moisture': soil_moisture_data,
'quality_flag': quality_flag_data
})
fig, ax = plt.subplots(figsize=(10, 8))

# Filter out invalid soil moisture values and ensure recommended quality
df = df[(df['soil_moisture'] != -9999.0) & (df['quality_flag'] & 0b1 == 0)]
# Plot the entire soil moisture data
im = ax.imshow(soil_moisture, cmap='YlGnBu', extent=[lon.min(), lon.max(), lat.min(), lat.max()],
origin='lower', alpha=0.5)

return df

def clean_and_convert_dates(date_series):
"""
Cleans and converts a series of date strings to datetime objects, filtering out invalid formats.
"""
def try_parsing_date(text):
for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"):
try:
return datetime.strptime(text, fmt)
except ValueError:
continue
return None

cleaned_dates = date_series.apply(try_parsing_date)
return cleaned_dates.dropna()

def plot_soil_moisture(df):
"""
Plots the soil moisture data.
"""
df['date'] = clean_and_convert_dates(df['date'])
# Highlight the polygon
poly = mplPolygon(polygon, facecolor='none', edgecolor='red', linewidth=2)
ax.add_patch(poly)

if df.empty:
logging.info("No valid soil moisture data available to plot.")
return
# Set the extent to focus on the polygon
min_lon, min_lat = np.min(polygon, axis=0)
max_lon, max_lat = np.max(polygon, axis=0)
ax.set_xlim(min_lon - 1, max_lon + 1)
ax.set_ylim(min_lat - 1, max_lat + 1)

plt.figure(figsize=(14, 7))
plt.scatter(df['date'], df['soil_moisture'], marker='o')
plt.title('Daily Soil Moisture')
plt.xlabel('Date')
plt.ylabel('Soil Moisture')
plt.grid(True)
plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=1))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
plt.gcf().autofmt_xdate()
plt.show()

def get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude):
"""
Main function to retrieve and process SMAP soil moisture data.
"""
bounding_box = f"{longitude-0.1},{latitude-0.1},{longitude+0.1},{latitude+0.1}"
plt.colorbar(im, label='Soil Moisture')
plt.title(f'Soil Moisture Map\nAverage within polygon: {average_moisture:.4f}')
plt.xlabel('Longitude')
plt.ylabel('Latitude')

logging.info(f"Bounding Box: {bounding_box}")
logging.info(f"Start Date: {start_date}")
logging.info(f"End Date: {end_date}")

search_results = search_smap_data(start_date, end_date, bounding_box, bearer_token)

# Check if any granules are found
if 'feed' in search_results and 'entry' in search_results['feed'] and len(search_results['feed']['entry']) > 0:
granule_url = search_results['feed']['entry'][0]['links'][0]['href']
logging.info(f"Granule URL: {granule_url}")

# Get soil moisture data from the first granule (for simplicity)
soil_moisture_data = get_smap_soil_moisture_data(granule_url, bearer_token)

# Debug the HDF5 file structure
debug_hdf5_structure(soil_moisture_data)
plt.show()

# Process the data
df_soil_moisture = process_smap_data(soil_moisture_data)
logging.info(df_soil_moisture.head())
def main(date, lat, lon, visual):
# Set up authentication
auth = Auth()
auth.login()

# Plot the data
plot_soil_moisture(df_soil_moisture)

return df_soil_moisture
else:
logging.warning("No granules found for the given search criteria.")
return pd.DataFrame()
# Get and simplify the HUC8 polygon
huc8_polygon = get_huc8_polygon(lat, lon)
if not huc8_polygon:
logging.error("Failed to retrieve HUC8 polygon")
return

def main(bearer_token, start_date, end_date, latitude, longitude):
df_soil_moisture = get_smap_timeseries(bearer_token, start_date, end_date, latitude, longitude)
if not df_soil_moisture.empty:
logging.info("Soil moisture data retrieved successfully.")
simplified_polygon = simplify_polygon(huc8_polygon)
logging.info(f"Simplified polygon: {simplified_polygon}")

hdf_file = search_and_download_smap_data(date, auth)
if hdf_file:
soil_moisture, full_soil_moisture, lat_data, lon_data, mask = extract_soil_moisture(hdf_file, simplified_polygon)
if soil_moisture is not None:
logging.info(f"Average soil moisture for the given polygon on or before {date}: {soil_moisture:.4f}")

if visual:
if matplotlib_available:
visualize_soil_moisture(simplified_polygon, full_soil_moisture, lat_data, lon_data, mask, soil_moisture)
else:
logging.warning("Matplotlib is not installed. Skipping visualization.")
else:
logging.error("Failed to calculate soil moisture")
else:
logging.warning("Failed to retrieve soil moisture data.")
return df_soil_moisture
logging.error("Failed to find or download SMAP data")

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Fetch SMAP soil moisture data.')
parser.add_argument('--bearer_token', type=str, required=True, help='Bearer token for authentication')
parser.add_argument('--start_date', type=str, required=True, help='Start date in the format YYYY-MM-DD')
parser.add_argument('--end_date', type=str, required=True, help='End date in the format YYYY-MM-DD')
parser.add_argument('--latitude', type=float, required=True, help='Latitude of the location')
parser.add_argument('--longitude', type=float, required=True, help='Longitude of the location')
parser = argparse.ArgumentParser(description='Calculate average soil moisture for a HUC8 polygon from SMAP L3 data.')
parser.add_argument('--date', type=lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').date(), required=True, help='Date in YYYY-MM-DD format')
parser.add_argument('--lat', type=float, required=True, help='Latitude of the point within the desired HUC8 polygon')
parser.add_argument('--lon', type=float, required=True, help='Longitude of the point within the desired HUC8 polygon')
parser.add_argument('--visual', action='store_true', help='Enable matplotlib visualization')
args = parser.parse_args()

main(args.bearer_token, args.start_date, args.end_date, args.latitude, args.longitude)
main(args.date, args.lat, args.lon, args.visual)
Loading

0 comments on commit 297b845

Please sign in to comment.