From b75ae2f3b3563878382e202b36c5ae6668434b99 Mon Sep 17 00:00:00 2001 From: Binita Date: Mon, 4 Nov 2024 15:32:27 -0600 Subject: [PATCH] added new check for opendap for umm-g and echo-g --- pyQuARC/code/custom_validator.py | 50 +++++++++++++++++++++++++++++ pyQuARC/schemas/check_messages.json | 8 +++++ pyQuARC/schemas/checks.json | 5 +++ pyQuARC/schemas/rule_mapping.json | 28 ++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py index bf3620d1..1973ca75 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -277,3 +277,53 @@ def count_check(count, values, key): items = [items] num_items = len(items) return {"valid": int(count) == num_items, "value": (count, num_items)} + + @staticmethod + def opendap_link_check(related_urls, key, extra=None): + """ + Checks if the related_urls contains an OPeNDAP link with the type "OPENDAP DATA" or URL containing "opendap". + + Args: + related_urls (list): The related_urls field of the object, expected to be a list of URL objects. + key (dict): A dictionary with "type" and "url_keyword" keys for the checks. + extra (optional): An additional argument to match the expected function call signature. This argument is ignored. + + Returns: + dict: A validation result indicating whether a valid OPeNDAP link is present and the link itself if found. 
+ """ + + # If related_urls is None or not provided, initialize it as an empty list + if not related_urls: + related_urls = [] + + # If related_urls is not a list, assume it's a single URL string and wrap it in a list of one dictionary + elif isinstance(related_urls, str): + related_urls = [{"URL": related_urls, "Type": key.get("type", "OPENDAP DATA")}] + + # Default return object if no valid OPeNDAP link is found + return_obj = { + "valid": False, + "value": "None" + } + + # Extract type and keyword from key for clearer conditions + type_to_check = key.get("type", "OPENDAP DATA").upper() + url_keyword = key.get("url_keyword", "opendap").lower() + + # Process each URL object in the list + for url_obj in related_urls: + # Ensure that url_obj is a dictionary before accessing its fields + if not isinstance(url_obj, dict): + continue + + # Check for "opendap" in the URL + url_value = url_obj.get("URL", "").lower() + type_field = url_obj.get("Type", "").upper() + + # Check if the URL contains "opendap" or if the Type matches "OPENDAP DATA" + if url_keyword in url_value or type_field == type_to_check: + return_obj["valid"] = True + return_obj["value"] = url_obj.get("URL", "None") + break + + return return_obj diff --git a/pyQuARC/schemas/check_messages.json b/pyQuARC/schemas/check_messages.json index 0b8b38c8..18bde23f 100644 --- a/pyQuARC/schemas/check_messages.json +++ b/pyQuARC/schemas/check_messages.json @@ -1070,5 +1070,13 @@ "url": "https://wiki.earthdata.nasa.gov/display/CMR/Spatial+Extent" }, "remediation": "Recommend providing the horizontal pixel resolution, if applicable. If provided, this information will be indexed in the EDSC 'Horizontal Data Resolution' search facet which allows users to search by spatial resolution." + }, + "opendap_link_check": { + "failure": "No OPeNDAP URL is provided in the granule fields. 
An OPeNDAP link is recommended for data access.", + "help": { + "message": "OPeNDAP links allow for direct data access through the OPeNDAP protocol.", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs" + }, + "remediation": "Recommend providing an OPeNDAP link in the granule's Online Resources or Related URLs fields for enhanced data accessibility." } } \ No newline at end of file diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json index 778f4da3..ef303aa6 100644 --- a/pyQuARC/schemas/checks.json +++ b/pyQuARC/schemas/checks.json @@ -298,5 +298,10 @@ "data_type": "custom", "check_function": "count_check", "available": true + }, + "opendap_link_check": { + "data_type": "custom", + "check_function": "opendap_link_check", + "available": true } } diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index 2e3acc41..b6aab68e 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -3745,6 +3745,34 @@ "severity": "error", "check_id": "string_compare" }, + "opendap_link_check": { + "rule_name": "OPeNDAP Link Presence Check", + "fields_to_apply": { + "echo-g": [ + { + "fields": [ + "Granule/OnlineResources/OnlineResource/URL" + ] + } + ], + "umm-g": [ + { + "fields": [ + "RelatedURLs/URL" + ] + } + ] + }, + "data": [ + { + "type": "OPENDAP DATA", + "url_keyword": "opendap" + } + ], + "relation": "contains", + "check_id": "opendap_link_check", + "severity": "warning" + }, "location_keyword_presence_check": { "rule_name": "Location Keyword Presence Check", "fields_to_apply": {