diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index c94189a7c..c8b2a6b48 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,4 +1,4 @@ # These are supported funding model platforms #github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] -custom: ['https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=WT27AS28UFSNW&source=url'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] +custom: ['https://github.com/geopython/pygeoapi/wiki/Sponsorship'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4d98b4478..3805b9aa0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -94,7 +94,6 @@ jobs: pip3 install -r requirements-manager.txt pip3 install -r requirements-django.txt python3 setup.py install - pip3 install --upgrade "sqlalchemy<2" pip3 install --global-option=build_ext --global-option="-I/usr/include/gdal" GDAL==`gdal-config --version` #pip3 install --upgrade rasterio==1.1.8 - name: setup test data ⚙️ diff --git a/.github/workflows/vulnerabilities.yml b/.github/workflows/vulnerabilities.yml index 3252252ed..d8ac5199c 100644 --- a/.github/workflows/vulnerabilities.yml +++ b/.github/workflows/vulnerabilities.yml @@ -22,7 +22,7 @@ jobs: working-directory: . steps: - name: Checkout pygeoapi - uses: actions/checkout@v4 + uses: actions/checkout@master - name: Scan vulnerabilities with trivy uses: aquasecurity/trivy-action@master with: @@ -37,6 +37,9 @@ jobs: docker buildx build -t ${{ github.repository }}:${{ github.sha }} --platform linux/amd64 --no-cache -f Dockerfile . 
- name: Scan locally built Docker image for vulnerabilities with trivy uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2 + TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1 with: scan-type: image exit-code: 1 diff --git a/docs/source/_static/openapi_admin.png b/docs/source/_static/openapi_admin.png new file mode 100644 index 000000000..8fd9af443 Binary files /dev/null and b/docs/source/_static/openapi_admin.png differ diff --git a/docs/source/admin-api.rst b/docs/source/admin-api.rst index 346df0a91..19ce12b9a 100644 --- a/docs/source/admin-api.rst +++ b/docs/source/admin-api.rst @@ -14,6 +14,15 @@ The API is enabled with the following server configuration: server: admin: true # boolean on whether to enable Admin API. +.. note:: + + If you generate the OpenAPI definition after enabling the admin API, the admin routes will be exposed on ``/openapi`` + + .. image:: /_static/openapi_admin.png + :alt: admin routes + :align: center + + Access control -------------- diff --git a/docs/source/conf.py b/docs/source/conf.py index dff89f13c..6a9e11d61 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -112,7 +112,7 @@ def __getattr__(cls, name): # built documents. # # The short X.Y version. -version = '0.18.dev0' +version = '0.19.dev0' # The full version, including alpha/beta/rc tags. release = version diff --git a/docs/source/data-publishing/ogcapi-features.rst b/docs/source/data-publishing/ogcapi-features.rst index 00d48a020..12478c1d2 100644 --- a/docs/source/data-publishing/ogcapi-features.rst +++ b/docs/source/data-publishing/ogcapi-features.rst @@ -145,7 +145,11 @@ To publish an ESRI `Feature Service`_ or `Map Service`_ specify the URL for the * ``id_field`` will often be ``OBJECTID``, ``objectid``, or ``FID``. * If the map or feature service is not shared publicly, the ``username`` and ``password`` fields can be set in the - configuration to authenticate into the service. 
+ configuration to authenticate to the service. +* If the map or feature service is self-hosted and not shared publicly, the ``token_service`` and optional ``referer`` fields + can be set in the configuration to authenticate to the service. + +To publish from an ArcGIS online hosted service: .. code-block:: yaml @@ -158,6 +162,24 @@ To publish an ESRI `Feature Service`_ or `Map Service`_ specify the URL for the crs: 4326 # Optional crs (default is EPSG:4326) username: username # Optional ArcGIS username password: password # Optional ArcGIS password + token_service: https://your.server.com/arcgis/sharing/rest/generateToken # optional URL to your generateToken service + referer: https://your.server.com # optional referer, defaults to https://www.arcgis.com if not set + +To publish from a self-hosted service that is not publicly accessible, the ``token_service`` field is required: + +.. code-block:: yaml + + providers: + - type: feature + name: ESRI + data: https://your.server.com/arcgis/rest/services/your-layer/MapServer/0 + id_field: objectid + time_field: date_in_your_device_time_zone # Optional time field + crs: 4326 # Optional crs (default is EPSG:4326) + username: username # Optional ArcGIS username + password: password # Optional ArcGIS password + token_service: https://your.server.com/arcgis/sharing/rest/generateToken # Optional url to your generateToken service + referer: https://your.server.com # Optional referer, defaults to https://www.arcgis.com if not set GeoJSON ^^^^^^^ diff --git a/docs/source/security.rst b/docs/source/security.rst index 706ae6d10..df81f8a18 100644 --- a/docs/source/security.rst +++ b/docs/source/security.rst @@ -14,4 +14,5 @@ as required. 
The following projects provide security frameworks atop pygeoapi: * `fastgeoapi `_ -* `pygeoapi-auth `_ +* `pygeoapi-auth-deployment `_ +* `pygeoapi-auth `_ (Python package for use along with pygeoapi-auth-deployment) diff --git a/pygeoapi/__init__.py b/pygeoapi/__init__.py index e66240e72..9c5d35688 100644 --- a/pygeoapi/__init__.py +++ b/pygeoapi/__init__.py @@ -30,7 +30,7 @@ # # ================================================================= -__version__ = '0.18.dev0' +__version__ = '0.19.dev0' import click try: diff --git a/pygeoapi/api/itemtypes.py b/pygeoapi/api/itemtypes.py index 661f5cd80..008b28cb7 100644 --- a/pygeoapi/api/itemtypes.py +++ b/pygeoapi/api/itemtypes.py @@ -121,29 +121,22 @@ def get_collection_queryables(api: API, request: Union[APIRequest, Any], HTTPStatus.NOT_FOUND, headers, request.format, 'NotFound', msg) LOGGER.debug('Creating collection queryables') - try: - LOGGER.debug('Loading feature provider') - p = load_plugin('provider', get_provider_by_type( - api.config['resources'][dataset]['providers'], 'feature')) - except ProviderTypeError: + + p = None + for pt in ['feature', 'coverage', 'record']: try: - LOGGER.debug('Loading coverage provider') + LOGGER.debug(f'Loading {pt} provider') p = load_plugin('provider', get_provider_by_type( - api.config['resources'][dataset]['providers'], 'coverage')) # noqa + api.config['resources'][dataset]['providers'], pt)) + break except ProviderTypeError: - LOGGER.debug('Loading record provider') - p = load_plugin('provider', get_provider_by_type( - api.config['resources'][dataset]['providers'], 'record')) - finally: - msg = 'queryables not available for this collection' - return api.get_exception( - HTTPStatus.BAD_REQUEST, headers, request.format, - 'NoApplicableError', msg) + LOGGER.debug(f'Providing type {pt} not found') - except ProviderGenericError as err: + if p is None: + msg = 'queryables not available for this collection' return api.get_exception( - err.http_status_code, headers, 
request.format, - err.ogc_exception_code, err.message) + HTTPStatus.BAD_REQUEST, headers, request.format, + 'NoApplicableError', msg) queryables = { 'type': 'object', diff --git a/pygeoapi/flask_app.py b/pygeoapi/flask_app.py index ecc3e53ce..d3a59d602 100644 --- a/pygeoapi/flask_app.py +++ b/pygeoapi/flask_app.py @@ -279,11 +279,7 @@ def collection_items(collection_id, item_id=None): """ if item_id is None: - if request.method == 'GET': # list items - return execute_from_flask(itemtypes_api.get_collection_items, - request, collection_id, - skip_valid_check=True) - elif request.method == 'POST': # filter or manage items + if request.method == 'POST': # filter or manage items if request.content_type is not None: if request.content_type == 'application/geo+json': return execute_from_flask( @@ -298,6 +294,10 @@ def collection_items(collection_id, item_id=None): return execute_from_flask( itemtypes_api.manage_collection_item, request, 'options', collection_id, skip_valid_check=True) + else: # GET: list items + return execute_from_flask(itemtypes_api.get_collection_items, + request, collection_id, + skip_valid_check=True) elif request.method == 'DELETE': return execute_from_flask(itemtypes_api.manage_collection_item, diff --git a/pygeoapi/openapi.py b/pygeoapi/openapi.py index 3cf5c0b8f..fe390b742 100644 --- a/pygeoapi/openapi.py +++ b/pygeoapi/openapi.py @@ -134,6 +134,52 @@ def gen_response_object(description: str, media_type: str, return response +def gen_contact(cfg: dict) -> dict: + """ + Generates an OpenAPI contact object with OGC extensions + based on OGC API - Records contact + + :param cfg: `dict` of configuration + + :returns: `dict` of OpenAPI contact object + """ + + contact = { + 'name': cfg['metadata']['provider']['name'], + 'url': cfg['metadata']['provider']['url'], + 'email': cfg['metadata']['contact']['email'] + } + + contact['x-ogc-serviceContact'] = { + 'name': cfg['metadata']['contact']['name'], + 'position': cfg['metadata']['contact']['position'], 
+ 'addresses': [{ + 'deliveryPoint': [cfg['metadata']['contact']['address']], + 'city': cfg['metadata']['contact']['city'], + 'administrativeArea': cfg['metadata']['contact']['stateorprovince'], # noqa + 'postalCode': cfg['metadata']['contact']['postalcode'], + 'country': cfg['metadata']['contact']['country'] + }], + 'phones': [{ + 'type': 'main', 'value': cfg['metadata']['contact']['phone'] + }, { + 'type': 'fax', 'value': cfg['metadata']['contact']['fax'] + }], + 'emails': [{ + 'value': cfg['metadata']['contact']['email'] + }], + 'contactInstructions': cfg['metadata']['contact']['instructions'], + 'links': [{ + 'type': 'text/html', + 'href': cfg['metadata']['contact']['url'] + }], + 'hoursOfService': cfg['metadata']['contact']['hours'], + 'roles': [cfg['metadata']['contact']['role']] + } + + return contact + + def get_oas_30(cfg: dict, fail_on_invalid_collection: bool = True) -> dict: """ Generates an OpenAPI 3.0 Document @@ -167,11 +213,7 @@ def get_oas_30(cfg: dict, fail_on_invalid_collection: bool = True) -> dict: 'x-keywords': l10n.translate(cfg['metadata']['identification']['keywords'], locale_), # noqa 'termsOfService': cfg['metadata']['identification']['terms_of_service'], - 'contact': { - 'name': cfg['metadata']['provider']['name'], - 'url': cfg['metadata']['provider']['url'], - 'email': cfg['metadata']['contact']['email'] - }, + 'contact': gen_contact(cfg), 'license': { 'name': cfg['metadata']['license']['name'], 'url': cfg['metadata']['license']['url'] diff --git a/pygeoapi/process/manager/dummy.py b/pygeoapi/process/manager/dummy.py index 7c3a7037b..6528f53a7 100644 --- a/pygeoapi/process/manager/dummy.py +++ b/pygeoapi/process/manager/dummy.py @@ -116,7 +116,8 @@ def execute_process( self._send_in_progress_notification(subscriber) processor = self.get_processor(process_id) try: - jfmt, outputs = processor.execute(data_dict) + jfmt, outputs = processor.execute( + data_dict, outputs=requested_outputs) current_status = JobStatus.successful 
self._send_success_notification(subscriber, outputs) except Exception as err: diff --git a/pygeoapi/process/manager/mongodb_.py b/pygeoapi/process/manager/mongodb_.py index 2b64806cf..44bce6dbe 100644 --- a/pygeoapi/process/manager/mongodb_.py +++ b/pygeoapi/process/manager/mongodb_.py @@ -32,6 +32,7 @@ from pymongo import MongoClient +from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( JobNotFoundError, JobResultNotFoundError, @@ -151,8 +152,16 @@ def get_job_result(self, job_id): if entry["status"] != "successful": LOGGER.info("JOBMANAGER - job not finished or failed") return (None,) - with open(entry["location"], "r") as file: - data = json.load(file) + if not entry["location"]: + LOGGER.warning(f"job {job_id!r} - unknown result location") + raise JobResultNotFoundError() + if entry["mimetype"] in (None, FORMAT_TYPES[F_JSON], + FORMAT_TYPES[F_JSONLD]): + with open(entry["location"], "r") as file: + data = json.load(file) + else: + with open(entry["location"], "rb") as file: + data = file.read() LOGGER.info("JOBMANAGER - MongoDB job result queried") return entry["mimetype"], data except Exception as err: diff --git a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py index 72f3b75c4..16d25ab8f 100644 --- a/pygeoapi/process/manager/postgresql.py +++ b/pygeoapi/process/manager/postgresql.py @@ -49,6 +49,7 @@ from sqlalchemy.engine import make_url from sqlalchemy.orm import Session +from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( JobNotFoundError, JobResultNotFoundError, @@ -292,8 +293,13 @@ def get_job_result(self, job_id: str) -> Tuple[str, Any]: else: try: location = Path(location) - with location.open(encoding='utf-8') as fh: - result = json.load(fh) + if mimetype in (None, FORMAT_TYPES[F_JSON], + FORMAT_TYPES[F_JSONLD]): + with location.open('r', encoding='utf-8') as fh: + result = json.load(fh) + else: + with location.open('rb') as fh: + result = 
fh.read() except (TypeError, FileNotFoundError, json.JSONDecodeError): raise JobResultNotFoundError() else: diff --git a/pygeoapi/process/manager/tinydb_.py b/pygeoapi/process/manager/tinydb_.py index 2f022a33c..b04d29a49 100644 --- a/pygeoapi/process/manager/tinydb_.py +++ b/pygeoapi/process/manager/tinydb_.py @@ -37,6 +37,7 @@ import tinydb from filelock import FileLock +from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( JobNotFoundError, JobResultNotFoundError, @@ -211,8 +212,13 @@ def get_job_result(self, job_id: str) -> Tuple[str, Any]: else: try: location = Path(location) - with location.open('r', encoding='utf-8') as filehandler: - result = json.load(filehandler) + if mimetype in (None, FORMAT_TYPES[F_JSON], + FORMAT_TYPES[F_JSONLD]): + with location.open('r', encoding='utf-8') as filehandler: + result = json.load(filehandler) + else: + with location.open('rb') as filehandler: + result = filehandler.read() except (TypeError, FileNotFoundError, json.JSONDecodeError): raise JobResultNotFoundError() else: diff --git a/pygeoapi/provider/esri.py b/pygeoapi/provider/esri.py index 8179a705b..47d74e2b9 100644 --- a/pygeoapi/provider/esri.py +++ b/pygeoapi/provider/esri.py @@ -62,8 +62,9 @@ def __init__(self, provider_def): self.crs = provider_def.get('crs', '4326') self.username = provider_def.get('username') self.password = provider_def.get('password') + self.token_url = provider_def.get('token_service', GENERATE_TOKEN_URL) + self.token_referer = provider_def.get('referer', ARCGIS_URL) self.token = None - self.session = Session() self.login() @@ -194,16 +195,15 @@ def login(self): msg = 'Missing ESRI login information, not setting token' LOGGER.debug(msg) return - params = { 'f': 'pjson', 'username': self.username, 'password': self.password, - 'referer': ARCGIS_URL + 'referer': self.token_referer } LOGGER.debug('Logging in') - with self.session.post(GENERATE_TOKEN_URL, data=params) as r: + with 
self.session.post(self.token_url, data=params) as r: self.token = r.json().get('token') # https://enterprise.arcgis.com/en/server/latest/administer/windows/about-arcgis-tokens.htm self.session.headers.update({ diff --git a/pygeoapi/provider/postgresql.py b/pygeoapi/provider/postgresql.py index d56bb7878..eb4b21454 100644 --- a/pygeoapi/provider/postgresql.py +++ b/pygeoapi/provider/postgresql.py @@ -62,7 +62,8 @@ import shapely from sqlalchemy import create_engine, MetaData, PrimaryKeyConstraint, asc, desc from sqlalchemy.engine import URL -from sqlalchemy.exc import InvalidRequestError, OperationalError +from sqlalchemy.exc import ConstraintColumnNotFoundError, \ + InvalidRequestError, OperationalError from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import Session, load_only from sqlalchemy.sql.expression import and_ @@ -515,7 +516,7 @@ def get_table_model( sqlalchemy_table_def = metadata.tables[f'{schema}.{table_name}'] try: sqlalchemy_table_def.append_constraint(PrimaryKeyConstraint(id_field)) - except KeyError: + except (ConstraintColumnNotFoundError, KeyError): raise ProviderQueryError( f"No such id_field column ({id_field}) on {schema}.{table_name}.") diff --git a/pygeoapi/provider/rasterio_.py b/pygeoapi/provider/rasterio_.py index 69d2dbca8..3b0fbc2c7 100644 --- a/pygeoapi/provider/rasterio_.py +++ b/pygeoapi/provider/rasterio_.py @@ -80,6 +80,8 @@ def get_fields(self): dtype2 = dtype if dtype.startswith('float'): dtype2 = 'number' + elif dtype.startswith('int'): + dtype2 = 'integer' self._fields[i2] = { 'title': name, @@ -240,16 +242,15 @@ def query(self, properties=[], subsets={}, bbox=None, bbox_crs=4326, out_meta['units'] = _data.units LOGGER.debug('Serializing data in memory') - with MemoryFile() as memfile: - with memfile.open(**out_meta) as dest: - dest.write(out_image) - - if format_ == 'json': - LOGGER.debug('Creating output in CoverageJSON') - out_meta['bands'] = args['indexes'] - return self.gen_covjson(out_meta, out_image) - 
- else: # return data in native format + if format_ == 'json': + LOGGER.debug('Creating output in CoverageJSON') + out_meta['bands'] = args['indexes'] + return self.gen_covjson(out_meta, out_image) + + else: # return data in native format + with MemoryFile() as memfile: + with memfile.open(**out_meta) as dest: + dest.write(out_image) LOGGER.debug('Returning data in native format') return memfile.read() diff --git a/pygeoapi/provider/xarray_.py b/pygeoapi/provider/xarray_.py index 585879282..9ed2726b1 100644 --- a/pygeoapi/provider/xarray_.py +++ b/pygeoapi/provider/xarray_.py @@ -85,13 +85,19 @@ def __init__(self, provider_def): else: data_to_open = self.data - self._data = open_func(data_to_open) + try: + self._data = open_func(data_to_open) + except ValueError as err: + # Manage non-cf-compliant time dimensions + if 'time' in str(err): + self._data = open_func(self.data, decode_times=False) + else: + raise err + self.storage_crs = self._parse_storage_crs(provider_def) self._coverage_properties = self._get_coverage_properties() - self.axes = [self._coverage_properties['x_axis_label'], - self._coverage_properties['y_axis_label'], - self._coverage_properties['time_axis_label']] + self.axes = self._coverage_properties['axes'] self.get_fields() except Exception as err: @@ -101,15 +107,17 @@ def __init__(self, provider_def): def get_fields(self): if not self._fields: for key, value in self._data.variables.items(): - if len(value.shape) >= 3: + if key not in self._data.coords: LOGGER.debug('Adding variable') dtype = value.dtype if dtype.name.startswith('float'): dtype = 'number' + elif dtype.name.startswith('int'): + dtype = 'integer' self._fields[key] = { 'type': dtype, - 'title': value.attrs['long_name'], + 'title': value.attrs.get('long_name'), 'x-ogc-unit': value.attrs.get('units') } @@ -142,9 +150,9 @@ def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326, data = self._data[[*properties]] - if any([self._coverage_properties['x_axis_label'] in subsets, - 
self._coverage_properties['y_axis_label'] in subsets, - self._coverage_properties['time_axis_label'] in subsets, + if any([self._coverage_properties.get('x_axis_label') in subsets, + self._coverage_properties.get('y_axis_label') in subsets, + self._coverage_properties.get('time_axis_label') in subsets, datetime_ is not None]): LOGGER.debug('Creating spatio-temporal subset') @@ -163,18 +171,36 @@ def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326, self._coverage_properties['y_axis_label'] in subsets, len(bbox) > 0]): msg = 'bbox and subsetting by coordinates are exclusive' - LOGGER.warning(msg) + LOGGER.error(msg) raise ProviderQueryError(msg) else: - query_params[self._coverage_properties['x_axis_label']] = \ - slice(bbox[0], bbox[2]) - query_params[self._coverage_properties['y_axis_label']] = \ - slice(bbox[1], bbox[3]) + x_axis_label = self._coverage_properties['x_axis_label'] + x_coords = data.coords[x_axis_label] + if x_coords.values[0] > x_coords.values[-1]: + LOGGER.debug( + 'Reversing slicing of x axis from high to low' + ) + query_params[x_axis_label] = slice(bbox[2], bbox[0]) + else: + query_params[x_axis_label] = slice(bbox[0], bbox[2]) + y_axis_label = self._coverage_properties['y_axis_label'] + y_coords = data.coords[y_axis_label] + if y_coords.values[0] > y_coords.values[-1]: + LOGGER.debug( + 'Reversing slicing of y axis from high to low' + ) + query_params[y_axis_label] = slice(bbox[3], bbox[1]) + else: + query_params[y_axis_label] = slice(bbox[1], bbox[3]) LOGGER.debug('bbox_crs is not currently handled') if datetime_ is not None: - if self._coverage_properties['time_axis_label'] in subsets: + if self._coverage_properties['time_axis_label'] is None: + msg = 'Dataset does not contain a time axis' + LOGGER.error(msg) + raise ProviderQueryError(msg) + elif self._coverage_properties['time_axis_label'] in subsets: msg = 'datetime and temporal subsetting are exclusive' LOGGER.error(msg) raise ProviderQueryError(msg) @@ -196,13 +222,15 @@ 
def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326, LOGGER.warning(err) raise ProviderQueryError(err) - if (any([data.coords[self.x_field].size == 0, - data.coords[self.y_field].size == 0, - data.coords[self.time_field].size == 0])): + if any(size == 0 for size in data.sizes.values()): msg = 'No data found' LOGGER.warning(msg) raise ProviderNoDataError(msg) + if format_ == 'json': + # json does not support float32 + data = _convert_float32_to_float64(data) + out_meta = { 'bbox': [ data.coords[self.x_field].values[0], @@ -210,18 +238,20 @@ def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326, data.coords[self.x_field].values[-1], data.coords[self.y_field].values[-1] ], - "time": [ - _to_datetime_string(data.coords[self.time_field].values[0]), - _to_datetime_string(data.coords[self.time_field].values[-1]) - ], "driver": "xarray", "height": data.sizes[self.y_field], "width": data.sizes[self.x_field], - "time_steps": data.sizes[self.time_field], "variables": {var_name: var.attrs for var_name, var in data.variables.items()} } + if self.time_field is not None: + out_meta['time'] = [ + _to_datetime_string(data.coords[self.time_field].values[0]), + _to_datetime_string(data.coords[self.time_field].values[-1]), + ] + out_meta["time_steps"] = data.sizes[self.time_field] + LOGGER.debug('Serializing data in memory') if format_ == 'json': LOGGER.debug('Creating output in CoverageJSON') @@ -230,9 +260,11 @@ def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326, LOGGER.debug('Returning data in native zarr format') return _get_zarr_data(data) else: # return data in native format - with tempfile.TemporaryFile() as fp: + with tempfile.NamedTemporaryFile() as fp: LOGGER.debug('Returning data in native NetCDF format') - fp.write(data.to_netcdf()) + data.to_netcdf( + fp.name + ) # we need to pass a string to be able to use the "netcdf4" engine # noqa fp.seek(0) return fp.read() @@ -249,7 +281,6 @@ def gen_covjson(self, metadata, data, fields): 
LOGGER.debug('Creating CoverageJSON domain') minx, miny, maxx, maxy = metadata['bbox'] - mint, maxt = metadata['time'] selected_fields = { key: value for key, value in self.fields.items() @@ -285,11 +316,6 @@ def gen_covjson(self, metadata, data, fields): 'start': maxy, 'stop': miny, 'num': metadata['height'] - }, - self.time_field: { - 'start': mint, - 'stop': maxt, - 'num': metadata['time_steps'] } }, 'referencing': [{ @@ -304,6 +330,14 @@ def gen_covjson(self, metadata, data, fields): 'ranges': {} } + if self.time_field is not None: + mint, maxt = metadata['time'] + cj['domain']['axes'][self.time_field] = { + 'start': mint, + 'stop': maxt, + 'num': metadata['time_steps'], + } + for key, value in selected_fields.items(): parameter = { 'type': 'Parameter', @@ -322,7 +356,6 @@ def gen_covjson(self, metadata, data, fields): cj['parameters'][key] = parameter data = data.fillna(None) - data = _convert_float32_to_float64(data) try: for key, value in selected_fields.items(): @@ -330,13 +363,18 @@ def gen_covjson(self, metadata, data, fields): 'type': 'NdArray', 'dataType': value['type'], 'axisNames': [ - 'y', 'x', self._coverage_properties['time_axis_label'] + 'y', 'x' ], 'shape': [metadata['height'], - metadata['width'], - metadata['time_steps']] + metadata['width']] } cj['ranges'][key]['values'] = data[key].values.flatten().tolist() # noqa + + if self.time_field is not None: + cj['ranges'][key]['axisNames'].append( + self._coverage_properties['time_axis_label'] + ) + cj['ranges'][key]['shape'].append(metadata['time_steps']) except IndexError as err: LOGGER.warning(err) raise ProviderQueryError('Invalid query parameter') @@ -382,31 +420,37 @@ def _get_coverage_properties(self): self._data.coords[self.x_field].values[-1], self._data.coords[self.y_field].values[-1], ], - 'time_range': [ - _to_datetime_string( - self._data.coords[self.time_field].values[0] - ), - _to_datetime_string( - self._data.coords[self.time_field].values[-1] - ) - ], 'bbox_crs': 
'http://www.opengis.net/def/crs/OGC/1.3/CRS84', 'crs_type': 'GeographicCRS', 'x_axis_label': self.x_field, 'y_axis_label': self.y_field, - 'time_axis_label': self.time_field, 'width': self._data.sizes[self.x_field], 'height': self._data.sizes[self.y_field], - 'time': self._data.sizes[self.time_field], - 'time_duration': self.get_time_coverage_duration(), 'bbox_units': 'degrees', - 'resx': np.abs(self._data.coords[self.x_field].values[1] - - self._data.coords[self.x_field].values[0]), - 'resy': np.abs(self._data.coords[self.y_field].values[1] - - self._data.coords[self.y_field].values[0]), - 'restime': self.get_time_resolution() + 'resx': np.abs( + self._data.coords[self.x_field].values[1] + - self._data.coords[self.x_field].values[0] + ), + 'resy': np.abs( + self._data.coords[self.y_field].values[1] + - self._data.coords[self.y_field].values[0] + ), } + if self.time_field is not None: + properties['time_axis_label'] = self.time_field + properties['time_range'] = [ + _to_datetime_string( + self._data.coords[self.time_field].values[0] + ), + _to_datetime_string( + self._data.coords[self.time_field].values[-1] + ), + ] + properties['time'] = self._data.sizes[self.time_field] + properties['time_duration'] = self.get_time_coverage_duration() + properties['restime'] = self.get_time_resolution() + # Update properties based on the xarray's CRS epsg_code = self.storage_crs.to_epsg() LOGGER.debug(f'{epsg_code}') @@ -425,10 +469,12 @@ def _get_coverage_properties(self): properties['axes'] = [ properties['x_axis_label'], - properties['y_axis_label'], - properties['time_axis_label'] + properties['y_axis_label'] ] + if self.time_field is not None: + properties['axes'].append(properties['time_axis_label']) + return properties @staticmethod @@ -455,7 +501,8 @@ def get_time_resolution(self): :returns: time resolution string """ - if self._data[self.time_field].size > 1: + if self.time_field is not None \ + and self._data[self.time_field].size > 1: time_diff = 
(self._data[self.time_field][1] - self._data[self.time_field][0]) @@ -472,6 +519,9 @@ def get_time_coverage_duration(self): :returns: time coverage duration string """ + if self.time_field is None: + return None + dur = self._data[self.time_field][-1] - self._data[self.time_field][0] ms_difference = dur.values.astype('timedelta64[ms]').astype(np.double) @@ -634,7 +684,7 @@ def _convert_float32_to_float64(data): for var_name in data.variables: if data[var_name].dtype == 'float32': og_attrs = data[var_name].attrs - data[var_name] = data[var_name].astype('float64') + data[var_name] = data[var_name].astype('float64', copy=False) data[var_name].attrs = og_attrs return data diff --git a/pygeoapi/starlette_app.py b/pygeoapi/starlette_app.py index 00ed727d2..b1979f502 100644 --- a/pygeoapi/starlette_app.py +++ b/pygeoapi/starlette_app.py @@ -334,11 +334,7 @@ async def collection_items(request: Request, collection_id=None, item_id=None): if 'item_id' in request.path_params: item_id = request.path_params['item_id'] if item_id is None: - if request.method == 'GET': # list items - return await execute_from_starlette( - itemtypes_api.get_collection_items, request, collection_id, - skip_valid_check=True) - elif request.method == 'POST': # filter or manage items + if request.method == 'POST': # filter or manage items content_type = request.headers.get('content-type') if content_type is not None: if content_type == 'application/geo+json': @@ -357,6 +353,10 @@ async def collection_items(request: Request, collection_id=None, item_id=None): itemtypes_api.manage_collection_item, request, 'options', collection_id, skip_valid_check=True, ) + else: # GET: list items + return await execute_from_starlette( + itemtypes_api.get_collection_items, request, collection_id, + skip_valid_check=True) elif request.method == 'DELETE': return await execute_from_starlette( @@ -511,12 +511,13 @@ async def get_job_result_resource(request: Request, api_.get_job_result_resource, request, job_id, resource) 
-async def get_collection_edr_query(request: Request, collection_id=None, instance_id=None): # noqa +async def get_collection_edr_query(request: Request, collection_id=None, instance_id=None, location_id=None): # noqa """ OGC EDR API endpoints :param collection_id: collection identifier :param instance_id: instance identifier + :param location_id: location id of a /locations/ query :returns: HTTP response """ @@ -527,10 +528,15 @@ async def get_collection_edr_query(request: Request, collection_id=None, instanc if 'instance_id' in request.path_params: instance_id = request.path_params['instance_id'] - query_type = request["path"].split('/')[-1] # noqa + if 'location_id' in request.path_params: + location_id = request.path_params['location_id'] + query_type = 'locations' + else: + query_type = request['path'].split('/')[-1] + return await execute_from_starlette( edr_api.get_collection_edr_query, request, collection_id, - instance_id, query_type, + instance_id, query_type, location_id, skip_valid_check=True, ) diff --git a/pygeoapi/templates/collections/edr/query.html b/pygeoapi/templates/collections/edr/query.html index ac7f17d2d..d3af4ce3d 100644 --- a/pygeoapi/templates/collections/edr/query.html +++ b/pygeoapi/templates/collections/edr/query.html @@ -139,7 +139,7 @@ if (!firstLayer) { firstLayer = layer; layer.on('afterAdd', () => { - zoomToLayers([layers]) + zoomToLayers([layer]) if (!cov.coverages) { if (isVerticalProfile(cov) || isTimeSeries(cov)) { layer.openPopup(); diff --git a/pygeoapi/util.py b/pygeoapi/util.py index b0db50cd8..d7432a041 100644 --- a/pygeoapi/util.py +++ b/pygeoapi/util.py @@ -168,7 +168,7 @@ def yaml_load(fh: IO) -> dict: # # https://stackoverflow.com/a/55301129 env_matcher = re.compile( - r'.*?\$\{(?P\w+)(:-(?P[^}]+))?\}') + r'.*?\$\{(?P\w+)(:-(?P[^}]*))?\}') def env_constructor(loader, node): result = "" diff --git a/requirements.txt b/requirements.txt index 601f51b83..8ea9bd299 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-14,5 +14,5 @@ PyYAML rasterio requests shapely -SQLAlchemy<2.0.0 +SQLAlchemy tinydb diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 0477ef81f..faad9ed09 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -575,7 +575,7 @@ def test_conformance(config, api_): assert isinstance(root, dict) assert 'conformsTo' in root - assert len(root['conformsTo']) == 37 + assert len(root['conformsTo']) == 42 assert 'http://www.opengis.net/spec/ogcapi-features-2/1.0/conf/crs' \ in root['conformsTo'] @@ -604,7 +604,7 @@ def test_describe_collections(config, api_): collections = json.loads(response) assert len(collections) == 2 - assert len(collections['collections']) == 9 + assert len(collections['collections']) == 10 assert len(collections['links']) == 3 rsp_headers, code, response = api_.describe_collections(req, 'foo') diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index 2cd445898..ae19c28d6 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -79,6 +79,14 @@ def test_get_collection_queryables(config, api_): assert 'properties' in queryables assert len(queryables['properties']) == 5 + req = mock_api_request({'f': 'json'}) + rsp_headers, code, response = get_collection_queryables(api_, req, 'canada-metadata') # noqa + assert rsp_headers['Content-Type'] == 'application/schema+json' + queryables = json.loads(response) + + assert 'properties' in queryables + assert len(queryables['properties']) == 10 + # test with provider filtered properties api_.config['resources']['obs']['providers'][0]['properties'] = ['stn_id'] diff --git a/tests/pygeoapi-test-config-envvars.yml b/tests/pygeoapi-test-config-envvars.yml index f84fc79ac..f8b7af2e1 100644 --- a/tests/pygeoapi-test-config-envvars.yml +++ b/tests/pygeoapi-test-config-envvars.yml @@ -31,7 +31,7 @@ server: bind: host: 0.0.0.0 port: ${PYGEOAPI_PORT} - url: http://localhost:5000/ + url: ${PYGEOAPI_URL:-http://localhost:5000/} mimetype: application/json; 
charset=UTF-8 encoding: utf-8 language: en-US @@ -43,6 +43,8 @@ server: map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png attribution: '© OpenStreetMap contributors' + api_rules: # optional API design rules to which pygeoapi should adhere + url_prefix: ${PYGEOAPI_PREFIX:-} logging: level: DEBUG diff --git a/tests/pygeoapi-test-config.yml b/tests/pygeoapi-test-config.yml index 95a868631..58b62484f 100644 --- a/tests/pygeoapi-test-config.yml +++ b/tests/pygeoapi-test-config.yml @@ -398,6 +398,44 @@ resources: name: png mimetype: image/png + canada-metadata: + type: collection + title: + en: Open Canada sample data + fr: Exemple de donn\u00e9es Canada Ouvert + description: + en: Sample metadata records from open.canada.ca + fr: Exemples d'enregistrements de m\u00e9tadonn\u00e9es sur ouvert.canada.ca + keywords: + en: + - canada + - open data + fr: + - canada + - donn\u00e9es ouvertes + links: + - type: text/html + rel: canonical + title: information + href: https://open.canada.ca/en/open-data + hreflang: en-CA + - type: text/html + rel: alternate + title: informations + href: https://ouvert.canada.ca/fr/donnees-ouvertes + hreflang: fr-CA + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + providers: + - type: record + name: TinyDBCatalogue + data: tests/data/open.canada.ca/sample-records.tinydb + id_field: externalId + time_field: created + title_field: title + hello-world: type: process processor: diff --git a/tests/test_config.py b/tests/test_config.py index 394a2bb98..2b93a30fd 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -54,8 +54,19 @@ def test_config_envvars(): assert isinstance(config, dict) assert config['server']['bind']['port'] == 5001 + assert config['server']['url'] == 'http://localhost:5000/' assert config['metadata']['identification']['title'] == \ 'pygeoapi default instance my title' + assert config['server']['api_rules']['url_prefix'] == '' + + os.environ['PYGEOAPI_URL'] = 
'https://localhost:5000' + os.environ['PYGEOAPI_PREFIX'] = 'v1' + + with open(get_test_file_path('pygeoapi-test-config-envvars.yml')) as fh: + config = yaml_load(fh) + + assert config['server']['url'] == 'https://localhost:5000' + assert config['server']['api_rules']['url_prefix'] == 'v1' os.environ.pop('PYGEOAPI_PORT') diff --git a/tests/test_manager.py b/tests/test_manager.py index a6f76cf31..7d0d01aa0 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -30,9 +30,11 @@ import pytest -from pygeoapi.process.base import UnknownProcessError +from pygeoapi.process.base import UnknownProcessError, JobNotFoundError from pygeoapi.process.manager.base import get_manager +from .util import get_test_file_path + @pytest.fixture() def config() -> Dict: @@ -41,6 +43,7 @@ def config() -> Dict: 'manager': { 'name': 'TinyDB', 'output_dir': '/tmp', + 'connection': '/tmp/pygeoapi-process-manager-test.db' } }, 'resources': { @@ -71,3 +74,28 @@ def test_get_processor_raises_exception(config): manager = get_manager(config) with pytest.raises(expected_exception=UnknownProcessError): manager.get_processor('foo') + + +def test_get_job_result_binary(config): + manager = get_manager(config) + nc_file = get_test_file_path("tests/data/coads_sst.nc") + job_id = "15eeae38-608c-11ef-81c8-0242ac130002" + job_metadata = { + "type": "process", + "identifier": job_id, + "process_id": "dummy", + "job_start_datetime": "2024-08-22T12:00:00.000000Z", + "job_end_datetime": "2024-08-22T12:00:01.000000Z", + "status": "successful", + "location": nc_file, + "mimetype": "application/x-netcdf", + "message": "Job complete", + "progress": 100 + } + try: + manager.get_job(job_id) + except JobNotFoundError: + manager.add_job(job_metadata) + mimetype, result = manager.get_job_result(job_id) + assert mimetype == "application/x-netcdf" + assert isinstance(result, bytes) diff --git a/tests/test_util.py b/tests/test_util.py index c71ce80a0..d15aac321 100644 --- a/tests/test_util.py +++ 
b/tests/test_util.py @@ -172,7 +172,7 @@ def test_path_basename(): def test_filter_dict_by_key_value(config): collections = util.filter_dict_by_key_value(config['resources'], 'type', 'collection') - assert len(collections) == 9 + assert len(collections) == 10 notfound = util.filter_dict_by_key_value(config['resources'], 'type', 'foo') diff --git a/tests/test_xarray_zarr_provider.py b/tests/test_xarray_zarr_provider.py index 5163b32a6..ec014e655 100644 --- a/tests/test_xarray_zarr_provider.py +++ b/tests/test_xarray_zarr_provider.py @@ -30,6 +30,7 @@ from numpy import float64, int64 import pytest +import xarray as xr from pygeoapi.provider.xarray_ import XarrayProvider from pygeoapi.util import json_serial @@ -53,6 +54,20 @@ def config(): } +@pytest.fixture() +def config_no_time(tmp_path): + ds = xr.open_zarr(path) + ds = ds.sel(time=ds.time[0]) + ds = ds.drop_vars('time') + ds.to_zarr(tmp_path / 'no_time.zarr') + return { + 'name': 'zarr', + 'type': 'coverage', + 'data': str(tmp_path / 'no_time.zarr'), + 'format': {'name': 'zarr', 'mimetype': 'application/zip'}, + } + + def test_provider(config): p = XarrayProvider(config) @@ -85,3 +100,14 @@ def test_numpy_json_serial(): d = float64(500.00000005) assert json_serial(d) == 500.00000005 + + +def test_no_time(config_no_time): + p = XarrayProvider(config_no_time) + + assert len(p.fields) == 4 + assert p.axes == ['lon', 'lat'] + + coverage = p.query(format='json') + + assert sorted(coverage['domain']['axes'].keys()) == ['x', 'y']