From 3d24ddc0ffc3e56e869edeeda4b0e245925a94fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knud=20M=C3=B6ller?= Date: Mon, 20 Nov 2017 10:21:55 +0100 Subject: [PATCH 1/3] add ogc:Filter constraints to gather stage in CSW harvester --- ckanext/spatial/harvesters/csw.py | 5 ++++- ckanext/spatial/lib/csw_client.py | 3 +-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index 2853a10c..e8a2fb59 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -62,6 +62,9 @@ def get_original_url(self, harvest_object_id): def output_schema(self): return 'gmd' + def get_constraints(self): + return [] + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.CSW.gather') log.debug('CswHarvester gather_stage for job: %r', harvest_job) @@ -92,7 +95,7 @@ def gather_stage(self, harvest_job): log.debug('Starting gathering for %s' % url) guids_in_harvest = set() try: - for identifier in self.csw.getidentifiers(page=10, outputschema=self.output_schema(), cql=cql): + for identifier in self.csw.getidentifiers(page=10, outputschema=self.output_schema(), cql=cql, constraints=self.get_constraints()): try: log.info('Got identifier %s from the CSW', identifier) if identifier is None: diff --git a/ckanext/spatial/lib/csw_client.py b/ckanext/spatial/lib/csw_client.py index 207a0d40..7366a592 100644 --- a/ckanext/spatial/lib/csw_client.py +++ b/ckanext/spatial/lib/csw_client.py @@ -101,9 +101,8 @@ def getrecords(self, qtype=None, keywords=[], def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief", keywords=[], limit=None, page=10, outputschema="gmd", - startposition=0, cql=None, **kw): + startposition=0, cql=None, constraints=[], **kw): from owslib.csw import namespaces - constraints = [] csw = self._ows(**kw) if qtype is not None: From da705a1ea718e920ef0e0ccb5b138cc5f9fc9304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knud=20M=C3=B6ller?= Date: Mon, 20 
Nov 2017 13:06:26 +0100 Subject: [PATCH 2/3] add documentation for get_constraints() --- ckanext/spatial/harvesters/csw.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index e8a2fb59..4f3c1ec1 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -63,6 +63,9 @@ def output_schema(self): return 'gmd' def get_constraints(self): + '''Returns the CSW constraints that should be used during gather stage. + Should be overridden by sub-classes. + ''' return [] def gather_stage(self, harvest_job): From 9cf1b27a470d1fbf476ac964cabc3f76b58a5f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knud=20M=C3=B6ller?= Date: Mon, 4 Dec 2017 11:56:15 +0100 Subject: [PATCH 3/3] pass harvest_job to get_constraints --- ckanext/spatial/harvesters/csw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index 4f3c1ec1..9875c572 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -62,7 +62,7 @@ def get_original_url(self, harvest_object_id): def output_schema(self): return 'gmd' - def get_constraints(self): + def get_constraints(self, harvest_job): '''Returns the CSW constraints that should be used during gather stage. Should be overridden by sub-classes. ''' @@ -98,7 +98,7 @@ def gather_stage(self, harvest_job): log.debug('Starting gathering for %s' % url) guids_in_harvest = set() try: - for identifier in self.csw.getidentifiers(page=10, outputschema=self.output_schema(), cql=cql, constraints=self.get_constraints()): + for identifier in self.csw.getidentifiers(page=10, outputschema=self.output_schema(), cql=cql, constraints=self.get_constraints(harvest_job)): try: log.info('Got identifier %s from the CSW', identifier) if identifier is None: