diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d7920a59d..e94c0777a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,8 +4,8 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install requirements @@ -19,7 +19,7 @@ jobs: needs: lint strategy: matrix: - ckan-version: ["2.10", 2.9, 2.9-py2, 2.8, 2.7] + ckan-version: ["2.10", 2.9] fail-fast: false name: CKAN ${{ matrix.ckan-version }} @@ -55,14 +55,8 @@ jobs: # Replace default path to CKAN core config file with the one on the container sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini - name: Setup extension (CKAN >= 2.9) - if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }} run: | ckan -c test.ini db init ckan -c test.ini harvester initdb - - name: Setup extension (CKAN < 2.9) - if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }} - run: | - paster --plugin=ckan db init -c test.ini - paster --plugin=ckanext-harvest harvester initdb -c test.ini - name: Run tests run: pytest --ckan-ini=test.ini --cov=ckanext.harvest --disable-warnings ckanext/harvest/tests diff --git a/README.rst b/README.rst index 9172a1793..e8d5893ac 100644 --- a/README.rst +++ b/README.rst @@ -94,14 +94,8 @@ Configuration Run the following command to create the necessary tables in the database (ensuring the pyenv is activated): -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester initdb -ON CKAN <= 2.8:: - - (pyenv) $ paster --plugin=ckanext-harvest harvester initdb --config=/etc/ckan/default/production.ini - Finally, restart CKAN to have the changes take effect:: sudo service apache2 restart @@ -213,7 +207,7 @@ IF you want to set a timeout for harvest jobs, you can add this configuration op ckan.harvest.timeout = 1440 -The timeout 
value is in minutes, so 1440 represents 24 hours. +The timeout value is in minutes, so 1440 represents 24 hours. Any jobs which are timed out will create an error message for the user to see. If you don't specify this setting, the default will be False and there will be no timeout on harvest jobs. @@ -289,9 +283,9 @@ The following operations can be run from the command line as described underneat import) without involving the web UI or the queue backends. This is useful for testing a harvester without having to fire up gather/fetch_consumer processes, as is done in production. - + harvester run-test {source-id/name} force-import=guid1,guid2... - - In order to force an import of particular datasets, useful to + - In order to force an import of particular datasets, useful to target a dataset for dev purposes or when forcing imports on other environments. harvester gather-consumer @@ -335,22 +329,17 @@ The following operations can be run from the command line as described underneat The commands should be run with the pyenv activated and refer to your CKAN configuration file: -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester --help (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester sources -ON CKAN <= 2.8:: - (pyenv) $ paster --plugin=ckanext-harvest harvester sources --config=/etc/ckan/default/production.ini - **Note that on CKAN >= 2.9 all commands with an underscore in their name changed.** They now use a hyphen instead of an underscore (e.g. ``gather_consumer`` changed to ``gather-consumer``). Authorization ============= -Starting from CKAN 2.0, harvest sources behave exactly the same as datasets +Harvest sources behave exactly the same as datasets (they are actually internally implemented as a dataset type). That means they can be searched and faceted, and that the same authorization rules can be applied to them. The default authorization settings are based on organizations. 
@@ -700,10 +689,10 @@ harvester run-test You can run a harvester simply using the ``run-test`` command. This is handy for running a harvest with one command in the console and see all the output in-line. It runs the gather, fetch and import stages all in the same process. -You must ensure that you have pip installed ``dev-requirements.txt`` +You must ensure that you have pip installed ``dev-requirements.txt`` in ``/home/ckan/ckan/lib/default/src/ckanext-harvest`` before using the ``run-test`` command. - + This is useful for developing a harvester because you can insert break-points in your harvester, and rerun a harvest without having to restart the gather_consumer and fetch_consumer processes each time. In addition, because it @@ -727,35 +716,17 @@ handles the gathering and another one that handles the fetching and importing. To start the consumers run the following command (make sure you have your python environment activated): -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester gather-consumer -ON CKAN <= 2.8:: - - (pyenv) $ paster --plugin=ckanext-harvest harvester gather_consumer --config=/etc/ckan/default/production.ini - On another terminal, run the following command: -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester fetch-consumer -ON CKAN <= 2.8:: - - (pyenv) $ paster --plugin=ckanext-harvest harvester fetch_consumer --config=/etc/ckan/default/production.ini - Finally, on a third console, run the following command to start any pending harvesting jobs: -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester run -ON CKAN <= 2.8:: - - (pyenv) $ paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/default/production.ini - The ``run`` command not only starts any pending harvesting jobs, but also flags those that are finished, allowing new jobs to be created on that particular source and refreshing the source statistics. 
That means that you will need to run @@ -771,14 +742,8 @@ circumstance, ensure that the gather & fetch consumers are running and have nothing more to consume, and then run this abort command with the name or id of the harvest source: -ON CKAN >= 2.9:: - (pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester job-abort {source-id/name} -ON CKAN <= 2.8:: - - (pyenv) $ paster --plugin=ckanext-harvest harvester job_abort {source-id/name} --config=/etc/ckan/default/production.ini - Setting up the harvesters on a production server ================================================ @@ -855,42 +820,6 @@ following steps with the one you are using. startsecs=10 - ON CKAN <= 2.8:: - - - ; =============================== - ; ckan harvester - ; =============================== - - [program:ckan_gather_consumer] - - command=/usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester gather_consumer --config=/etc/ckan/default/production.ini - - ; user that owns virtual environment. - user=ckan - - numprocs=1 - stdout_logfile=/var/log/ckan/std/gather_consumer.log - stderr_logfile=/var/log/ckan/std/gather_consumer.log - autostart=true - autorestart=true - startsecs=10 - - [program:ckan_fetch_consumer] - - command=/usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester fetch_consumer --config=/etc/ckan/default/production.ini - - ; user that owns virtual environment. - user=ckan - - numprocs=1 - stdout_logfile=/var/log/ckan/std/fetch_consumer.log - stderr_logfile=/var/log/ckan/std/fetch_consumer.log - autostart=true - autorestart=true - startsecs=10 - - There are a number of things that you will need to replace with your specific installation settings (the example above shows paths from a ckan instance installed via Debian packages): @@ -952,16 +881,9 @@ following steps with the one you are using. 
Paste this line into your crontab, again replacing the paths to paster and the ini file with yours: - ON CKAN >= 2.9:: - # m h dom mon dow command */15 * * * * /usr/lib/ckan/default/bin/ckan -c /etc/ckan/default/ckan.ini harvester run - ON CKAN <= 2.8:: - - # m h dom mon dow command - */15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/default/production.ini - This particular example will check for pending jobs every fifteen minutes. You can of course modify this periodicity, this `Wikipedia page `_ has a good overview of the crontab syntax. @@ -973,16 +895,9 @@ following steps with the one you are using. Paste this line into your crontab, again replacing the paths to paster/ckan and the ini file with yours: - ON CKAN >= 2.9:: - # m h dom mon dow command 0 5 * * * /usr/lib/ckan/default/bin/ckan -c /etc/ckan/default/ckan.ini harvester clean-harvest-log - ON CKAN <= 2.8:: - - # m h dom mon dow command - 0 5 * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester clean_harvest_log --config=/etc/ckan/default/production.ini - This particular example will perform clean-up each day at 05 AM. You can tweak the value according to your needs. @@ -992,17 +907,17 @@ Extensible actions Recipients on harvest jobs notifications ---------------------------------------- -:code:`harvest_get_notifications_recipients`: you can *chain* this action from another extension to change +:code:`harvest_get_notifications_recipients`: you can *chain* this action from another extension to change the recipients for harvest jobs notifications. .. code-block:: python @toolkit.chained_action def harvest_get_notifications_recipients(up_func, context, data_dict): - """ Harvester plugin notify by default about harvest jobs only to + """ Harvester plugin notify by default about harvest jobs only to admin users of the related organization. Also allow to add custom recipients with this function. 
- + Return a list of dicts with name and email like {'name': 'John', 'email': 'john@source.com'} """ @@ -1021,7 +936,7 @@ Tests You can run the tests like this:: cd ckanext-harvest - nosetests --reset-db --ckan --with-pylons=test-core.ini ckanext/harvest/tests + pytest --ckan-ini=test.ini ckanext/harvest/tests Here are some common errors and solutions: diff --git a/ckanext/harvest/fanstatic_library/styles/harvest.css b/ckanext/harvest/assets/styles/harvest.css similarity index 100% rename from ckanext/harvest/fanstatic_library/styles/harvest.css rename to ckanext/harvest/assets/styles/harvest.css diff --git a/ckanext/harvest/fanstatic_library/styles/harvest.less b/ckanext/harvest/assets/styles/harvest.less similarity index 100% rename from ckanext/harvest/fanstatic_library/styles/harvest.less rename to ckanext/harvest/assets/styles/harvest.less diff --git a/ckanext/harvest/fanstatic_library/styles/less b/ckanext/harvest/assets/styles/less similarity index 100% rename from ckanext/harvest/fanstatic_library/styles/less rename to ckanext/harvest/assets/styles/less diff --git a/ckanext/harvest/fanstatic_library/styles/mixins.less b/ckanext/harvest/assets/styles/mixins.less similarity index 100% rename from ckanext/harvest/fanstatic_library/styles/mixins.less rename to ckanext/harvest/assets/styles/mixins.less diff --git a/ckanext/harvest/fanstatic_library/styles/variables.less b/ckanext/harvest/assets/styles/variables.less similarity index 100% rename from ckanext/harvest/fanstatic_library/styles/variables.less rename to ckanext/harvest/assets/styles/variables.less diff --git a/ckanext/harvest/fanstatic_library/webassets.yml b/ckanext/harvest/assets/webassets.yml similarity index 100% rename from ckanext/harvest/fanstatic_library/webassets.yml rename to ckanext/harvest/assets/webassets.yml diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index c3dd8605f..746d7271e 100644 --- a/ckanext/harvest/harvesters/base.py +++ 
b/ckanext/harvest/harvesters/base.py @@ -16,7 +16,7 @@ from ckan.logic.schema import default_create_package_schema from ckan.lib.navl.validators import ignore_missing, ignore -from ckan.lib.munge import munge_title_to_name, substitute_ascii_equivalents +from ckan.lib.munge import munge_title_to_name, munge_tag from ckanext.harvest.model import (HarvestObject, HarvestGatherError, HarvestObjectError, HarvestJob) @@ -25,25 +25,6 @@ from ckanext.harvest.interfaces import IHarvester from ckanext.harvest.logic.schema import unicode_safe -if p.toolkit.check_ckan_version(min_version='2.3'): - from ckan.lib.munge import munge_tag -else: - # Fallback munge_tag for older ckan versions which don't have a decent - # munger - def _munge_to_length(string, min_length, max_length): - '''Pad/truncates a string''' - if len(string) < min_length: - string += '_' * (min_length - len(string)) - if len(string) > max_length: - string = string[:max_length] - return string - - def munge_tag(tag): - tag = substitute_ascii_equivalents(tag) - tag = tag.lower().strip() - tag = re.sub(r'[^a-zA-Z0-9\- ]', '', tag).replace(' ', '-') - tag = _munge_to_length(tag, model.MIN_TAG_LENGTH, model.MAX_TAG_LENGTH) - return tag log = logging.getLogger(__name__) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 1da0787ea..aba454ddb 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -1,11 +1,10 @@ from __future__ import absolute_import -import six import requests from requests.exceptions import HTTPError, RequestException import datetime -from six.moves.urllib.parse import urlencode +from urllib.parse import urlencode from ckan import model from ckan.logic import ValidationError, NotFound, get_action from ckan.lib.helpers import json @@ -119,8 +118,7 @@ def validate_config(self, config): raise ValueError('default_groups must be a *list* of group' ' names/ids') if config_obj['default_groups'] 
and \ - not isinstance(config_obj['default_groups'][0], - six.string_types): + not isinstance(config_obj['default_groups'][0], str): raise ValueError('default_groups must be a list of group ' 'names/ids (i.e. strings)') @@ -520,7 +518,7 @@ def get_extra(key, package_dict): if existing_extra: package_dict['extras'].remove(existing_extra) # Look for replacement strings - if isinstance(value, six.string_types): + if isinstance(value, str): value = value.format( harvest_source_id=harvest_object.job.source.id, harvest_source_url=harvest_object.job.source.url.strip('/'), diff --git a/ckanext/harvest/helpers.py b/ckanext/harvest/helpers.py index 035e38543..87b3e3c5b 100644 --- a/ckanext/harvest/helpers.py +++ b/ckanext/harvest/helpers.py @@ -57,7 +57,7 @@ def package_list_for_source(source_id): query = logic.get_action('package_search')(context, search_dict) base_url = h.url_for( - '{0}_read'.format(DATASET_TYPE_NAME), + '{0}.read'.format(DATASET_TYPE_NAME), id=harvest_source['name'] ) @@ -124,7 +124,7 @@ def link_for_harvest_object(id=None, guid=None, text=None): obj = logic.get_action('harvest_object_show')(context, {'id': guid, 'attr': 'guid'}) id = obj.id - url = h.url_for('harvest_object_show', id=id) + url = h.url_for('harvester.object_show', id=id) text = text or guid or id link = '{text}'.format(url=url, text=text) @@ -138,13 +138,3 @@ def harvest_source_extra_fields(): continue fields[harvester.info()['name']] = list(harvester.extra_schema().keys()) return fields - - -def bootstrap_version(): - if p.toolkit.check_ckan_version(max_version='2.7.99'): - return 'bs2' - else: - return ( - 'bs2' if - p.toolkit.config.get('ckan.base_public_folder') == 'public-bs2' - else 'bs3') diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index c669ac4ba..924f478d6 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -1,15 +1,15 @@ # -*- coding: utf-8 -*- import hashlib +import html import json 
-import six import logging import datetime from ckantoolkit import config from sqlalchemy import and_, or_ -from six.moves.urllib.parse import urljoin +from urllib.parse import urljoin from ckan.lib.search.index import PackageSearchIndex from ckan.plugins import toolkit, PluginImplementations @@ -38,10 +38,7 @@ import ckan.lib.mailer as mailer from itertools import islice -if toolkit.check_ckan_version(min_version='2.9.0'): - from ckan.plugins.toolkit import render -else: - from ckan.lib.base import render_jinja2 as render +from ckan.plugins.toolkit import render log = logging.getLogger(__name__) @@ -549,8 +546,10 @@ def harvest_objects_import(context, data_dict): last_objects_count = 0 for obj_id in last_objects_ids: - if segments and \ - str(hashlib.md5(six.ensure_binary(obj_id[0])).hexdigest())[0] not in segments: + _id = obj_id[0] + if isinstance(_id, str): + _id = _id.encode() + if segments and str(hashlib.md5(_id).hexdigest())[0] not in segments: continue obj = session.query(HarvestObject).get(obj_id) @@ -759,7 +758,7 @@ def get_mail_extra_vars(context, source_id, status): errors = job_errors + obj_errors site_url = config.get('ckan.site_url') - job_url = toolkit.url_for('harvest_job_show', source=source['id'], id=last_job['id']) + job_url = toolkit.url_for('harvester.job_show', source=source['id'], id=last_job['id']) full_job_url = urljoin(site_url, job_url) extra_vars = { 'organization': organization, @@ -798,12 +797,7 @@ def prepare_summary_mail(context, source_id, status): def prepare_error_mail(context, source_id, status): extra_vars = get_mail_extra_vars(context, source_id, status) body = render('emails/error_email.txt', extra_vars) - if six.PY34: - import html - body = html.unescape(body) - elif six.PY2: - import HTMLParser - body = HTMLParser.HTMLParser().unescape(body) + body = html.unescape(body) subject = '{} - Harvesting Job - Error Notification'\ .format(config.get('ckan.site_title')) diff --git a/ckanext/harvest/logic/schema.py 
b/ckanext/harvest/logic/schema.py index e54120094..2946ea525 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import ckan.plugins as p import ckan.plugins.toolkit as tk from ckan.logic.schema import default_extras_schema @@ -54,10 +53,6 @@ def harvest_source_schema(): schema['extras'] = extras_schema - if p.toolkit.check_ckan_version('2.2'): - from ckan.logic.validators import datasets_with_no_organization_cannot_be_private - schema['private'].append(datasets_with_no_organization_cannot_be_private) - return schema diff --git a/ckanext/harvest/logic/validators.py b/ckanext/harvest/logic/validators.py index f1363a8c5..b54bcfa8a 100644 --- a/ckanext/harvest/logic/validators.py +++ b/ckanext/harvest/logic/validators.py @@ -12,11 +12,7 @@ ) from ckanext.harvest.model import HarvestSource, UPDATE_FREQUENCIES, HarvestJob from ckanext.harvest.interfaces import IHarvester - -import six -from six.moves.urllib.parse import ( - urlparse, urlunparse -) +from urllib.parse import (urlparse, urlunparse) log = logging.getLogger(__name__) @@ -233,7 +229,7 @@ def harvest_source_convert_from_config(key, data, errors, context): def harvest_source_active_validator(value, context): - if isinstance(value, six.string_types): + if isinstance(value, str): if value.lower() == 'true': return True else: @@ -259,6 +255,6 @@ def harvest_object_extras_validator(value, context): if not isinstance(value, dict): raise Invalid('extras must be a dict') for v in value.values(): - if not isinstance(v, six.string_types): + if not isinstance(v, str): raise Invalid('extras must be a dict of strings') return value diff --git a/ckanext/harvest/plugin/__init__.py b/ckanext/harvest/plugin.py similarity index 88% rename from ckanext/harvest/plugin/__init__.py rename to ckanext/harvest/plugin.py index 5eabbe09e..02ab6ae4b 100644 --- a/ckanext/harvest/plugin/__init__.py +++ b/ckanext/harvest/plugin.py @@ -2,10 +2,8 @@ import os import json 
-import six from logging import getLogger -from six import string_types from collections import OrderedDict from ckan import logic @@ -13,13 +11,10 @@ import ckan.plugins as p from ckan.lib.plugins import DefaultDatasetForm -try: - from ckan.lib.plugins import DefaultTranslation -except ImportError: - class DefaultTranslation(): - pass +from ckan.lib.plugins import DefaultTranslation import ckanext.harvest +from ckanext.harvest import cli, views from ckanext.harvest.model import setup as model_setup from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject from ckanext.harvest.log import DBLogHandler @@ -28,17 +23,13 @@ class DefaultTranslation(): DATASET_TYPE_NAME ) -if p.toolkit.check_ckan_version(min_version='2.9.0'): - from ckanext.harvest.plugin.flask_plugin import MixinPlugin -else: - from ckanext.harvest.plugin.pylons_plugin import MixinPlugin - log = getLogger(__name__) assert not log.disabled -class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTranslation): - +class Harvest(p.SingletonPlugin, DefaultDatasetForm, DefaultTranslation): + p.implements(p.IClick) + p.implements(p.IBlueprint) p.implements(p.IConfigurable) p.implements(p.IConfigurer, inherit=True) p.implements(p.IActions) @@ -47,11 +38,20 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla p.implements(p.IPackageController, inherit=True) p.implements(p.ITemplateHelpers) p.implements(p.IFacets, inherit=True) - if p.toolkit.check_ckan_version(min_version='2.5.0'): - p.implements(p.ITranslation, inherit=True) + p.implements(p.ITranslation, inherit=True) startup = False + # IClick + + def get_commands(self): + return cli.get_commands() + + # IBlueprint + + def get_blueprint(self): + return views.get_blueprints() + # ITranslation def i18n_directory(self): u'''Change the directory of the .mo translation files''' @@ -111,7 +111,7 @@ def before_dataset_search(self, search_params): fq = search_params.get("fq", "") if 
"dataset_type:harvest" not in fq: - fq = "{0} -dataset_type:harvest".format(fq.encode('utf8') if six.PY2 else fq) + fq = "{0} -dataset_type:harvest".format(fq) search_params.update({"fq": fq}) return search_params @@ -284,38 +284,10 @@ def configure(self, config): self.startup = False def update_config(self, config): - if not p.toolkit.check_ckan_version(min_version='2.0'): - assert 0, 'CKAN before 2.0 not supported by ckanext-harvest - '\ - 'genshi templates not supported any more' - if p.toolkit.asbool(config.get('ckan.legacy_templates', False)): - log.warn('Old genshi templates not supported any more by ' - 'ckanext-harvest so you should set ckan.legacy_templates ' - 'option to True any more.') - p.toolkit.add_template_directory(config, '../templates') - p.toolkit.add_public_directory(config, '../public') - p.toolkit.add_resource('../fanstatic_library', 'ckanext-harvest') - p.toolkit.add_resource('../public/ckanext/harvest/javascript', 'harvest-extra-field') - - if p.toolkit.check_ckan_version(min_version='2.9.0'): - mappings = config.get('ckan.legacy_route_mappings') or {} - if mappings and isinstance(mappings, string_types): - mappings = json.loads(mappings) - - mappings.update({ - 'harvest_read': 'harvest.read', - 'harvest_edit': 'harvest.edit', - }) - bp_routes = [ - "delete", "refresh", "admin", "about", - "clear", "job_list", "job_show_last", "job_show", - "job_abort", "object_show" - ] - mappings.update({ - 'harvest_' + route: 'harvester.' 
+ route - for route in bp_routes - }) - # https://github.com/ckan/ckan/pull/4521 - config['ckan.legacy_route_mappings'] = json.dumps(mappings) + p.toolkit.add_template_directory(config, 'templates') + p.toolkit.add_public_directory(config, 'public') + p.toolkit.add_resource('assets', 'ckanext-harvest') + p.toolkit.add_resource('public/ckanext/harvest/javascript', 'harvest-extra-field') # IActions @@ -347,7 +319,6 @@ def get_helpers(self): 'harvest_frequencies': harvest_helpers.harvest_frequencies, 'link_for_harvest_object': harvest_helpers.link_for_harvest_object, 'harvest_source_extra_fields': harvest_helpers.harvest_source_extra_fields, - 'bootstrap_version': harvest_helpers.bootstrap_version, 'get_harvest_source': harvest_helpers.get_harvest_source, } diff --git a/ckanext/harvest/plugin/flask_plugin.py b/ckanext/harvest/plugin/flask_plugin.py deleted file mode 100644 index 7b8208aaa..000000000 --- a/ckanext/harvest/plugin/flask_plugin.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- - -import ckan.plugins as p -import ckanext.harvest.cli as cli -import ckanext.harvest.views as views - - -class MixinPlugin(p.SingletonPlugin): - p.implements(p.IClick) - p.implements(p.IBlueprint) - - # IClick - - def get_commands(self): - return cli.get_commands() - - # IBlueprint - - def get_blueprint(self): - return views.get_blueprints() diff --git a/ckanext/harvest/plugin/pylons_plugin.py b/ckanext/harvest/plugin/pylons_plugin.py deleted file mode 100644 index 5e6f0e676..000000000 --- a/ckanext/harvest/plugin/pylons_plugin.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- - -import ckan.plugins as p -from ckanext.harvest.utils import DATASET_TYPE_NAME - - -class MixinPlugin(p.SingletonPlugin): - p.implements(p.IRoutes, inherit=True) - - # IRoutes - - def before_map(self, map): - - # Most of the routes are defined via the IDatasetForm interface - # (ie they are the ones for a package type) - controller = "ckanext.harvest.controllers.view:ViewController" - - 
map.connect( - "{0}_delete".format(DATASET_TYPE_NAME), - "/" + DATASET_TYPE_NAME + "/delete/:id", - controller=controller, - action="delete", - ) - map.connect( - "{0}_refresh".format(DATASET_TYPE_NAME), - "/" + DATASET_TYPE_NAME + "/refresh/:id", - controller=controller, - action="refresh", - ) - map.connect( - "{0}_admin".format(DATASET_TYPE_NAME), - "/" + DATASET_TYPE_NAME + "/admin/:id", - controller=controller, - action="admin", - ) - map.connect( - "{0}_about".format(DATASET_TYPE_NAME), - "/" + DATASET_TYPE_NAME + "/about/:id", - controller=controller, - action="about", - ) - map.connect( - "{0}_clear".format(DATASET_TYPE_NAME), - "/" + DATASET_TYPE_NAME + "/clear/:id", - controller=controller, - action="clear", - ) - - map.connect( - "harvest_job_list", - "/" + DATASET_TYPE_NAME + "/{source}/job", - controller=controller, - action="list_jobs", - ) - map.connect( - "harvest_job_show_last", - "/" + DATASET_TYPE_NAME + "/{source}/job/last", - controller=controller, - action="show_last_job", - ) - map.connect( - "harvest_job_show", - "/" + DATASET_TYPE_NAME + "/{source}/job/{id}", - controller=controller, - action="show_job", - ) - map.connect( - "harvest_job_abort", - "/" + DATASET_TYPE_NAME + "/{source}/job/{id}/abort", - controller=controller, - action="abort_job", - ) - - map.connect( - "harvest_object_show", - "/" + DATASET_TYPE_NAME + "/object/:id", - controller=controller, - action="show_object", - ) - map.connect( - "harvest_object_for_dataset_show", - "/dataset/harvest_object/:id", - controller=controller, - action="show_object", - ref_type="dataset", - ) - - return map diff --git a/ckanext/harvest/templates/base.html b/ckanext/harvest/templates/base.html index 5a379a0b8..d1e58db2e 100644 --- a/ckanext/harvest/templates/base.html +++ b/ckanext/harvest/templates/base.html @@ -2,8 +2,5 @@ {% block styles %} {{ super() }} - - {% set type = 'asset' if h.ckan_version().split('.')|map('int')|list >= [2, 9, 0] else 'resource' %} - {% include 
'harvest/snippets/harvest_' ~ type ~ '.html' %} - + {% asset 'ckanext-harvest/harvest_css' %} {% endblock %} diff --git a/ckanext/harvest/templates/harvest/snippets/harvest_asset.html b/ckanext/harvest/templates/harvest/snippets/harvest_asset.html deleted file mode 100644 index b78146a15..000000000 --- a/ckanext/harvest/templates/harvest/snippets/harvest_asset.html +++ /dev/null @@ -1 +0,0 @@ -{% asset 'ckanext-harvest/harvest_css' %} diff --git a/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_asset.html b/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_asset.html deleted file mode 100644 index 47e8b8965..000000000 --- a/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_asset.html +++ /dev/null @@ -1 +0,0 @@ -{% asset 'harvest-extra-field/main' %} diff --git a/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_resource.html b/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_resource.html deleted file mode 100644 index 9e8fda56d..000000000 --- a/ckanext/harvest/templates/harvest/snippets/harvest_extra_field_resource.html +++ /dev/null @@ -1 +0,0 @@ -{% resource 'harvest-extra-field/main' %} diff --git a/ckanext/harvest/templates/harvest/snippets/harvest_resource.html b/ckanext/harvest/templates/harvest/snippets/harvest_resource.html deleted file mode 100644 index 69ce72363..000000000 --- a/ckanext/harvest/templates/harvest/snippets/harvest_resource.html +++ /dev/null @@ -1 +0,0 @@ -{% resource 'ckanext-harvest/styles/harvest.css' %} diff --git a/ckanext/harvest/templates/snippets/add_source_button.html b/ckanext/harvest/templates/snippets/add_source_button.html index d392f98de..ffefb7d88 100644 --- a/ckanext/harvest/templates/snippets/add_source_button.html +++ b/ckanext/harvest/templates/snippets/add_source_button.html @@ -1,7 +1,7 @@ {% set authorized_user = h.check_access('harvest_source_create') %} {% if authorized_user %} - + {{ _('Add Harvest Source') }} diff --git 
a/ckanext/harvest/templates/snippets/source_item.html b/ckanext/harvest/templates/snippets/source_item.html index c2be2f11e..a8ba26fe6 100644 --- a/ckanext/harvest/templates/snippets/source_item.html +++ b/ckanext/harvest/templates/snippets/source_item.html @@ -19,7 +19,7 @@ {% set truncate_title = truncate_title or 80 %} {% set title = source.title or source.name %} {% set source_type = h.get_pkg_dict_extra(source, 'source_type') %} -{% set url = h.url_for('harvest_admin', id=source.name) if within_organization else h.url_for('harvest_read', id=source.name) %} +{% set url = h.url_for('harvester.admin', id=source.name) if within_organization else h.url_for('harvest.read', id=source.name) %}
  • @@ -47,7 +47,7 @@

    {{ _('Datasets') }}: {{ source.status.total_datasets }} {% endif %} {% if not within_organization and source.organization %} - — {{ _('Organization') }}: {{ h.link_to(source.organization.title or source.organization.name, h.url_for('organization_read', id=source.organization.name)) }} + — {{ _('Organization') }}: {{ h.link_to(source.organization.title or source.organization.name, h.url_for('organization.read', id=source.organization.name)) }} {% endif %}

    diff --git a/ckanext/harvest/templates/source/admin.html b/ckanext/harvest/templates/source/admin.html index a4c5cb75b..fe5b41e65 100644 --- a/ckanext/harvest/templates/source/admin.html +++ b/ckanext/harvest/templates/source/admin.html @@ -6,7 +6,7 @@

    {{ _('Last Harvest Job') }}

    {% if harvest_source.status and harvest_source.status.last_job %} {% snippet "snippets/job_details.html", job=harvest_source.status.last_job %}
    - + {{ _('View full job report') }} diff --git a/ckanext/harvest/templates/source/admin_base.html b/ckanext/harvest/templates/source/admin_base.html index ce65c99f0..20d2634d8 100644 --- a/ckanext/harvest/templates/source/admin_base.html +++ b/ckanext/harvest/templates/source/admin_base.html @@ -7,65 +7,38 @@
  • {{ _('Admin') }}
  • {% endblock %} -{% block action_links %} +{% block content_action %} +
    {% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'New' or harvest_source.status.last_job.status == 'Running') %} Reharvest {% else %} {% set locale = h.dump_json({'content': _('This will re-run the harvesting for this source. Any updates at the source will overwrite the local datasets. Sources with a large number of datasets may take a significant amount of time to finish harvesting. Please confirm you would like us to start reharvesting.')}) %} - - {{ _('Reharvest') }} - {% endif %} {% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'Running') %} - - + {{ _('Stop') }} - {% endif %} {% set locale = h.dump_json({'content': _('Warning: This will remove all datasets for this source, as well as all previous job reports. Are you sure you want to continue?')}) %} - - {{ _('Clear') }} - - - - + {{ _('View harvest source') }} - - -{% endblock %} - -{# CKAN 2.0 #} -{% block actions_content %} - {{ self.action_links() }} -{% endblock %} - -{# CKAN 2.1 #} -{% block content_action %} -
    - {{ self.action_links() }} -
    +
    {% endblock %} {% block page_header_tabs %} - {% set ckan_version = h.ckan_version().split('.')[1] %} - {% if ckan_version | int >= 9 %} - {{ h.build_nav_icon('harvester.admin', _('Dashboard'), id=harvest_source.name, icon='dashboard') }} - {{ h.build_nav_icon('harvester.job_list', _('Jobs'), source=harvest_source.name, icon='reorder') }} - {{ h.build_nav_icon(c.dataset_type ~ '.edit', _('Edit'), id=harvest_source.name, icon='edit') }} - {% else %} - {{ h.build_nav_icon('{0}_admin'.format(c.dataset_type), _('Dashboard'), id=harvest_source.name, icon='dashboard') }} - {{ h.build_nav_icon('{0}_job_list'.format(c.dataset_type), _('Jobs'), source=harvest_source.name, icon='reorder') }} - {{ h.build_nav_icon('{0}_edit'.format(c.dataset_type), _('Edit'), id=harvest_source.name, icon='edit') }} - {% endif %} + {{ h.build_nav_icon('harvester.admin', _('Dashboard'), id=harvest_source.name, icon='dashboard') }} + {{ h.build_nav_icon('harvester.job_list', _('Jobs'), source=harvest_source.name, icon='reorder') }} + {{ h.build_nav_icon(c.dataset_type ~ '.edit', _('Edit'), id=harvest_source.name, icon='edit') }} {% endblock %} diff --git a/ckanext/harvest/templates/source/edit.html b/ckanext/harvest/templates/source/edit.html index 776bc8952..d739d1232 100644 --- a/ckanext/harvest/templates/source/edit.html +++ b/ckanext/harvest/templates/source/edit.html @@ -6,12 +6,7 @@
    {% block form %} - {% if c.form %} - {# CKAN < 2.3 #} - {{ c.form | safe }} - {% else %} - {{- h.snippet(form_snippet, c=c, **form_vars) -}} - {% endif %} + {{- h.snippet(form_snippet, c=c, **form_vars) -}} {% endblock %}
    {% endblock %} diff --git a/ckanext/harvest/templates/source/job/list.html b/ckanext/harvest/templates/source/job/list.html index cea0be739..3e262b522 100644 --- a/ckanext/harvest/templates/source/job/list.html +++ b/ckanext/harvest/templates/source/job/list.html @@ -16,7 +16,7 @@

    {{ _('Harvest Jobs') }}

  • - + {{ _('Job: ') }} {{ job.id }} {% if job.status != 'Finished' %} diff --git a/ckanext/harvest/templates/source/job/read.html b/ckanext/harvest/templates/source/job/read.html index 7c9ce550f..25d32e125 100644 --- a/ckanext/harvest/templates/source/job/read.html +++ b/ckanext/harvest/templates/source/job/read.html @@ -6,7 +6,7 @@

    - {{ h.nav_link(_('Back to job list'), named_route='harvest_job_list', source=harvest_source.name, class_='btn btn-default', icon='arrow-left')}} + {{ h.nav_link(_('Back to job list'), named_route='harvester.job_list', source=harvest_source.name, class_='btn btn-default', icon='arrow-left')}}

    {{ _('Job Report') }}

    @@ -69,7 +69,7 @@

    {{ _('Document Errors') }} {{ _('Remote content') }} {% endif %} - + {{ _('Local content') }} diff --git a/ckanext/harvest/templates/source/new.html b/ckanext/harvest/templates/source/new.html index fc03de3f4..b7feb3d9c 100644 --- a/ckanext/harvest/templates/source/new.html +++ b/ckanext/harvest/templates/source/new.html @@ -13,12 +13,7 @@ {% block primary_content %}
    - {% if c.form %} - {# CKAN < 2.3 #} - {{ c.form | safe }} - {% else %} - {{- h.snippet(form_snippet, c=c, **form_vars) -}} - {% endif %} + {{- h.snippet(form_snippet, c=c, **form_vars) -}}
    {% endblock %} diff --git a/ckanext/harvest/templates/source/new_source_form.html b/ckanext/harvest/templates/source/new_source_form.html index 015371655..324d012e4 100644 --- a/ckanext/harvest/templates/source/new_source_form.html +++ b/ckanext/harvest/templates/source/new_source_form.html @@ -1,10 +1,8 @@ {% import 'macros/form.html' as form %} +{% asset 'harvest-extra-field/main' %} -{% set type = 'asset' if h.ckan_version().split('.')|map('int')|list >= [2, 9, 0] else 'resource' %} -{% include 'harvest/snippets/harvest_extra_field_' ~ type ~ '.html' %} - -
    + {% block errors %}{{ form.errors(error_summary) }}{% endblock %} @@ -14,12 +12,12 @@ {% endcall %} -{{ h.csrf_input() if 'csrf_input' in h }} + {{ h.csrf_input() if 'csrf_input' in h }} {{ form.input('title', id='field-title', label=_('Title'), placeholder=_('eg. A descriptive title'), value=data.title, error=errors.title, classes=['control-full'], attrs={'data-module': 'slug-preview-target'}) }} {% set prefix = 'harvest' %} - {% set domain = h.url_for('{0}_read'.format(c.dataset_type), id='', qualified=true) %} + {% set domain = h.url_for('{0}.read'.format(c.dataset_type), id='', qualified=true) %} {% set domain = domain|replace("http://", "")|replace("https://", "") %} {% set attrs = {'data-module': 'slug-preview-slug', 'data-module-prefix': domain, 'data-module-placeholder': ''} %} diff --git a/ckanext/harvest/templates/source/read_base.html b/ckanext/harvest/templates/source/read_base.html index 984427456..5126e25d5 100644 --- a/ckanext/harvest/templates/source/read_base.html +++ b/ckanext/harvest/templates/source/read_base.html @@ -1,27 +1,13 @@ {% extends "source/base.html" %} -{% block admin_link %} - {% if h.check_access('harvest_source_update', {'id':harvest_source.id }) %} - {{ h.nav_link(_('Admin'), named_route='{0}_admin'.format(c.dataset_type), id=harvest_source.name, class_='btn btn-primary', icon='wrench')}} - {% endif %} -{% endblock %} - -{# CKAN 2.0 #} -{% block actions_content %} - {% if authorized_user %} -
  • {{ self.admin_link() }}
  • - {% endif %} -{% endblock %} - -{# TODO: once #354 is merged in CKAN core .profile-info doesn't exist #} {% block secondary_content %} -
    +

    {{ harvest_source.title }}

    {% if harvest_source.notes %}

    {{ h.markdown_extract(harvest_source.notes, 180) }} - {{ h.nav_link(_('read more'), named_route='{0}_about'.format(c.dataset_type), id=harvest_source.name) }} + {{ h.nav_link(_('read more'), named_route='harvester.about', id=harvest_source.name) }}

    {% else %}

    {{ _('There is no description for this harvest source') }}

    @@ -37,24 +23,20 @@

    {{ harvest_source.title }}

    {% endblock %} {% block primary_content %} -
    +
    {% block page_header %} diff --git a/ckanext/harvest/templates/source/search.html b/ckanext/harvest/templates/source/search.html index b7797b940..a92994322 100644 --- a/ckanext/harvest/templates/source/search.html +++ b/ckanext/harvest/templates/source/search.html @@ -8,23 +8,7 @@ {% endblock %} -{% if g.ckan_base_version.startswith('2.0') %} - {# CKAN 2.0 #} - - {% block add_action_content %} - {{ h.snippet('snippets/add_source_button.html', dataset_type=c.dataset_type) }} - {% endblock %} -{% endif %} - {% block primary_content %} - {% if g.ckan_base_version.startswith('2.0') %} - {# CKAN 2.0 #} - - {% include 'source/search_2.0.html' %} - - {% else %} - {# > CKAN 2.0 #} -
    {% block page_primary_action %} @@ -56,14 +40,12 @@ {{ c.page.pager(q=c.q) }}
    - {% endif %} - -{% endblock %} + {% endblock %} {% block secondary_content %} {% for facet in c.facet_titles %} - {{ h.snippet('snippets/facet_list.html', title=c.facet_titles[facet], name=facet, alternative_url=h.url_for('{0}_search'.format(c.dataset_type))) }} + {{ h.snippet('snippets/facet_list.html', title=c.facet_titles[facet], name=facet, alternative_url=h.url_for('{0}.search'.format(c.dataset_type))) }} {% endfor %} {% endblock %} diff --git a/ckanext/harvest/tests/harvesters/mock_ckan.py b/ckanext/harvest/tests/harvesters/mock_ckan.py index 1d4d6f4d9..af4e3b515 100644 --- a/ckanext/harvest/tests/harvesters/mock_ckan.py +++ b/ckanext/harvest/tests/harvesters/mock_ckan.py @@ -3,17 +3,12 @@ import json import re import copy -import six -from six.moves.urllib.parse import unquote_plus +from urllib.parse import unquote_plus from threading import Thread -if six.PY2: - from SimpleHTTPServer import SimpleHTTPRequestHandler - from SocketServer import TCPServer -else: - from http.server import SimpleHTTPRequestHandler - from socketserver import TCPServer +from http.server import SimpleHTTPRequestHandler +from socketserver import TCPServer PORT = 8998 @@ -171,10 +166,7 @@ def get_org(self, org_ref): def get_url_params(self): params_str = self.path.split('?')[-1] - if six.PY2: - params_unicode = unquote_plus(params_str).decode('utf8') - else: - params_unicode = unquote_plus(params_str) + params_unicode = unquote_plus(params_str) params = params_unicode.split('&') return dict([param.split('=') for param in params]) diff --git a/ckanext/harvest/tests/test_blueprint.py b/ckanext/harvest/tests/test_blueprint.py index dbbca7d3f..a9141da8e 100644 --- a/ckanext/harvest/tests/test_blueprint.py +++ b/ckanext/harvest/tests/test_blueprint.py @@ -1,4 +1,3 @@ -import six import pytest from ckantoolkit import url_for @@ -6,13 +5,6 @@ from ckanext.harvest.tests import factories as harvest_factories -def _assert_in_body(string, response): - if six.PY2: - assert string in 
response.body.decode('utf8') - else: - assert string in response.body - - @pytest.mark.usefixtures('clean_db', 'clean_index', 'harvest_setup') class TestBlueprint(): @@ -23,42 +15,42 @@ def test_index_page_is_rendered(self, app): response = app.get(u'/harvest') - _assert_in_body(source1['title'], response) - _assert_in_body(source2['title'], response) + assert source1['title'] in response.body + assert source2['title'] in response.body def test_new_form_is_rendered(self, app): - url = url_for('harvest_new') + url = url_for('harvest.new') sysadmin = factories.Sysadmin() env = {"REMOTE_USER": sysadmin['name'].encode('ascii')} response = app.get(url, extra_environ=env) - _assert_in_body('\n' + content - except xml_parser_exception: + except etree.ParseError: try: json.loads(obj['content']) response.content_type = 'application/json; charset=utf-8' @@ -757,7 +746,11 @@ def object_show_view(id, ref_type, response): pass response.headers['Content-Length'] = len(content) - return (response, six.ensure_str(content)) + + if isinstance(content, bytes): + content = content.decode("utf-8") + + return (response, content) except tk.ObjectNotFound as e: return tk.abort(404, _(str(e))) diff --git a/ckanext/harvest/views.py b/ckanext/harvest/views.py index 6ac8eec5a..9185b0624 100644 --- a/ckanext/harvest/views.py +++ b/ckanext/harvest/views.py @@ -5,6 +5,8 @@ import ckanext.harvest.utils as utils +# IDatasetForm provides a "harvest" blueprint for the package type harvest. +# We name the extension blueprint "harvester" to avoid clashing of names. harvester = Blueprint("harvester", __name__) diff --git a/requirements.txt b/requirements.txt index 754dfbf10..93611a3c0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,4 @@ ckantoolkit>=0.0.7 pika>=1.1.0,<1.3.0 -enum34; python_version < '3.0' # Required by pika redis requests>=2.11.1 -six>=1.12.0