diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 8425952e..98db1d0a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,18 +8,14 @@ version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-22.04 - apt_packages: - - libmagic-dev tools: - python: "3.10" + python: "3.12" -sphinx: - configuration: doc/conf.py +mkdocs: + configuration: mkdocs.yml formats: all python: install: - - method: pip - path: . - - requirements: doc-requirements.txt + - requirements: docs/requirements.txt diff --git a/doc-requirements.txt b/doc-requirements.txt deleted file mode 100644 index 59709ec7..00000000 --- a/doc-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -sphinx-rtd-theme==2.0.0 diff --git a/doc/_assets/ckan.ico b/doc/_assets/ckan.ico new file mode 100644 index 00000000..0d9295c7 Binary files /dev/null and b/doc/_assets/ckan.ico differ diff --git a/doc/_assets/logo.png b/doc/_assets/logo.png new file mode 100644 index 00000000..3f631bfe Binary files /dev/null and b/doc/_assets/logo.png differ diff --git a/doc/_css/extra.css b/doc/_css/extra.css new file mode 100644 index 00000000..aa607b5a --- /dev/null +++ b/doc/_css/extra.css @@ -0,0 +1,27 @@ +[data-md-color-scheme="ckan"] { + --md-primary-fg-color: #2980b9; + --md-primary-fg-color--light: #ECB7B7; + --md-primary-fg-color--dark: #90030C; +} + +[data-md-color-scheme="slate"] { + --md-primary-fg-color: #2980b9; + --md-primary-fg-color--light: #ECB7B7; + --md-primary-fg-color--dark: #90030C; + --md-hue: 210; +} + + +[data-md-toggle="search"]:not(:checked) ~ .md-header .md-search__form::after { + position: absolute; + top: .3rem; + right: .3rem; + display: block; + padding: .1rem .4rem; + color: var(--md-default-bg-color); + font-weight: bold; + font-size: .8rem; + border: .05rem solid var(--md-default-bg-color--lighter); + border-radius: .1rem; + content: "/"; +} diff --git a/doc/_static/index.png b/doc/_static/index.png new file mode 100644 index 00000000..07e01b07 Binary files /dev/null 
and b/doc/_static/index.png differ diff --git a/doc/_templates/footer.html b/doc/_templates/footer.html deleted file mode 100644 index d457c8b0..00000000 --- a/doc/_templates/footer.html +++ /dev/null @@ -1,22 +0,0 @@ -
- -
- -

An Open Knowledge project.

- - - -

- Source - — - Issues - — - Twitter @CKANProject -

- -
- diff --git a/doc/changelog.md b/doc/changelog.md new file mode 100644 index 00000000..786b75d5 --- /dev/null +++ b/doc/changelog.md @@ -0,0 +1 @@ +--8<-- "CHANGELOG.md" diff --git a/doc/conf.py b/doc/conf.py deleted file mode 100644 index ebe362d2..00000000 --- a/doc/conf.py +++ /dev/null @@ -1,263 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import datetime -from ckanext.spatial import __version__ - -# -# ckanext-spatial documentation build configuration file, created by -# sphinx-quickstart on Wed Apr 10 17:17:12 2013. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.intersphinx'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. 
-project = u'ckanext-spatial' -copyright = u'© 2011-{} Open Knowledge Foundation and contributors.'.format(datetime.datetime.utcnow().year) - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = __version__ -# The full version, including alpha/beta/rc tags. -release = __version__ - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - -exclude_trees = ['.build'] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. 
-html_theme = 'sphinx_rtd_theme' - -#html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] -html_sidebars = { - '**': ['globaltoc.html'] -} - - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. 
-#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'ckanext-spatialdoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'ckanext-spatial.tex', u'ckanext-spatial Documentation', - u'Open Knowledge Foundation', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. 
-#latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'ckanext-spatial', u'ckanext-spatial Documentation', - [u'Open Knowledge Foundation'], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------------ - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'ckanext-spatial', u'ckanext-spatial Documentation', - u'Open Knowledge Foundation', 'ckanext-spatial', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False - - -# Example configuration for intersphinx: refer to the Python standard library. -#intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/doc/csw.md b/doc/csw.md new file mode 100644 index 00000000..0970f2fb --- /dev/null +++ b/doc/csw.md @@ -0,0 +1,206 @@ +# CSW support + +The extension provides the support for the +[CSW](http://www.opengeospatial.org/standards/cat) standard, a +specification from the Open Geospatial Consortium for exposing +geospatial catalogues over the web. + +This support consists of: + +- Ability to import records from CSW servers with the CSW harvester. + See `harvesters`{.interpreted-text role="doc"} for more details. 
+- Integration with [pycsw](http://pycsw.org) to provide a fully + compliant CSW interface for harvested records. This integration is + described in the following sections. + +## ckan-pycsw + +The spatial extension offers the `ckan-pycsw` command, which allows you to +expose the spatial datasets harvested from other sources in a CSW +interface. This is powered by [pycsw](http://pycsw.org), which fully +implements the OGC CSW specification. + +### How it works + +The current implementation is based on CKAN and pycsw being loosely +integrated via the CKAN API. pycsw will be generally installed in the +same server as CKAN (although it can also be run on a separate one), and +the synchronization command will be run regularly to keep the records on +the pycsw repository up to date. This is done using the CKAN API to get +all the dataset identifiers (more precisely the ones from datasets that +have been harvested) and then deciding which ones need to be created, +updated or deleted on the pycsw repository. For those that need to be +created or updated, the original harvested spatial document (ie ISO +19139) is requested from CKAN, and it is then imported using pycsw +internal functions: + + Harvested + datasets + + + | + v + +--------+ +---------+ + | | CKAN API | | + | CKAN | +------------> | pycsw | +------> CSW + | | | | + +--------+ +---------+ + +Remember, only datasets that were harvested with the +[spatial harvesters](harvesters.md) can currently be exposed via +pycsw. + +All necessary tasks are done with the `ckan-pycsw` command. To get more +details of its usage, run the following: + + cd /usr/lib/ckan/default/src/ckanext-spatial + python bin/ckan_pycsw.py --help + +### Setup + +1. Install pycsw. There are several options for this, depending on your + server setup, check the [pycsw + documentation](http://docs.pycsw.org/en/latest/installation.html). + + !!! Note + CKAN integration requires at least pycsw version 1.8.0. 
In general, use + the latest stable version. + + The following instructions assume that you have installed CKAN via a + [package + install](http://docs.ckan.org/en/latest/install-from-package.html) + and should be run as root, but the steps are the same if you are + setting it up in another location: + + cd /usr/lib/ckan/default/src + source ../bin/activate + + # From now on the virtualenv should be activated + + git clone https://github.com/geopython/pycsw.git + cd pycsw + # always use the latest stable version + git checkout 1.10.4 + pip install -e . + python setup.py build + python setup.py install + +2. Create a database for pycsw. In theory you can use the same database + that CKAN is using, but if you want to keep them separated, use the + following command to create a new one (we\'ll use the same default + user though): + + sudo -u postgres createdb -O ckan_default pycsw -E utf-8 + + It is strongly recommended that you install PostGIS in the pycsw + database, so its spatial functions are used. + +3. Configure pycsw. An example configuration file is included on the + source: + + cp default-sample.cfg default.cfg + + To keep things tidy we will create a symlink to this file on the + CKAN configuration directory: + + ln -s /usr/lib/ckan/default/src/pycsw/default.cfg /etc/ckan/default/pycsw.cfg + + Open the file with your favourite editor. The main settings you + should tweak are `server.home` and `repository.database`: + + [server] + home=/usr/lib/ckan/default/src/pycsw + ... + [repository] + database=postgresql://ckan_default:pass@localhost/pycsw + + The rest of the options are described + [here](http://docs.pycsw.org/en/latest/configuration.html). + +4. Setup the pycsw table. 
This is done with the `ckan-pycsw` script + (Remember to have the virtualenv activated when running it): + + cd /usr/lib/ckan/default/src/ckanext-spatial + python bin/ckan_pycsw.py setup -p /etc/ckan/default/pycsw.cfg + + At this point you should be ready to run pycsw with the wsgi script + that it includes: + + cd /usr/lib/ckan/default/src/pycsw + python csw.wsgi + + This will run pycsw at <http://localhost:8000>. Visiting the + following URL should return you the Capabilities file: + + <http://localhost:8000/?service=CSW&version=2.0.2&request=GetCapabilities> + +5. Load the CKAN datasets into pycsw. Again, we will use the + `ckan-pycsw` command for this: + + cd /usr/lib/ckan/default/src/ckanext-spatial + python bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg + + When the loading is finished, check that results are returned when + visiting this link: + + <http://localhost:8000/?request=GetRecords&service=CSW&version=2.0.2&resultType=results&outputSchema=http://www.isotc211.org/2005/gmd&typeNames=csw:Record&elementSetName=summary> + + The `numberOfRecordsMatched` should match the number of harvested + datasets in CKAN (minus import errors). If you run the command again + new or updated datasets will be synchronized and deleted datasets + from CKAN will be removed from pycsw as well. + +### Setting Service Metadata Keywords + +The CSW standard allows for administrators to set CSW service metadata. +These values can be set in the pycsw configuration `metadata:main` +section. If you would like the CSW service metadata keywords to be +reflective of the CKAN tags, run the following convenience command: + + python ckan_pycsw.py set_keywords -p /etc/ckan/default/pycsw.cfg + +Note that you must have privileges to write to the pycsw configuration +file. + +### Running it on production site + +On a production site you probably want to run the load command regularly +to keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi +like CKAN. + +- To run the load command regularly you can set up a cron job. 
Type + `crontab -e` and copy the following lines: + + # m h dom mon dow command + 0 * * * * /var/lib/ckan/default/bin/python /var/lib/ckan/default/src/ckanext-spatial/bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg + + This particular example will run the load command every hour. You + can of course modify this periodicity, for instance reducing it for + huge instances. This [Wikipedia + page](http://en.wikipedia.org/wiki/Cron#CRON_expression) has a good + overview of the crontab syntax. + +- To run pycsw under Apache check the pycsw [installation + documentation](http://docs.pycsw.org/en/latest/installation.html#running-on-wsgi) + or follow these quick steps (they assume the paths used in previous + steps): + + - Edit `/etc/apache2/sites-available/ckan_default` and add the + following line just before the existing `WSGIScriptAlias` + directive: + + WSGIScriptAlias /csw /usr/lib/ckan/default/src/pycsw/csw.wsgi + + - Edit the `/usr/lib/ckan/default/src/pycsw/csw.wsgi` file and add + these two lines just after the imports on the top of the file: + + activate_this = os.path.join('/usr/lib/ckan/default/bin/activate_this.py') + execfile(activate_this, {"__file__":activate_this}) + + We need these to activate the virtualenv where we installed + pycsw into. + + - Restart Apache: + + service apache2 restart + + pycsw should be now accessible at <http://localhost/csw> diff --git a/doc/csw.rst b/doc/csw.rst deleted file mode 100644 index d2e0e3ec..00000000 --- a/doc/csw.rst +++ /dev/null @@ -1,205 +0,0 @@ -=========== -CSW support -=========== - -The extension provides the support for the CSW_ standard, a specification from -the Open Geospatial Consortium for exposing geospatial catalogues over the web. - -This support consists of: - -* Ability to import records from CSW servers with the CSW harvester. See - :doc:`harvesters` for more details. - -* Integration with pycsw_ to provide a fully compliant CSW interface for - harvested records. 
This integration is described in the following sections. - - -ckan-pycsw ----------- - -The spatial extension offers the ``ckan-pycsw`` command, which allows to expose -the spatial datasets harvested from other sources in a CSW interface. This is -powered by pycsw_, which fully implements the OGC CSW specification. - -How it works -++++++++++++ - - -The current implementation is based on CKAN and pycsw being loosely integrated -via the CKAN API. pycsw will be generally installed in the same server as CKAN -(although it can also be run on a separate one), and the synchronization -command will be run regularly to keep the records on the pycsw repository up to -date. This is done using the CKAN API to get all the datasets identifiers (more -precisely the ones from datasets that have been harvested) and then deciding -which ones need to be created, updated or deleted on the pycsw repository. For -those that need to be created or updated, the original harvested spatial -document (ie ISO 19139) is requested from CKAN, and it is then imported using -pycsw internal functions:: - - Harvested - datasets - + - | - v - +--------+ +---------+ - | | CKAN API | | - | CKAN | +------------> | pycsw | +------> CSW - | | | | - +--------+ +---------+ - - -Remember, only datasets that were harvested with the :doc:`harvesters` -can currently be exposed via pycsw. - -All necessary tasks are done with the ``ckan-pycsw`` command. To get more -details of its usage, run the following:: - - cd /usr/lib/ckan/default/src/ckanext-spatial - python bin/ckan_pycsw.py --help - - -Setup -+++++ - -1. Install pycsw. There are several options for this, depending on your - server setup, check the `pycsw documentation`_. - - .. note:: CKAN integration requires least pycsw version 1.8.0. In general, - use the latest stable version. 
- - The following instructions assume that you have installed CKAN via a - `package install`_ and should be run as root, but the steps are the same if - you are setting it up in another location:: - - cd /usr/lib/ckan/default/src - source ../bin/activate - - # From now on the virtualenv should be activated - - git clone https://github.com/geopython/pycsw.git - cd pycsw - # always use the latest stable version - git checkout 1.10.4 - pip install -e . - python setup.py build - python setup.py install - -2. Create a database for pycsw. In theory you can use the same database that - CKAN is using, but if you want to keep them separated, use the following - command to create a new one (we'll use the same default user though):: - - sudo -u postgres createdb -O ckan_default pycsw -E utf-8 - - It is strongly recommended that you install PostGIS in the pycsw database, - so its spatial functions are used. - -3. Configure pycsw. An example configuration file is included on the source:: - - cp default-sample.cfg default.cfg - - To keep things tidy we will create a symlink to this file on the CKAN - configuration directory:: - - ln -s /usr/lib/ckan/default/src/pycsw/default.cfg /etc/ckan/default/pycsw.cfg - - Open the file with your favourite editor. The main settings you should tweak - are ``server.home`` and ``repository.database``:: - - [server] - home=/usr/lib/ckan/default/src/pycsw - ... - [repository] - database=postgresql://ckan_default:pass@localhost/pycsw - - The rest of the options are described `here `_. - -4. Setup the pycsw table. This is done with the ``ckan-pycsw`` script - (Remember to have the virtualenv activated when running it):: - - cd /usr/lib/ckan/default/src/ckanext-spatial - python bin/ckan_pycsw.py setup -p /etc/ckan/default/pycsw.cfg - - At this point you should be ready to run pycsw with the wsgi script that it - includes:: - - cd /usr/lib/ckan/default/src/pycsw - python csw.wsgi - - This will run pycsw at http://localhost:8000. 
Visiting the following URL - should return you the Capabilities file: - - http://localhost:8000/?service=CSW&version=2.0.2&request=GetCapabilities - -5. Load the CKAN datasets into pycsw. Again, we will use the ``ckan-pycsw`` - command for this:: - - cd /usr/lib/ckan/default/src/ckanext-spatial - python bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg - - When the loading is finished, check that results are returned when visiting - this link: - - http://localhost:8000/?request=GetRecords&service=CSW&version=2.0.2&resultType=results&outputSchema=http://www.isotc211.org/2005/gmd&typeNames=csw:Record&elementSetName=summary - - The ``numberOfRecordsMatched`` should match the number of harvested datasets - in CKAN (minus import errors). If you run the command again new or udpated - datasets will be synchronized and deleted datasets from CKAN will be removed - from pycsw as well. - -Setting Service Metadata Keywords -+++++++++++++++++++++++++++++++++ - -The CSW standard allows for administrators to set CSW service metadata. These -values can be set in the pycsw configuration ``metadata:main`` section. If you -would like the CSW service metadata keywords to be reflective of the CKAN -tags, run the following convenience command:: - - python ckan_pycsw.py set_keywords -p /etc/ckan/default/pycsw.cfg - -Note that you must have privileges to write to the pycsw configuration file. - - -Running it on production site -+++++++++++++++++++++++++++++ - -On a production site you probably want to run the load command regularly to -keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN. - -* To run the load command regularly you can set up a cron job. Type ``crontab -e`` - and copy the following lines:: - - # m h dom mon dow command - 0 * * * * /var/lib/ckan/default/bin/python /var/lib/ckan/default/src/ckanext-spatial/bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg - - This particular example will run the load command every hour. 
You can of - course modify this periodicity, for instance reducing it for huge instances. - This `Wikipedia page `_ - has a good overview of the crontab syntax. - -* To run pycsw under Apache check the pycsw `installation documentation `_ - or follow these quick steps (they assume the paths used in previous steps): - - - Edit ``/etc/apache2/sites-available/ckan_default`` and add the following - line just before the existing ``WSGIScriptAlias`` directive:: - - WSGIScriptAlias /csw /usr/lib/ckan/default/src/pycsw/csw.wsgi - - - Edit the ``/usr/lib/ckan/default/src/pycsw/csw.wsgi`` file and add these two - lines just after the imports on the top of the file:: - - activate_this = os.path.join('/usr/lib/ckan/default/bin/activate_this.py') - execfile(activate_this, {"__file__":activate_this}) - - We need these to activate the virtualenv where we installed pycsw into. - - - Restart Apache:: - - service apache2 restart - - pycsw should be now accessible at http://localhost/csw - -.. _pycsw: http://pycsw.org -.. _pycsw documentation: http://docs.pycsw.org/en/latest/installation.html -.. _package install: http://docs.ckan.org/en/latest/install-from-package.html -.. _CSW: http://www.opengeospatial.org/standards/cat - diff --git a/doc/harvesters.md b/doc/harvesters.md new file mode 100644 index 00000000..e619c56c --- /dev/null +++ b/doc/harvesters.md @@ -0,0 +1,292 @@ +# Spatial Harvesters + +## Overview and Configuration + +The spatial extension provides some harvesters for importing +ISO19139-based metadata into CKAN, as well as providing a base class for +writing new ones. The harvesters use the interface provided by +[ckanext-harvest](https://github.com/okfn/ckanext-harvest), so you will +need to install and set it up first. 
+ +Once ckanext-harvest is installed, you can add the following plugins to +your ini file to enable the different harvesters: + +- `csw_harvester` - CSW server +- `waf_harvester` - WAF (Web Accessible Folder): An online accessible + index page with links to metadata documents +- `doc_harvester` - A single online accessible metadata document. + +Have a look at the [ckanext-harvest +documentation](https://github.com/okfn/ckanext-harvest#the-harvesting-interface) +if you want to have an overview of how the CKAN harvesters work, but +basically there are three separate stages: + +- gather_stage - Aggregates all the remote identifiers for a + particular source (eg identifiers for a CSW server, files for a + WAF). +- fetch_stage - Fetches all the remote documents and stores them on + the database. +- import_stage - Performs all the processing for transforming the + remote content into a CKAN dataset: validates the document, parses + it, converts it to a CKAN dataset dict and saves it in the database. + +The extension provides different XSD and schematron based validators, +and you can also write your own (see [Writing custom +validators](#writing-custom-validators)). You can specify which +validators to use for the remote documents with the following +configuration option: + + ckan.spatial.validator.profiles = iso19139eden + +By default, the import stage will stop if the validation of the +harvested document fails. This can be modified by setting +`ckanext.spatial.harvest.continue_on_validation_errors` to True. The +setting can also be applied at the source level by setting the +`continue_on_validation_errors` key to True on the source configuration object. + +By default the harvesting actions (eg creating or updating datasets) +will be performed by the internal site admin user. 
This is the +recommended setting, but if necessary, it can be overridden with the +`ckanext.spatial.harvest.user_name` config option, eg to support the old +hardcoded `harvest` user: + + ckanext.spatial.harvest.user_name = harvest + +When a document has not been updated remotely, the previous harvest +object is replaced by the current one rather than keeping it, to avoid +cluttering the `harvest_object` table. This means that the +`harvest_object_id` reference on the linked dataset needs to be updated, +by reindexing it. This will happen by default, but if you want to turn +it off (eg if you are doing separate reindexing) it can be turned off with +the following option: + + ckanext.spatial.harvest.reindex_unchanged = False + +You can configure the single harvesters using a JSON object in the +configuration form field. The currently supported configuration options +are: + +- `default_tags`: A list of tags that will be added to all harvested + datasets. Tags don't need to previously exist. This field takes a + list of strings. +- `default_extras`: A dictionary of key value pairs that will be added + to extras of the harvested datasets. +- `override_extras`: Assign default extras even if they already exist + in the remote dataset. Default is False (only non existing extras + are added). +- `clean_tags`: By default, tags are not stripped of accent + characters, spaces and capital letters for display. If this option + is set to True, accent characters will be replaced by their ascii + equivalents, capital letters replaced by lower-case ones, and spaces + replaced with dashes. Setting this option to False gives the same + effect as leaving it unset. +- `validator_profiles`: A list of strings that specifies the + validators that will be applied to the current harvester, overriding + the global ones defined by the `ckan.spatial.validator.profiles` + option. 
+ +## Customizing the harvesters + +The default harvesters provided in this extension can be extended from +extensions implementing the `ISpatialHarvester` interface. + +Probably the most useful extension point is `get_package_dict`, which +allows you to tweak the dataset fields before creating or updating it: + +```python +import ckan.plugins as p +from ckanext.spatial.interfaces import ISpatialHarvester + +class MyPlugin(p.SingletonPlugin): + + p.implements(ISpatialHarvester, inherit=True) + + def get_package_dict(self, context, data_dict): + + # Check the reference below to see all that's included on data_dict + + package_dict = data_dict['package_dict'] + iso_values = data_dict['iso_values'] + + package_dict['extras'].append( + {'key': 'topic-category', 'value': iso_values.get('topic-category')} + ) + + package_dict['extras'].append( + {'key': 'my-custom-extra', 'value': 'my-custom-value'} + ) + + return package_dict +``` + +`get_validators` allows you to register custom validation classes that can +be applied to the harvested documents. Check the [Writing custom +validators](#writing-custom-validators) section to know more about how +to write your custom validators: + +```python +import ckan.plugins as p +from ckanext.spatial.interfaces import ISpatialHarvester +from ckanext.spatial.validation.validation import BaseValidator + +class MyPlugin(p.SingletonPlugin): + + p.implements(ISpatialHarvester, inherit=True) + + def get_validators(self): + return [MyValidator] + + +class MyValidator(BaseValidator): + + name = 'my-validator' + + title= 'My very own validator' + + @classmethod + def is_valid(cls, xml): + + return True, [] +``` + +`transform_to_iso` allows you to hook into transformation mechanisms to +transform other formats into ISO19139, the only one directly supported by +the spatial harvesters. 
+ +The full reference for the extension points can be found in [`ckanext/spatial/interfaces.py`](https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/interfaces.py). + +If you need to further customize the default behaviour of the +harvesters, you can either extend `CswHarvester`, `WAFHarvester` or +the main `SpatialHarvester` class, for instance to override the whole +`import_stage` if the default logic does not suit your needs. + +The +[ckanext-geodatagov](https://github.com/okfn/ckanext-geodatagov/blob/master/ckanext/geodatagov/harvesters/) +extension contains live examples on how to extend the default spatial +harvesters and create new ones for other spatial services like ArcGIS +REST APIs. + +## Writing custom validators + +Validator classes extend the `BaseValidator` class. + +Helper classes are provided for XSD and schematron based validation, and +completely custom logic can also be implemented. Here are some examples +of the most common types: + +- XSD based validators: + + class ISO19139NGDCSchema(XsdValidator): + ''' + XSD based validation for ISO 19139 documents. 
+ + Uses XSD schema from the NOAA National Geophysical Data Center: + + http://ngdc.noaa.gov/metadata/published/xsd/ + + ''' + name = 'iso19139ngdc' + title = 'ISO19139 XSD Schema (NGDC)' + + @classmethod + def is_valid(cls, xml): + xsd_path = 'xml/iso19139ngdc' + + xsd_filepath = os.path.join(os.path.dirname(__file__), + xsd_path, 'schema.xsd') + return cls._is_valid(xml, xsd_filepath, 'NGDC Schema (schema.xsd)') + +- Schematron validators: + + class Gemini2Schematron(SchematronValidator): + name = 'gemini2' + title = 'GEMINI 2.1 Schematron 1.2' + + @classmethod + def get_schematrons(cls): + with resource_stream("ckanext.spatial", + "validation/xml/gemini2/gemini2-schematron-20110906-v1.2.sch") as schema: + return [cls.schematron(schema)] + +- Custom validators: + + class MinimalFGDCValidator(BaseValidator): + + name = 'fgdc_minimal' + title = 'FGDC Minimal Validation' + + _elements = [ + ('Identification Citation Title', '/metadata/idinfo/citation/citeinfo/title'), + ('Identification Citation Originator', '/metadata/idinfo/citation/citeinfo/origin'), + ('Identification Citation Publication Date', '/metadata/idinfo/citation/citeinfo/pubdate'), + ('Identification Description Abstract', '/metadata/idinfo/descript/abstract'), + ('Identification Spatial Domain West Bounding Coordinate', '/metadata/idinfo/spdom/bounding/westbc'), + ('Identification Spatial Domain East Bounding Coordinate', '/metadata/idinfo/spdom/bounding/eastbc'), + ('Identification Spatial Domain North Bounding Coordinate', '/metadata/idinfo/spdom/bounding/northbc'), + ('Identification Spatial Domain South Bounding Coordinate', '/metadata/idinfo/spdom/bounding/southbc'), + ('Metadata Reference Information Contact Address Type', '/metadata/metainfo/metc/cntinfo/cntaddr/addrtype'), + ('Metadata Reference Information Contact Address State', '/metadata/metainfo/metc/cntinfo/cntaddr/state'), + ] + + @classmethod + def is_valid(cls, xml): + + errors = [] + + for title, xpath in cls._elements: + element = 
xml.xpath(xpath) + if len(element) == 0 or not element[0].text: + errors.append(('Element not found: {0}'.format(title), None)) + if len(errors): + return False, errors + + return True, [] + +The +[validation.py](https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/validation/validation.py) +file included in the ckanext-spatial extension contains more examples of +the different types. + +Remember that after registering your own validators you must specify +them in the following configuration option: + + ckan.spatial.validator.profiles = iso19139eden,my-validator + +## Harvest Metadata API + +This plugin allows you to access the actual harvested document via API +requests. It is enabled with the following plugin: + + ckan.plugins = spatial_harvest_metadata_api + +(It was previously known as `inspire_api`) + +To view the harvest objects (containing the harvested metadata) in the +web interface, these controller locations are added: + +- raw XML document: /harvest/object/{id} +- HTML representation: /harvest/object/{id}/html + +!!! Note + The old URLs are now deprecated and redirect to the previously + mentioned: + + - /api/2/rest/harvestobject/\<id\>/xml + - /api/2/rest/harvestobject/\<id\>/html + +For those harvest objects that have an original document (which was +transformed to ISO), this can be accessed via: + +- raw XML document: /harvest/object/{id}/original +- HTML representation: /harvest/object/{id}/html/original + +The HTML representation is created via an XSLT transformation. The +extension provides an XSLT file that should work on ISO 19139 based +documents, but if you want to use your own on your extension, you can +override it using the following configuration options: + + ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt + ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt + +If your project does not transform different metadata types you can +ignore the second option. 
diff --git a/doc/harvesters.rst b/doc/harvesters.rst deleted file mode 100644 index c785b036..00000000 --- a/doc/harvesters.rst +++ /dev/null @@ -1,322 +0,0 @@ -================== -Spatial Harvesters -================== - -Overview and Configuration --------------------------- - -The spatial extension provides some harvesters for importing ISO19139-based -metadata into CKAN, as well as providing a base class for writing new ones. -The harvesters use the interface provided by ckanext-harvest_, so you will need -to install and set it up first. - -Once ckanext-harvest is installed, you can add the following plugins to your -ini file to enable the different harvesters (If you are upgrading from a -previous version to CKAN 2.0 see legacy_harvesters_): - -* ``csw_harvester`` - CSW server -* ``waf_harvester`` - WAF (Web Accessible Folder): An online accessible index - page with links to metadata documents -* ``doc_harvester`` - A single online accessible metadata document. - -Have a look at the `ckanext-harvest documentation`_ if you want to have an -overview of how the CKAN harvesters work, but basically there are three -separate stages: - -* gather_stage - Aggregates all the remote identifiers for a particular source - (eg identifiers for a CSW server, files for a WAF). -* fetch_stage - Fetches all the remote documents and stores them on the - database. -* import_stage - Performs all the processing for transforming the remote - content into a CKAN dataset: validates the document, parses it, converts it - to a CKAN dataset dict and saves it in the database. - -The extension provides different XSD and schematron based validators, and you -can also write your own (see `Writing custom validators`_). You can -specify which validators to use for the remote documents with the following -configuration option:: - - ckan.spatial.validator.profiles = iso19193eden - -By default, the import stage will stop if the validation of the harvested -document fails. 
This can be modified setting the -``ckanext.spatial.harvest.continue_on_validation_errors`` to True. The setting -can also be applied at the source level setting to True the -``continue_on_validation_errors`` key on the source configuration object. - -By default the harvesting actions (eg creating or updating datasets) will be -performed by the internal site admin user. This is the recommended setting, -but if necessary, it can be overridden with the -``ckanext.spatial.harvest.user_name`` config option, eg to support the old -hardcoded 'harvest' user:: - - ckanext.spatial.harvest.user_name = harvest - -When a document has not been updated remotely, the previous harvest object is -replaced by the current one rather than keeping it, to avoid cluttering the -``harvest_object`` table. This means that the ``harvest_object_id`` reference -on the linked dataset needs to be updated, by reindexing it. This will happen -by default, but if you want to turn it off (eg if you are doing separate -reindexing) it can be turn off with the following option:: - - ckanext.spatial.harvest.reindex_unchanged = False - -You can configure the single harvesters using a JSON object in the configuration form field. -The currently supported configuration options are: - -* ``default_tags``: A list of tags that will be added to all harvested datasets. - Tags don't need to previously exist. This field takes a list of strings. -* ``default_extras``: A dictionary of key value pairs that will be added to extras of the harvested datasets. -* ``override_extras``: Assign default extras even if they already exist in the remote dataset. - Default is False (only non existing extras are added). -* ``clean_tags``: By default, tags are not stripped of accent characters, spaces and - capital letters for display. If this option is set to True, accent characters will - be replaced by their ascii equivalents, capital letters replaced by lower-case ones, - and spaces replaced with dashes. 
Setting this option to False gives the same effect as leaving it unset. -* ``validator_profiles``: A list of string that specifies a list of validators that will be applied to the - current harvester, overriding the global ones defined by the 'ckan.spatial.validator.profiles' option. - - -Customizing the harvesters --------------------------- - -The default harvesters provided in this extension can be extended from -extensions implementing the ``ISpatialHarvester`` interface. - -Probably the most useful extension point is ``get_package_dict``, which -allows to tweak the dataset fields before creating or updating it:: - - import ckan.plugins as p - from ckanext.spatial.interfaces import ISpatialHarvester - - class MyPlugin(p.SingletonPlugin): - - p.implements(ISpatialHarvester, inherit=True) - - def get_package_dict(self, context, data_dict): - - # Check the reference below to see all that's included on data_dict - - package_dict = data_dict['package_dict'] - iso_values = data_dict['iso_values'] - - package_dict['extras'].append( - {'key': 'topic-category', 'value': iso_values.get('topic-category')} - ) - - package_dict['extras'].append( - {'key': 'my-custom-extra', 'value': 'my-custom-value'} - ) - - return package_dict - -``get_validators`` allows to register custom validation classes that can be -applied to the harvested documents. 
Check the `Writing custom validators`_ -section to know more about how to write your custom validators:: - - import ckan.plugins as p - from ckanext.spatial.interfaces import ISpatialHarvester - from ckanext.spatial.validation.validation import BaseValidator - - class MyPlugin(p.SingletonPlugin): - - p.implements(ISpatialHarvester, inherit=True) - - def get_validators(self): - return [MyValidator] - - - class MyValidator(BaseValidator): - - name = 'my-validator' - - title= 'My very own validator' - - @classmethod - def is_valid(cls, xml): - - return True, [] - - -``transform_to_iso`` allows to hook into transformation mechanisms to -transform other formats into ISO1939, the only one directly supported by -the spatial harvesters. - -Here is the full reference for the provided extension points: - -.. autoclass:: ckanext.spatial.interfaces.ISpatialHarvester - :members: - -If you need to further customize the default behaviour of the harvesters, you -can either extend ``CswHarvester``, ``WAFfHarverster`` or the main -``SpatialHarvester`` class., for instance to override the whole -``import_stage`` if the default logic does not suit your -needs. - -The `ckanext-geodatagov`_ extension contains live examples on how to extend -the default spatial harvesters and create new ones for other spatial services -like ArcGIS REST APIs. - -Writing custom validators -------------------------- - - -Validator classes extend the ``BaseValidator`` class: - -.. autoclass:: ckanext.spatial.validation.validation.BaseValidator - :members: - -Helper classes are provided for XSD and schematron based validation, and -completely custom logic can be also implemented. Here are some examples of -the most common types: - -* XSD based validators:: - - class ISO19139NGDCSchema(XsdValidator): - ''' - XSD based validation for ISO 19139 documents. 
- - Uses XSD schema from the NOAA National Geophysical Data Center: - - http://ngdc.noaa.gov/metadata/published/xsd/ - - ''' - name = 'iso19139ngdc' - title = 'ISO19139 XSD Schema (NGDC)' - - @classmethod - def is_valid(cls, xml): - xsd_path = 'xml/iso19139ngdc' - - xsd_filepath = os.path.join(os.path.dirname(__file__), - xsd_path, 'schema.xsd') - return cls._is_valid(xml, xsd_filepath, 'NGDC Schema (schema.xsd)') - - - -* Schematron validators:: - - class Gemini2Schematron(SchematronValidator): - name = 'gemini2' - title = 'GEMINI 2.1 Schematron 1.2' - - @classmethod - def get_schematrons(cls): - with resource_stream("ckanext.spatial", - "validation/xml/gemini2/gemini2-schematron-20110906-v1.2.sch") as schema: - return [cls.schematron(schema)] - - -* Custom validators:: - - class MinimalFGDCValidator(BaseValidator): - - name = 'fgdc_minimal' - title = 'FGDC Minimal Validation' - - _elements = [ - ('Identification Citation Title', '/metadata/idinfo/citation/citeinfo/title'), - ('Identification Citation Originator', '/metadata/idinfo/citation/citeinfo/origin'), - ('Identification Citation Publication Date', '/metadata/idinfo/citation/citeinfo/pubdate'), - ('Identification Description Abstract', '/metadata/idinfo/descript/abstract'), - ('Identification Spatial Domain West Bounding Coordinate', '/metadata/idinfo/spdom/bounding/westbc'), - ('Identification Spatial Domain East Bounding Coordinate', '/metadata/idinfo/spdom/bounding/eastbc'), - ('Identification Spatial Domain North Bounding Coordinate', '/metadata/idinfo/spdom/bounding/northbc'), - ('Identification Spatial Domain South Bounding Coordinate', '/metadata/idinfo/spdom/bounding/southbc'), - ('Metadata Reference Information Contact Address Type', '/metadata/metainfo/metc/cntinfo/cntaddr/addrtype'), - ('Metadata Reference Information Contact Address State', '/metadata/metainfo/metc/cntinfo/cntaddr/state'), - ] - - @classmethod - def is_valid(cls, xml): - - errors = [] - - for title, xpath in cls._elements: - 
element = xml.xpath(xpath) - if len(element) == 0 or not element[0].text: - errors.append(('Element not found: {0}'.format(title), None)) - if len(errors): - return False, errors - - return True, [] - - -The `validation.py`_ file included in the ckanext-spatial extension contains -more examples of the different types. - -Remember that after registering your own validators you must specify them on -the following configuration option:: - - ckan.spatial.validator.profiles = iso19193eden,my-validator - - -.. _validation.py: https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/validation/validation.py - -Harvest Metadata API --------------------- - -This plugin allows to access the actual harvested document via API requests. -It is enabled with the following plugin:: - - ckan.plugins = spatial_harvest_metadata_api - -(It was previously known as ``inspire_api``) - -To view the harvest objects (containing the harvested metadata) in the web -interface, these controller locations are added: - -* raw XML document: /harvest/object/{id} -* HTML representation: /harvest/object/{id}/html - -.. note:: The old URLs are now deprecated and redirect to the previously - mentioned: - - * /api/2/rest/harvestobject//xml - * /api/2/rest/harvestobject//html - - -For those harvest objects that have an original document (which was transformed -to ISO), this can be accessed via: - -* raw XML document: /harvest/object/{id}/original -* HTML representation: /harvest/object/{id}/html/original - -The HTML representation is created via an XSLT transformation. 
The extension -provides an XSLT file that should work on ISO 19139 based documents, but if you -want to use your own on your extension, you can override it using the following -configuration options:: - - ckanext.spatial.harvest.xslt_html_content = ckanext.myext:templates/xslt/custom.xslt - ckanext.spatial.harvest.xslt_html_content_original = ckanext.myext:templates/xslt/custom2.xslt - -If your project does not transform different metadata types you can ignore the -second option. - -.. _legacy_harvesters: - -Legacy harvesters ------------------ - -Prior to CKAN 2.0, the spatial harvesters available on this extension were -based on the GEMINI2 format, an ISO19139 profile used by the UK Location -Programme, and the logic for creating or updating datasets and the resulting -fields were somehow adapted to the needs for this particular project. The -harvesters were still generic enough and should work fine with other ISO19139 -based sources, but extra care has been put to make the new harvesters more -generic and robust, so these ones should only be used on existing instances: - -* ``gemini_csw_harvester`` -* ``gemini_waf_harvester`` -* ``gemini_doc_harvester`` - -If you are using these harvesters please consider upgrading to the new -versions described on the previous section. - - -.. todo:: Validation library details - - -.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest -.. _ckanext-harvest documentation: https://github.com/okfn/ckanext-harvest#the-harvesting-interface -.. 
_ckanext-geodatagov: https://github.com/okfn/ckanext-geodatagov/blob/master/ckanext/geodatagov/harvesters/ diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 00000000..723b4cee --- /dev/null +++ b/doc/index.md @@ -0,0 +1,18 @@ +# ckanext-spatial - Geo related plugins for CKAN + +[![Tests](https://github.com/ckan/ckanext-spatial/workflows/Tests/badge.svg?branch=master)](https://github.com/ckan/ckanext-spatial/actions) + +This extension contains plugins that add geospatial capabilities to +[CKAN](http://ckan.org). + +You should have a CKAN instance installed before adding these plugins. +Head to the [CKAN documentation](http://docs.ckan.org) for information +on how to set up CKAN. + +The extension allows to perform spatial queries and display the dataset +extent on the frontend. It also provides harvesters to import geospatial +metadata into CKAN from other sources, as well as commands to support +the OGC CSW standard via [pycsw](http://pycsw.org). + + +![Spatial search](_static/index.png) diff --git a/doc/index.rst b/doc/index.rst deleted file mode 100644 index 5fd0e767..00000000 --- a/doc/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -============================================== -ckanext-spatial - Geo related plugins for CKAN -============================================== - -This extension contains plugins that add geospatial capabilities to CKAN_. - -You should have a CKAN instance installed before adding these plugins. Head to -the `CKAN documentation`_ for information on how to set up CKAN. - -The extension allows to perform spatial queries and display the dataset extent -on the frontend. It also provides harvesters to import geospatial metadata into -CKAN from other sources, as well as commands to support the OGC CSW standard via pycsw_. - -Contents: - -.. toctree:: - :maxdepth: 2 - - install - spatial-search - harvesters - csw - previews - map-widgets - -.. _CKAN: http://ckan.org -.. _CKAN Documentation: http://docs.ckan.org -.. 
_GeoJSON: http://geojson.org -.. _pycsw: http://pycsw.org diff --git a/doc/install.md b/doc/install.md new file mode 100644 index 00000000..923c5a2d --- /dev/null +++ b/doc/install.md @@ -0,0 +1,161 @@ +# Installation and Setup + +Check the [Troubleshooting](#troubleshooting) section if you get errors +at any stage. + +All commands assume an existing CKAN database named `ckan_default`. + +## Install the extension + +!!! Note + The package names and paths shown are the defaults on Ubuntu installs. + Adjust the package names and the paths if you are using a different + platform. + +1. Install some packages needed by the extension dependencies: + + sudo apt-get install python-dev libxml2-dev libxslt1-dev libgeos-c1 + +2. Activate your CKAN virtual environment, for example: + + . /usr/lib/ckan/default/bin/activate + +3. Install the ckanext-spatial Python package into your virtual + environment: + + pip install -e "git+https://github.com/ckan/ckanext-spatial.git#egg=ckanext-spatial" + +4. Install the rest of Python modules required by the extension: + + pip install -r /usr/lib/ckan/default/src/ckanext-spatial/requirements.txt + +5. Restart CKAN. For example if you\'ve deployed CKAN with Apache on + Ubuntu: + + sudo service apache2 reload + +To use the [spatial harvesters](harvesters.md), you will need to +install and configure the harvester extension: +[ckanext-harvest](https://github.com/okfn/ckanext-harvest). Follow the +install instructions on its documentation for details on how to set it +up. 
+ +## Configuration + +Add the following plugins to the `ckan.plugins` directive in the CKAN +ini file: + + ckan.plugins = spatial_metadata spatial_query + +## Troubleshooting + +Here are some common problems you may find when installing or using the +extension: + +### When upgrading the extension to a newer version + +#### ckan.plugins.core.PluginNotFoundException: geojson_view + + File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 149, in load + service = _get_service(plugin) + File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 256, in _get_service + raise PluginNotFoundException(plugin_name) + ckan.plugins.core.PluginNotFoundException: geojson_view + +Your CKAN instance is using the `geojson_view` (or `geojson_preview`) +plugin. This plugin has been moved from ckanext-spatial to +[ckanext-geoview](https://github.com/ckan/ckanext-geoview). Please +install ckanext-geoview following the instructions on the README. + +#### TemplateNotFound: Template dataviewer/geojson.html cannot be found + + File '/home/pyenvs/spatial/src/ckan/ckan/lib/base.py', line 129 in render_template + template_path, template_type = render_.template_info(template_name) + File '/home/pyenvs/spatial/src/ckan/ckan/lib/render.py', line 51 in template_info + raise TemplateNotFound('Template %s cannot be found' % template_name) + TemplateNotFound: Template dataviewer/geojson.html cannot be found + +See the issue above for details. Install +[ckanext-geoview](https://github.com/ckan/ckanext-geoview) and +additionally run the following on the ckanext-spatial directory with +your virtualenv activated: + + pip install -e . 
+ +#### ImportError: No module named nongeos_plugin + + File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 255, in _get_service + return plugin.load()(name=plugin_name) + File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pkg_resources.py", line 2147, in load + ['__name__']) + ImportError: No module named nongeos_plugin + +See the issue above for details. Install +[ckanext-geoview](https://github.com/ckan/ckanext-geoview) and +additionally run the following on the ckanext-spatial directory with +your virtualenv activated: + + pip install -e . + +#### Plugin class \'GeoJSONPreview\' does not implement an interface + + File "/home/pyenvs/spatial/src/ckanext-spatial/ckanext/spatial/nongeos_plugin.py", line 175, in + class GeoJSONPreview(GeoJSONView): + File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pyutilib/component/core/core.py", line 732, in __new__ + return PluginMeta.__new__(cls, name, bases, d) + File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pyutilib/component/core/core.py", line 659, in __new__ + raise PluginError("Plugin class %r does not implement an interface, and it has already been defined in environment '%r'." % (str(name), PluginGlobals.env().name)) + pyutilib.component.core.core.PluginError: Plugin class 'GeoJSONPreview' does not implement an interface, and it has already been defined in environment ''pca'' + +You have correctly installed +[ckanext-geoview](https://github.com/ckan/ckanext-geoview) but the +ckanext-spatial source code is outdated, with references to the view +plugins previously part of this extension. Pull the latest version of +the code and re-register the extension. With the virtualenv CKAN is +installed on activated, run: + + git pull + pip install -e . 
+ +### When running the spatial harvesters + + File "xmlschema.pxi", line 102, in lxml.etree.XMLSchema.__init__ (src/lxml/lxml.etree.c:154475) + lxml.etree.XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml}doubleList'., line 1 + +The XSD validation used by the spatial harvesters requires libxml2 +version 2.9. + +With CKAN you would probably have installed an older version from your +distribution. (e.g. with `sudo apt-get install libxml2-dev`). You need +to find the SO files for the old version: + + $ find /usr -name "libxml2.so" + +For example, it may show it here: +`/usr/lib/x86_64-linux-gnu/libxml2.so`. The directory of the SO file is +used as a parameter to the `configure` next on. + +Download the libxml2 source: + + $ cd ~ + $ wget ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz + +Unzip it: + + $ tar zxvf libxml2-2.9.0.tar.gz + $ cd libxml2-2.9.0/ + +Configure with the SO directory you found before: + + $ ./configure --libdir=/usr/lib/x86_64-linux-gnu + +Now make it and install it: + + $ make + $ sudo make install + +Now check the install by running xmllint: + + $ xmllint --version + xmllint: using libxml version 20900 + compiled with: Threads Tree Output Push Reader Patterns Writer SAXv1 FTP HTTP DTDValid HTML Legacy C14N Catalog XPath XPointer XInclude Iconv ISO8859X Unicode Regexps Automata Expr Schemas Schematron Modules Debug Zlib diff --git a/doc/install.rst b/doc/install.rst deleted file mode 100644 index 2f6bd778..00000000 --- a/doc/install.rst +++ /dev/null @@ -1,179 +0,0 @@ -====================== -Installation and Setup -====================== - -Check the Troubleshooting_ section if you get errors at any stage. - -.. warning:: If you are looking for the geospatial preview plugins to render (eg GeoJSON - or WMS services), these are now located in ckanext-geoview_. 
They have a much simpler - installation, so you can skip all the following steps if you just want the previews. - -All commands assume an existing CKAN database named ``ckan_default``. - - -Install the extension ---------------------- - -.. note:: The package names and paths shown are the defaults on Ubuntu installs. - Adjust the package names and the paths if you are using a different platform. - -#. Install some packages needed by the extension dependencies:: - - sudo apt-get install python-dev libxml2-dev libxslt1-dev libgeos-c1 - -#. Activate your CKAN virtual environment, for example:: - - . /usr/lib/ckan/default/bin/activate - -#. Install the ckanext-spatial Python package into your virtual environment:: - - pip install -e "git+https://github.com/ckan/ckanext-spatial.git#egg=ckanext-spatial" - -#. Install the rest of Python modules required by the extension:: - - pip install -r /usr/lib/ckan/default/src/ckanext-spatial/requirements.txt - -#. Restart CKAN. For example if you've deployed CKAN with Apache on Ubuntu:: - - sudo service apache2 reload - -To use the :doc:`harvesters`, you will need to install and configure the -harvester extension: `ckanext-harvest`_. Follow the install instructions on -its documentation for details on how to set it up. 
- - -Configuration -------------- - - -Add the following plugins to the ``ckan.plugins`` directive in the -CKAN ini file:: - - ckan.plugins = spatial_metadata spatial_query - - -Troubleshooting ---------------- - -Here are some common problems you may find when installing or using the -extension: - -When upgrading the extension to a newer version -+++++++++++++++++++++++++++++++++++++++++++++++ - -ckan.plugins.core.PluginNotFoundException: geojson_view -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 149, in load - service = _get_service(plugin) - File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 256, in _get_service - raise PluginNotFoundException(plugin_name) - ckan.plugins.core.PluginNotFoundException: geojson_view - -Your CKAN instance is using the ``geojson_view`` (or ``geojson_preview``) plugin. This plugin has been -moved from ckanext-spatial to ckanext-geoview_. Please install ckanext-geoview following the instructions on the -README. - -TemplateNotFound: Template dataviewer/geojson.html cannot be found -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - File '/home/pyenvs/spatial/src/ckan/ckan/lib/base.py', line 129 in render_template - template_path, template_type = render_.template_info(template_name) - File '/home/pyenvs/spatial/src/ckan/ckan/lib/render.py', line 51 in template_info - raise TemplateNotFound('Template %s cannot be found' % template_name) - TemplateNotFound: Template dataviewer/geojson.html cannot be found - -See the issue above for details. 
Install ckanext-geoview_ and additionally run the following on the -ckanext-spatial directory with your virtualenv activated:: - - python setup.py develop - - -ImportError: No module named nongeos_plugin -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - File "/home/pyenvs/spatial/src/ckan/ckan/plugins/core.py", line 255, in _get_service - return plugin.load()(name=plugin_name) - File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pkg_resources.py", line 2147, in load - ['__name__']) - ImportError: No module named nongeos_plugin - -See the issue above for details. Install ckanext-geoview_ and additionally run the following on the -ckanext-spatial directory with your virtualenv activated:: - - python setup.py develop - - -Plugin class 'GeoJSONPreview' does not implement an interface -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - File "/home/pyenvs/spatial/src/ckanext-spatial/ckanext/spatial/nongeos_plugin.py", line 175, in - class GeoJSONPreview(GeoJSONView): - File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pyutilib/component/core/core.py", line 732, in __new__ - return PluginMeta.__new__(cls, name, bases, d) - File "/home/pyenvs/spatial/local/lib/python2.7/site-packages/pyutilib/component/core/core.py", line 659, in __new__ - raise PluginError("Plugin class %r does not implement an interface, and it has already been defined in environment '%r'." % (str(name), PluginGlobals.env().name)) - pyutilib.component.core.core.PluginError: Plugin class 'GeoJSONPreview' does not implement an interface, and it has already been defined in environment ''pca'' - -You have correctly installed ckanext-geoview_ but the ckanext-spatial source code is outdated, with references -to the view plugins previously part of this extension. Pull the latest version of the code and re-register the -extension. 
With the virtualenv CKAN is installed on activated, run:: - - git pull - python setup.py develop - -When running the spatial harvesters -+++++++++++++++++++++++++++++++++++ - -:: - - File "xmlschema.pxi", line 102, in lxml.etree.XMLSchema.__init__ (src/lxml/lxml.etree.c:154475) - lxml.etree.XMLSchemaParseError: local list type: A type, derived by list or union, must have the simple ur-type definition as base type, not '{http://www.opengis.net/gml}doubleList'., line 1 - -The XSD validation used by the spatial harvesters requires libxml2 version 2.9. - -With CKAN you would probably have installed an older version from your -distribution. (e.g. with ``sudo apt-get install libxml2-dev``). You need to -find the SO files for the old version:: - - $ find /usr -name "libxml2.so" - -For example, it may show it here: ``/usr/lib/x86_64-linux-gnu/libxml2.so``. -The directory of the SO file is used as a parameter to the ``configure`` next -on. - -Download the libxml2 source:: - - $ cd ~ - $ wget ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz - -Unzip it:: - - $ tar zxvf libxml2-2.9.0.tar.gz - $ cd libxml2-2.9.0/ - -Configure with the SO directory you found before:: - - $ ./configure --libdir=/usr/lib/x86_64-linux-gnu - -Now make it and install it:: - - $ make - $ sudo make install - -Now check the install by running xmllint:: - - $ xmllint --version - xmllint: using libxml version 20900 - compiled with: Threads Tree Output Push Reader Patterns Writer SAXv1 FTP HTTP DTDValid HTML Legacy C14N Catalog XPath XPointer XInclude Iconv ISO8859X Unicode Regexps Automata Expr Schemas Schematron Modules Debug Zlib - -.. _ckanext-harvest: https://github.com/okfn/ckanext-harvest -.. 
_ckanext-geoview: https://github.com/ckan/ckanext-geoview diff --git a/doc/map-widgets.md b/doc/map-widgets.md new file mode 100644 index 00000000..c159ff51 --- /dev/null +++ b/doc/map-widgets.md @@ -0,0 +1,129 @@ +# Common base layers for Map Widgets + +To provide a consistent look and feel and avoid code duplication, the +map widgets (at least the ones based on [Leaflet](http://leafletjs.com)) +can use a common function to create the map. The base layer that the map +will use can be configured via configuration options. + +![image](_static/base-map-stamen.png) + +## Configuring the base layer + +The map widgets use the +[Leaflet-providers](https://github.com/leaflet-extras/leaflet-providers) +library to make it easy to choose the base tile layer that the map widgets +will use. You can use any of the supported providers, which are listed +in the [preview +page](http://leaflet-extras.github.io/leaflet-providers/preview/index.html). + +!!! Note + As of October 2023, most if not all of the tile providers require at + least some form of registration and / or domain registering. They also + have terms of use and will most likely require proper attribution (which + should be handled automatically for you when choosing a provider). + +If you haven't configured a map provider you will see the following +notice in the map widgets: + +![image](_static/no-map-provider.png) + +The main configuration option to manage the base layer used is +`ckanext.spatial.common_map.type`. The value of this setting should be +one of the provider names supported by Leaflet-providers, e.g. +`Stadia.StamenTerrain`, `Stadia`, `MapBox`, `Herev3.terrainDay`, +`Esri.WorldImagery`, `USGS.USImagery` etc. Note that these values are +**case-sensitive**. 
+ +Any additional configuration options required by Leaflet-providers +should be set prefixed with `ckanext.spatial.common_map.`, for instance +to configure the Stamen Terrain map that was used in previous versions +of ckanext-spatial: + + # Stadia / Stamen Terrain + ckanext.spatial.common_map.type = Stadia.StamenTerrain + ckanext.spatial.common_map.apikey = + +To use MapBox tiles: + + # MapBox + ckanext.spatial.common_map.type = MapBox + ckanext.spatial.common_map.mapbox.id = + ckanext.spatial.common_map.mapbox.accessToken = + +### Custom layers + +You can use any tileset that follows the [XYZ +convention](http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames) +using the `custom` type: + + ckanext.spatial.common_map.type = custom + +You will need to define the tileset URL using +`ckanext.spatial.common_map.custom_url`. This follows the [Leaflet URL +template](http://leafletjs.com/reference.html#url-template) format (ie +{s} for subdomains if any, {z} for zoom and {x} {y} for tile +coordinates). Additionally you can use +`ckanext.spatial.common_map.subdomains` and +`ckanext.spatial.common_map.attribution` if needed (this one will also +work for Leaflet-provider layers if you want to tweak the default +attribution). + +For example: + + ckanext.spatial.common_map.type = custom + ckanext.spatial.common_map.custom_url = https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x} + ckanext.spatial.common_map.attribution = Tiles courtesy of the U.S. Geological Survey + +### Old Stamen tiles + +Previous versions of ckanext-spatial defaulted to using the +[Stamen](http://maps.stamen.com/) terrain tiles as they did not require +registration. These were deprecated and stopped working in October 2023. 
+If you see this error displayed in your map widgets, you need to +configure an alternative provider using the methods described in the +sections above: + +![image](_static/stamen-map-provider-error.png) + +## For developers + +To pass the base map configuration options to the relevant Javascript +module that will initialize the map widget, use the +`h.get_common_map_config()` helper function. This is available when +loading the `spatial_metadata` plugin. If you don't want to require +this plugin, create a new helper function that points to it to avoid +duplicating the names, which CKAN won't allow (see for instance how the +GeoJSON preview plugin does it). + +The function will return a dictionary with all configuration options +that relate to the common base layer (that's all that start with +`ckanext.spatial.common_map.`) + +You will need to dump the dict as JSON on the `data-module-map_config` +attribute (see for instance the `dataset_map_base.html` and +`spatial_query.html` snippets): + + {% set map_config = h.get_common_map_config() %} +
+
+
+ +Once at the Javascript module level, all Leaflet based map widgets +should use the `ckan.commonLeafletMap` constructor to initialize the +map. It accepts the following parameters: + +- `container`: HTML element or id of the map container +- `mapConfig`: (Optional) CKAN config related to the common base layer +- `leafletMapOptions`: (Optional) Options to pass to the Leaflet Map + constructor +- `leafletBaseLayerOptions`: (Optional) Options to pass to the Leaflet + TileLayer constructor + +Most of the times you will want to do something like this for a sidebar +map: + + var map = ckan.commonLeafletMap('dataset-map-container', this.options.map_config, {attributionControl: false}); + +And this for a primary content map: + + var map = ckan.commonLeafletMap('map', this.options.map_config); diff --git a/doc/map-widgets.rst b/doc/map-widgets.rst deleted file mode 100644 index fc67d851..00000000 --- a/doc/map-widgets.rst +++ /dev/null @@ -1,135 +0,0 @@ -Common base layers for Map Widgets -================================== - -To provide a consistent look and feel and avoiding code duplication, the map -widgets (at least the ones based on `Leaflet`_) can use a common function to -create the map. The base layer that the map will use can be configured via -configuration options. - -.. image:: _static/base-map-stamen.png - -Configuring the base layer --------------------------- - -The map widgets use the `Leaflet-providers`_ library to make easy to choose the -base tile layer that the map widgets will use. You can use any of the supported -providers, which are listed in the `preview page`_. - -.. note:: As of October 2023, most if not all of the tile providers require at - least some form of registration and / or domain registering. They also have - terms of use and will most likely require proper attribution (which should be - handled automatically for you when choosing a provider). 
- -If you haven't configured a map provider you will see the following notice in the -map widgets: - -.. image:: _static/no-map-provider.png - -The main configuration option to manage the base layer used is -``ckanext.spatial.common_map.type``. The value of this setting should be one of the -provider names supported by Leaflet-providers, e.g. ``Stadia.StamenTerrain``, ``Stadia``, -``MapBox``, ``Herev3.terrainDay``, ``Esri.WorldImagery``, ``USGS.USImagery`` etc. Note -that these values are **case-sensitive**. - -Any additional configuration options required by Leaflet-providers should be set prefixed -with ``ckanext.spatial.common_map.``, for instance to configure the Stamen Terrain map that -was used in previous versions of ckanext-spatial:: - - # Stadia / Stamen Terrain - ckanext.spatial.common_map.type = Stadia.StamenTerrain - ckanext.spatial.common_map.apikey = - -To use MapBox tiles:: - - # MapBox - ckanext.spatial.common_map.type = MapBox - ckanext.spatial.common_map.mapbox.id = - ckanext.spatial.common_map.mapbox.accessToken = - - -Custom layers -+++++++++++++ - -You can use any tileset that follows the `XYZ convention`_ using the ``custom`` -type:: - - ckanext.spatial.common_map.type = custom - -You will need to define the tileset URL using -``ckanext.spatial.common_map.custom_url``. This follows the `Leaflet URL -template`_ format (ie {s} for subdomains if any, {z} for zoom and {x} {y} for -tile coordinates). Additionally you can use -``ckanext.spatial.common_map.subdomains`` and -``ckanext.spatial.common_map.attribution`` if needed (this one will also work -for Leaflet-provider layers if you want to tweak the default attribution). - -For example:: - - ckanext.spatial.common_map.type = custom - ckanext.spatial.common_map.custom_url = https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x} - ckanext.spatial.common_map.attribution = Tiles courtesy of the U.S. 
Geological Survey - -Old Stamen tiles -++++++++++++++++ - -Previous versions of ckanext-spatial defaulted to using the `Stamen`_ terrain tiles as they -not require registration. These were deprecated and stopped working on October 2023. If you see -this error displayed in your map widgets, you need to configure an alternative provider using the -methods described in the sections above: - -.. image:: _static/stamen-map-provider-error.png - - - -For developers --------------- - -To pass the base map configuration options to the relevant Javascript module -that will initialize the map widget, use the ``h.get_common_map_config()`` -helper function. This is available when loading the ``spatial_metadata`` -plugin. If you don't want to require this plugin, create a new helper function -that points to it to avoid duplicating the names, which CKAN won't allow (see -for instance how the GeoJSON preview plugin does it). - -The function will return a dictionary with all configuration options that -relate to the common base layer (that's all that start with -``ckanext.spatial.common_map.``) - -You will need to dump the dict as JSON on the ``data-module-map_config`` -attribute (see for instance the ``dataset_map_base.html`` and -``spatial_query.html`` snippets):: - - {% set map_config = h.get_common_map_config() %} -
-
-
- -Once at the Javascript module level, all Leaflet based map widgets should use -the ``ckan.commonLeafletMap`` constructor to initialize the map. It accepts the -following parameters: - -* ``container``: HTML element or id of the map container -* ``mapConfig``: (Optional) CKAN config related to the common base layer -* ``leafletMapOptions``: (Optional) Options to pass to the Leaflet Map constructor -* ``leafletBaseLayerOptions``: (Optional) Options to pass to the Leaflet TileLayer - constructor - -Most of the times you will want to do something like this for a sidebar map:: - - var map = ckan.commonLeafletMap('dataset-map-container', this.options.map_config, {attributionControl: false}); - -And this for a primary content map:: - - var map = ckan.commonLeafletMap('map', this.options.map_config); - - -.. _Leaflet: http://leafletjs.com -.. _OpenStreetMap: http://openstreetmap.org -.. _`XYZ convention`: http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames -.. _MapBox: http://www.mapbox.com/ -.. _MapBox Studio: https://www.mapbox.com/mapbox-studio -.. _here: http://www.mapbox.com/developers/api-overview/ -.. _`Leaflet URL template`: http://leafletjs.com/reference.html#url-template -.. _Stamen: http://maps.stamen.com/ -.. _`Leaflet-providers`: https://github.com/leaflet-extras/leaflet-providers -.. _`preview page`: http://leaflet-extras.github.io/leaflet-providers/preview/index.html diff --git a/doc/previews.md b/doc/previews.md new file mode 100644 index 00000000..a15e132f --- /dev/null +++ b/doc/previews.md @@ -0,0 +1,8 @@ +# Previews for Spatial Formats + +!!! Note + The view plugins for rendering spatial formats can be found in + [ckanext-geoview](https://github.com/ckan/ckanext-geoview), which + contains view plugins based on [OpenLayers](http://openlayers.org) and + [Leaflet](http://leafletjs.com) to display several geospatial files and + services in CKAN. 
diff --git a/doc/previews.rst b/doc/previews.rst deleted file mode 100644 index 274e57b0..00000000 --- a/doc/previews.rst +++ /dev/null @@ -1,12 +0,0 @@ -============================ -Previews for Spatial Formats -============================ - -.. note:: The view plugins for rendering spatial formats have - been moved to ckanext-geoview_, which contains view plugins - based on OpenLayers_ and Leaflet_ to display several geospatial files - and services in CKAN. - -.. _Leaflet: http://leafletjs.org -.. _OpenLayers: http://openlayers.org -.. _ckanext-geoview: https://github.com/ckan/ckanext-geoview diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 00000000..4c8f017d --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1 @@ +mkdocs-material diff --git a/doc/spatial-search.md b/doc/spatial-search.md new file mode 100644 index 00000000..f3d76fcc --- /dev/null +++ b/doc/spatial-search.md @@ -0,0 +1,297 @@ +# Spatial Search + +The spatial extension allows to index datasets with spatial information +so they can be filtered via a spatial search query. This includes both +via the web interface (see the [Spatial Search +Widget](#spatial-search-widget)) or via the [action +API](https://docs.ckan.org/en/latest/api/), e.g.: + + http://localhost:5000/api/3/action/package_search?q=Pollution&ext_bbox=-7.535093,49.208494,3.890688,57.372349 + +The `ext_bbox` parameter must be provided in the form +`ext_bbox={minx},{miny},{maxx},{maxy}` + +## Setup + +To enable the spatial search you need to add the `spatial_query` plugin +to your ini file. This plugin in turn requires the `spatial_metadata` +plugin, eg: + + ckan.plugins = ... 
spatial_metadata spatial_query + +To define which backend to use for the spatial search use the following +configuration option (see [Choosing a backend for the spatial +search](#choosing-a-backend-for-the-spatial-search)): + + ckanext.spatial.search_backend = solr-bbox + +## Geo-Indexing your datasets + +Regardless of the backend that you are using, in order to make a dataset +searchable by location, it must have its location information (a +geometry) indexed in Solr. You can provide this information in two +ways. + +### The `spatial` extra field + +The easiest way to get your geometries indexed is to use an extra field +named `spatial`. The value of this extra should be a valid +[GeoJSON](http://geojson.org) geometry, for example: + + { + "type":"Polygon", + "coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]] + } + +or: + + { + "type": "Point", + "coordinates": [-3.145,53.078] + } + +Every time a dataset is created, updated or deleted, the extension will +index the information stored in the `spatial` field in Solr, so it can be +reflected in spatial searches. + +If you already have datasets when you enable Spatial Search then you'll +need to [rebuild the search +index](https://docs.ckan.org/en/latest/maintaining/cli.html?#search-index-rebuild-search-index). + +### Custom indexing logic + +You might not want to use the `spatial` extra field. Perhaps you don't +want to store the geometries in the dataset metadata but prefer to do so +in a separate table, or you simply want to perform a different +processing on the geometries before indexing. 
+ +In this case you need to implement the `before_dataset_index()` method +of the +[IPackageController](https://docs.ckan.org/en/latest/extensions/plugin-interfaces.html#ckan.plugins.interfaces.IPackageController.before_dataset_index) +interface: + +```python +def before_dataset_index(self, dataset_dict): + + # When using the default `solr-bbox` backend (based on bounding boxes), you need to + # include the following fields in the returned dataset_dict: + + dataset_dict["minx"] = minx + dataset_dict["maxx"] = maxx + dataset_dict["miny"] = miny + dataset_dict["maxy"] = maxy + + # When using the `solr-spatial-field` backend, you need to include the `spatial_geom` + # field in the returned dataset_dict. This should be a valid geometry in WKT format. + # Shapely can help you get the WKT representation of your geometry if you have it in GeoJSON: + + shape = shapely.geometry.shape(geometry) + wkt = shape.wkt + + dataset_dict["spatial_geom"] = wkt + + # Don't forget to actually return the dict! + + return dataset_dict +``` + +Some things to keep in mind: + +- Remember, you only need to provide one field, either `spatial_bbox` + or `spatial_geom`, depending on the backend chosen. + +- All indexed geometries should fall within the -180, -90, 180, 90 + bounds. If you have polygons crossing the antimeridian (i.e. with + longitudes lower than -180 or bigger than 180) you'll have to split + them across the antimeridian. + +- Check the default implementation of `before_dataset_index()` in + [ckanext/spatial/plugin/\_\_init\_\_.py](https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/plugin/__init__.py) + for extra useful checks and validations. 
+ +- If you want to store the geometry in the `spatial` field but don't + want to apply the default automatic indexing logic applied by + ckanext-spatial just remove the field from the dict (this won't + remove it from the dataset metadata, just from the indexed data): + + ```python + def before_dataset_index(self, dataset_dict): + + dataset_dict.pop("spatial", None) + + return dataset_dict + ``` + +### Choosing a backend for the spatial search + +Ckanext-spatial uses Solr to power the spatial search. The current +implementation is tested on Solr 9, which is the supported version, +although it might work on previous Solr versions. + +!!! Note + There are official [Docker images for + Solr](https://github.com/ckan/ckan-solr) that have all the configuration + needed to perform spatial searches (look for the ones with a `-spatial` + suffix). This is the easiest way to get started but if you need to + customize Solr yourself see below for the modifications needed. + +There are different backends supported for the spatial search; it is +important to understand their differences and the necessary setup +required when choosing which one to use. To configure the search backend +use the following configuration option: + + ckanext.spatial.search_backend = solr-bbox | solr-spatial-field + +The following table summarizes the different spatial search backends: + + | Backend | Supported geometries indexed in Solr | Solr setup needed | + |-----------------------|--------------------------------------|---------------------| + | `solr-bbox` (default) | Bounding Box, Polygon (extents only) | Custom fields | + | `solr-spatial-field` | Bounding Box, Point and Polygon | Custom field + JTS | + +!!! Note + The default `solr-bbox` search backend was previously known as `solr`. + Please update your configuration if using this version as it will be + removed in the future. + +The `solr-bbox` backend is probably a good starting point. 
Here are more +details about the available options (again, you don't need to modify +Solr if you are using one of the spatially enabled official Docker +images): + +- `solr-bbox`: + This option always indexes just the extent of the provided + geometries, whether it's an actual bounding box or not. It + supports spatial sorting of the returned results (based on the + closeness of their bounding box to the query bounding box). It + uses standard Solr float fields so you just need to add the + following to your Solr schema: + ```xml + + + + + + + + ``` +- `solr-spatial-field`: + This option uses the [RPT](https://solr.apache.org/guide/8_11/spatial-search.html#rpt) + Solr field, which allows indexing points, rectangles and more + complex geometries like polygons. This requires the install of + the [JTS](https://github.com/locationtech/jts) library. See the + linked Solr documentation for details on this. Note that it does + not support spatial sorting of the returned results. You will + need to add the following field type and field to your Solr + schema file to enable it: + ```xml + + + + + + + + + + ``` + By default, the `solr-spatial-field` backend uses the following + query. This can be customized by setting the + `ckanext.spatial.solr_query` configuration option, but note that + all placeholders must be included: + ``` + {{!field f=spatial_geom}}Intersects(ENVELOPE({minx}, {maxx}, {maxy}, {miny})) + ``` + +!!! Note + The old `postgis` search backend is no longer supported. You should + migrate to one of the other backends instead. + +## Spatial Search Widget + +![image](_static/spatial-search-widget.png) + +The extension provides a snippet to add a map widget to the search form, +which allows filtering results by an area of interest. + +To add the map widget to the sidebar of the search page, add the +following block to the dataset search page template +(`myproj/ckanext/myproj/templates/package/search.html`). 
If your custom +theme is simply extending the CKAN default theme, you will need to add +`{% ckan_extends %}` to the start of your custom search.html, then +continue with this: + + {% block secondary_content %} + + {% snippet "spatial/snippets/spatial_query.html" %} + + {% endblock %} + +By default the map widget will show the whole world. If you want to set +up a different default extent, you can pass an extra `default_extent` to +the snippet, either with a pair of coordinates like this: + + {% snippet "spatial/snippets/spatial_query.html", default_extent="[[15.62, + -139.21], [64.92, -61.87]]" %} + +or with a GeoJSON object describing a bounding box (note the escaped +quotes): + + {% snippet "spatial/snippets/spatial_query.html", default_extent="{ \"type\": + \"Polygon\", \"coordinates\": [[[74.89, 29.39],[74.89, 38.45], [60.50, + 38.45], [60.50, 29.39], [74.89, 29.39]]]}" %} + +You need to load the `spatial_metadata` and `spatial_query` plugins to +use this snippet. + +## Dataset Extent Map + +![image](_static/dataset-extent-map.png) + +Using the snippets provided, if datasets contain a `spatial` extra like +the one described in the previous section, a map will be shown on the +dataset details page. + +There are snippets already created to load the map on the left sidebar +or in the main body of the dataset details page, but these can be easily +modified to suit your project needs + +To add a map to the sidebar, add the following block to the dataset page +template (eg +`ckanext-myproj/ckanext/myproj/templates/package/read_base.html`). 
If +your custom theme is simply extending the CKAN default theme, you will +need to add `{% ckan_extends %}` to the start of your custom read.html, +then continue with this: + + {% block secondary_content %} + {{ super() }} + + {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} + {% if dataset_extent %} + {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %} + {% endif %} + + {% endblock %} + +For adding the map to the main body, add this to the main dataset page +template (eg +`ckanext-myproj/ckanext/myproj/templates/package/read.html`): + + {% block primary_content_inner %} + + {{ super() }} + + {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} + {% if dataset_extent %} + {% snippet "spatial/snippets/dataset_map.html", extent=dataset_extent %} + {% endif %} + + {% endblock %} + +You need to load the `spatial_metadata` plugin to use these snippets. diff --git a/doc/spatial-search.rst b/doc/spatial-search.rst deleted file mode 100644 index a64fec78..00000000 --- a/doc/spatial-search.rst +++ /dev/null @@ -1,264 +0,0 @@ -============== -Spatial Search -============== - -The spatial extension allows to index datasets with spatial information so they -can be filtered via a spatial search query. This includes both via the web -interface (see the `Spatial Search Widget`_) or via the `action API`_, e.g.:: - - http://localhost:5000/api/action/package_search?q=Pollution&ext_bbox=-7.535093,49.208494,3.890688,57.372349 - -The ``ext_bbox`` parameter must be provided in the form ``ext_bbox={minx},{miny},{maxx},{maxy}`` - - -Setup ------ - -To enable the spatial search you need to add the ``spatial_query`` plugin to -your ini file. This plugin in turn requires the ``spatial_metadata`` plugin, eg:: - - ckan.plugins = ... 
spatial_metadata spatial_query - -To define which backend to use for the spatial search use the following -configuration option (see `Choosing a backend for the spatial search`_):: - - ckanext.spatial.search_backend = solr-bbox - - -Geo-Indexing your datasets --------------------------- - -Regardless of the backend that you are using, in order to make a dataset -searchable by location, it must have a the location information (a geometry), indexed in -Solr. You can provide this information in two ways. - -The ``spatial`` extra field -+++++++++++++++++++++++++++ - -The easiest way to get your geometries indexed is to use an extra field named ``spatial``. -The value of this extra should be a valid GeoJSON_ geometry, for example:: - - { - "type":"Polygon", - "coordinates":[[[2.05827, 49.8625],[2.05827, 55.7447], [-6.41736, 55.7447], [-6.41736, 49.8625], [2.05827, 49.8625]]] - } - -or:: - - { - "type": "Point", - "coordinates": [-3.145,53.078] - } - - -Every time a dataset is created, updated or deleted, the extension will -index the information stored in the ``spatial`` in Solr, so it can be reflected on spatial searches. - -If you already have datasets when you enable Spatial Search then you'll need to -`rebuild the search index `_. - - -Custom indexing logic -+++++++++++++++++++++ - -You might not want to use the ``spatial`` extra field. Perhaps you don't want to store the geometries -in the dataset metadata but prefer to do so in a separate table, or you simply want to perform a different -processing on the geometries before indexing. 
- -In this case you need to implement the ``before_dataset_index()`` method of the `IPackageController `_ interface:: - - def before_dataset_index(self, dataset_dict): - - # When using the default `solr-bbox` backend (based on bounding boxes), you need to - # include the following fields in the returned dataset_dict: - - dataset_dict["minx"] = minx - dataset_dict["maxx"] = maxx - dataset_dict["miny"] = miny - dataset_dict["maxy"] = maxy - - # When using the `solr-spatial-field` backend, you need to include the `spatial_geom` - # field in the returned dataset_dict. This should be a valid geometry in WKT format. - # Shapely can help you get the WKT representation of your gemetry if you have it in GeoJSON: - - shape = shapely.geometry.shape(geometry) - wkt = shape.wkt - - dataset_dict["spatial_geom"] = wkt - - # Don't forget to actually return the dict! - - return dataset_dict - -Some things to keep in mind: - -* Remember, you only need to provide one field, either ``spatial_bbox`` or ``spatial_geom``, depending on - the backend chosen. -* All indexed geometries should fall within the -180, -90, 180, 90 bounds. If you have polygons crossing the antimeridian (i.e. with longituded lower than -180 or bigger than 180) you'll have to split them across the antimeridian. -* Check the default implementation of ``before_dataset_index()`` in `ckanext/spatial/plugins/__init__.py `_ for extra useful checks and validations. -* If you want to store the geometry in the ``spatial`` field but don't want to apply the default automatic indexing logic applied by ckanext-spatial just remove the field from the dict (this won't remove it from the dataset metadata, just from the indexed data):: - - def before_dataset_search(self, dataset_dict): - - dataset_dict.pop("spatial", None) - - return dataset_dict - -Choosing a backend for the spatial search -+++++++++++++++++++++++++++++++++++++++++ - -Ckanext-spatial uses Solr to power the spatial search. 
The current implementation is tested on Solr 8, which is the supported version, although it might work on previous Solr versions. - -.. note:: The are official `Docker images for Solr `_ that have all the configuration needed to perform spatial searches (look for the ones with a ``-spatial`` suffix). This is the easiest way to get started but if you need to customize Solr yourself see below for the modifications needed. - -There are different backends supported for the spatial search, it is important -to understand their differences and the necessary setup required when choosing -which one to use. To configure the search backend use the following configuration option:: - - ckanext.spatial.search_backend = solr-bbox | solr-spatial-field - -The following table summarizes the different spatial search backends: - -+-------------------------+--------------------------------------+--------------------+ -| Backend | Supported geometries indexed in Solr | Solr setup needed | -+=========================+======================================+====================+ -| ``solr-bbox`` (default) | Bounding Box, Polygon (extents only) | Custom fields | -+-------------------------+--------------------------------------+--------------------+ -| ``solr-spatial-field`` | Bounding Box, Point and Polygon | Custom field + JTS | -+-------------------------+--------------------------------------+--------------------+ - -.. note:: The default ``solr-bbox`` search backend was previously known as ``solr``. Please update - your configuration if using this version as it will be removed in the future. - - -The ``solr-bbox`` backend is probably a good starting point. Here are more -details about the available options (again, you don't need to modify Solr if you are using one of the spatially enabled official Docker images): - -* ``solr-bbox`` - This option always indexes just the extent of the provided geometries, whether if it's an - actual bounding box or not. 
It supports spatial sorting of the returned results (based on the closeness of their bounding box to the query bounding box). It uses standard Solr float fields so you just need to add the following to your Solr schema:: - - - - - - - - - -* ``solr-spatial-field`` - This option uses the `RPT `_ Solr field, which allows - to index points, rectangles and more complex geometries like polygons. This requires the install of the `JTS`_ library. See the linked Solr documentation for details on this. Note that it does not support spatial sorting of the returned results. - You will need to add the following field type and field to your Solr - schema file to enable it :: - - - - - - - - - - - - By default, the ``solr-sptatial-field`` backend uses the following query. This can be customized by setting the ``ckanext.spatial.solr_query`` configuration option, but note that all placeholders must be included:: - - "{{!field f=spatial_geom}}Intersects(ENVELOPE({minx}, {maxx}, {maxy}, {miny}))" - -.. note:: The old ``postgis`` search backend is no longer supported. You should migrate to one of the other backends instead. - - - -Spatial Search Widget ---------------------- - - -.. image:: _static/spatial-search-widget.png - -The extension provides a snippet to add a map widget to the search form, which -allows filtering results by an area of interest. - -To add the map widget to the sidebar of the search page, add the following -block to the dataset search page template -(``myproj/ckanext/myproj/templates/package/search.html``). If your custom -theme is simply extending the CKAN default theme, you will need to add ``{% ckan_extends %}`` -to the start of your custom search.html, then continue with this:: - - {% block secondary_content %} - - {% snippet "spatial/snippets/spatial_query.html" %} - - {% endblock %} - -By default the map widget will show the whole world. 
If you want to set up a -different default extent, you can pass an extra ``default_extent`` to the -snippet, either with a pair of coordinates like this:: - - {% snippet "spatial/snippets/spatial_query.html", default_extent="[[15.62, - -139.21], [64.92, -61.87]]" %} - -or with a GeoJSON object describing a bounding box (note the escaped quotes):: - - {% snippet "spatial/snippets/spatial_query.html", default_extent="{ \"type\": - \"Polygon\", \"coordinates\": [[[74.89, 29.39],[74.89, 38.45], [60.50, - 38.45], [60.50, 29.39], [74.89, 29.39]]]}" %} - -You need to load the ``spatial_metadata`` and ``spatial_query`` plugins to use this -snippet. - - - -Dataset Extent Map ------------------- - -.. image:: _static/dataset-extent-map.png - -Using the snippets provided, if datasets contain a ``spatial`` extra like the -one described in the previous section, a map will be shown on the dataset -details page. - -There are snippets already created to load the map on the left sidebar or in -the main body of the dataset details page, but these can be easily modified to -suit your project needs - -To add a map to the sidebar, add the following block to the dataset page template (eg -``ckanext-myproj/ckanext/myproj/templates/package/read_base.html``). 
If your custom -theme is simply extending the CKAN default theme, you will need to add ``{% ckan_extends %}`` -to the start of your custom read.html, then continue with this:: - - {% block secondary_content %} - {{ super() }} - - {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} - {% if dataset_extent %} - {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %} - {% endif %} - - {% endblock %} - -For adding the map to the main body, add this to the main dataset page template (eg -``ckanext-myproj/ckanext/myproj/templates/package/read.html``):: - - {% block primary_content_inner %} - - {{ super() }} - - {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} - {% if dataset_extent %} - {% snippet "spatial/snippets/dataset_map.html", extent=dataset_extent %} - {% endif %} - - {% endblock %} - -You need to load the ``spatial_metadata`` plugin to use these snippets. - -.. _action API: http://docs.ckan.org/en/latest/apiv3.html -.. _JTS: https://github.com/locationtech/jts -.. _GeoJSON: http://geojson.org diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..f2eaf75d --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,75 @@ +site_name: ckanext-spatial +#site_url: https://docs.ckan.org/projects/ckanext-spatial +site_url: http://localhost:8000 + +site_description: >- + The documentation of ckanext-spatial, a CKAN extension that + improves CKAN geospatial capabilities. 
+ + +repo_name: ckan/ckanext-spatial +repo_url: https://github.com/ckan/ckanext-spatial + +docs_dir: doc + +theme: + name: material + palette: + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: ckan + toggle: + icon: material/toggle-switch + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/toggle-switch-off + name: Switch to light mode + + logo: _assets/logo.png + favicon: _assets/ckan.ico + features: + - navigation.expand + - navigation.footer + - navigation.instant + - navigation.instant.progress + - search.suggest + - search.highlight + - toc.integrate + - content.code.copy + +plugins: + - search + +markdown_extensions: + - toc: + permalink: true + - admonition + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets + + +extra_css: + - _css/extra.css + +nav: + - Home: 'index.md' + - Installation and setup: 'install.md' + - Spatial search: 'spatial-search.md' + - Spatial harvesters: 'harvesters.md' + - Other features: + - CSW: 'csw.md' + - Previews of spatial formats: 'previews.md' + - Common base layer for map widgets: 'map-widgets.md' + - CHANGELOG: 'changelog.md'