From 3b17e2a8d157828fe1ed382084e3c060bf5dad51 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 11 Oct 2024 13:16:08 +0300 Subject: [PATCH 01/12] Add PropertyString, cleanHtml helper and escapeHtmlExt helper. --- composer.json | 1 + composer.lock | 63 +++++- .../src/VuFind/Content/Summaries/Demo.php | 4 + .../src/VuFind/RecordDriver/DefaultRecord.php | 23 ++- .../src/VuFind/String/PropertyString.php | 187 ++++++++++++++++++ .../VuFind/String/PropertyStringInterface.php | 92 +++++++++ .../src/VuFind/View/Helper/Root/CleanHtml.php | 79 ++++++++ .../View/Helper/Root/CleanHtmlFactory.php | 147 ++++++++++++++ .../View/Helper/Root/RecordDataFormatter.php | 4 + .../DefaultRecord/data-summary.phtml | 16 +- .../DefaultRecord/data-summary.phtml | 16 +- themes/root/theme.config.php | 5 + 12 files changed, 623 insertions(+), 14 deletions(-) create mode 100644 module/VuFind/src/VuFind/String/PropertyString.php create mode 100644 module/VuFind/src/VuFind/String/PropertyStringInterface.php create mode 100644 module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php create mode 100644 module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php diff --git a/composer.json b/composer.json index f399ac8a8e0..171f00c8877 100644 --- a/composer.json +++ b/composer.json @@ -34,6 +34,7 @@ "composer/package-versions-deprecated": "1.11.99.5", "composer/semver": "3.4.2", "endroid/qr-code": "5.0.9", + "ezyang/htmlpurifier": "4.17.0", "guzzlehttp/guzzle": "7.9.2", "jaybizzle/crawler-detect": "^1.2", "laminas/laminas-cache": "3.12.2", diff --git a/composer.lock b/composer.lock index f227e348fb1..da4733f6d52 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "a3b3369d5130dc83c7a9a6616ebbce7a", + "content-hash": "9868f802ae7cbc11e33ea21415c8798a", "packages": [ { "name": "ahand/mobileesp", @@ -1155,6 +1155,67 @@ ], "time": "2024-05-08T08:09:28+00:00" }, + { + "name": "ezyang/htmlpurifier", + "version": "v4.17.0", + "source": { + "type": "git", + "url": "https://github.com/ezyang/htmlpurifier.git", + "reference": "bbc513d79acf6691fa9cf10f192c90dd2957f18c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/bbc513d79acf6691fa9cf10f192c90dd2957f18c", + "reference": "bbc513d79acf6691fa9cf10f192c90dd2957f18c", + "shasum": "" + }, + "require": { + "php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0" + }, + "require-dev": { + "cerdic/css-tidy": "^1.7 || ^2.0", + "simpletest/simpletest": "dev-master" + }, + "suggest": { + "cerdic/css-tidy": "If you want to use the filter 'Filter.ExtractStyleBlocks'.", + "ext-bcmath": "Used for unit conversion and imagecrash protection", + "ext-iconv": "Converts text to and from non-UTF-8 encodings", + "ext-tidy": "Used for pretty-printing HTML" + }, + "type": "library", + "autoload": { + "files": [ + "library/HTMLPurifier.composer.php" + ], + "psr-0": { + "HTMLPurifier": "library/" + }, + "exclude-from-classmap": [ + "/library/HTMLPurifier/Language/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL-2.1-or-later" + ], + "authors": [ + { + "name": "Edward Z. Yang", + "email": "admin@htmlpurifier.org", + "homepage": "http://ezyang.com" + } + ], + "description": "Standards compliant HTML filter written in PHP", + "homepage": "http://htmlpurifier.org/", + "keywords": [ + "html" + ], + "support": { + "issues": "https://github.com/ezyang/htmlpurifier/issues", + "source": "https://github.com/ezyang/htmlpurifier/tree/v4.17.0" + }, + "time": "2023-11-17T15:01:25+00:00" + }, { "name": "filp/whoops", "version": "2.15.4", diff --git a/module/VuFind/src/VuFind/Content/Summaries/Demo.php b/module/VuFind/src/VuFind/Content/Summaries/Demo.php index 2e7d91deacc..c3a22f191c9 100644 --- a/module/VuFind/src/VuFind/Content/Summaries/Demo.php +++ b/module/VuFind/src/VuFind/Content/Summaries/Demo.php @@ -29,6 +29,8 @@ namespace VuFind\Content\Summaries; +use VuFind\String\PropertyString; + /** * Demo (fake data) summaries content loader. * @@ -56,6 +58,8 @@ public function loadByIsbn($key, \VuFindCode\ISBN $isbnObj) return [ 'Demo summary key: ' . $key, 'Demo summary ISBN: ' . $isbnObj->get13(), + (new PropertyString('Demo non-HTML summary')) + ->setHtml('Demo HTML Summary:'), ]; } } diff --git a/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php b/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php index 6a3ddb08a76..5b72982acfa 100644 --- a/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php +++ b/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php @@ -1271,9 +1271,9 @@ public function getSystemDetails() public function getSummary() { // We need to return an array, so if we have a description, turn it into an - // array (it should be a flat string according to the default schema, but we - // might as well support the array case just to be on the safe side: - return (array)($this->fields['description'] ?? []); + // array (it is a flat string in the default Solr schema, but we also + // support multivalued fields for other backends): + return $this->getFieldAsArray('description'); } /** @@ -1816,4 +1816,21 @@ public function getCoordinateLabels() { return (array)($this->fields['long_lat_label'] ?? []); } + + /** + * Get a field as an array + * + * @param string $field Field + * + * @return array + */ + protected function getFieldAsArray(string $field): array + { + // Make sure to return only non-empty values and avoid casting since description can be a PropertyString too: + $value = $this->fields['description'] ?? ''; + if ('' === $value) { + return []; + } + return is_array($value) ? $value : [$value]; + } } diff --git a/module/VuFind/src/VuFind/String/PropertyString.php b/module/VuFind/src/VuFind/String/PropertyString.php new file mode 100644 index 00000000000..3cddb283301 --- /dev/null +++ b/module/VuFind/src/VuFind/String/PropertyString.php @@ -0,0 +1,187 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Site + */ + +namespace VuFind\String; + +use function array_key_exists; + +/** + * Class for a string with additional properties. + * + * @category VuFind + * @package String + * @author Ere Maijala + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Site + */ +class PropertyString implements PropertyStringInterface +{ + /** + * Constructor + * + * @param string $string String value + * @param array $properties Associative array of any additional properties. Use a custom prefix for locally + * defined properties. Double underscore is a reserved prefix, and currently the following keys are defined: + * __ids Identifiers (e.g. subject URIs) + * __html HTML presentation + */ + public function __construct(protected string $string, protected array $properties = []) + { + } + + /** + * Set string value + * + * @param string $str String value + * + * @return static + */ + public function setString(string $str): static + { + $this->string = $str; + return $this; + } + + /** + * Get string value + * + * @return string + */ + public function getString(): string + { + return $this->string; + } + + /** + * Set HTML string + * + * @param string $html HTML + * + * @return static + */ + public function setHtml(string $html): static + { + $this['__html'] = $html; + return $this; + } + + /** + * Get HTML string + * + * Note: This could contain anything and must be sanitized for display + * + * @return ?string + */ + public function getHtml(): ?string + { + return $this['__html']; + } + + /** + * Set identifiers + * + * @param array $ids Identifiers + * + * @return static + */ + public function setIds(array $ids): static + { + $this['__ids'] = $ids; + return $this; + } + + /** + * Get identifiers + * + * @return ?array + */ + public function getIds(): ?array + { + return $this['__ids']; + } + + /** + * Check if offset exists + * + * @param mixed $offset Offset + * + * @return bool + */ + public function offsetExists(mixed $offset): bool + { + return array_key_exists($offset, $this->properties); + } + + /** + * Return value of offset + * + * @param mixed $offset Offset + * + * @return mixed + */ + public function offsetGet(mixed $offset): mixed + { + return $this->properties[$offset] ?? null; + } + + /** + * Set value of offset + * + * @param mixed $offset Offset + * @param mixed $value Value + * + * @return void + */ + public function offsetSet(mixed $offset, mixed $value): void + { + $this->properties[$offset] = $value; + } + + /** + * Unset value of offset + * + * @param mixed $offset Offset + * + * @return void + */ + public function offsetUnset(mixed $offset): void + { + unset($this->properties[$offset]); + } + + /** + * Return string value + * + * @return string + */ + public function __toString(): string + { + return $this->string; + } +} diff --git a/module/VuFind/src/VuFind/String/PropertyStringInterface.php b/module/VuFind/src/VuFind/String/PropertyStringInterface.php new file mode 100644 index 00000000000..81f6673a645 --- /dev/null +++ b/module/VuFind/src/VuFind/String/PropertyStringInterface.php @@ -0,0 +1,92 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Site + */ + +namespace VuFind\String; + +/** + * Interface for a string with additional properties. + * + * @category VuFind + * @package String + * @author Ere Maijala + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Site + */ +interface PropertyStringInterface extends \ArrayAccess, \Stringable +{ + /** + * Set string value + * + * @param string $str String value + * + * @return static + */ + public function setString(string $str): static; + + /** + * Get string value + * + * @return string + */ + public function getString(): string; + + /** + * Set HTML string + * + * @param string $html HTML + * + * @return static + */ + public function setHtml(string $html): static; + + /** + * Get HTML string + * + * Note: This could contain anything and must be sanitized for display + * + * @return ?string + */ + public function getHtml(): ?string; + + /** + * Set identifiers + * + * @param array $ids Identifiers + * + * @return static + */ + public function setIds(array $ids): static; + + /** + * Get identifiers + * + * @return ?array + */ + public function getIds(): ?array; +} diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php new file mode 100644 index 00000000000..c728fcf731e --- /dev/null +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php @@ -0,0 +1,79 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org Main Site + */ + +namespace VuFind\View\Helper\Root; + +use Closure; + +/** + * HTML Cleaner view helper + * + * @category VuFind + * @package View_Helpers + * @author Ere Maijala + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org Main Site + */ +class CleanHtml extends \Laminas\View\Helper\AbstractHelper +{ + /** + * Purifier + * + * @var \HTMLPurifier + */ + protected $purifier = null; + + /** + * Constructor + * + * @param Closure $purifierFactory Purifier factory callback + */ + public function __construct(protected Closure $purifierFactory) + { + } + + /** + * Clean up HTML + * + * @param string $html HTML + * @param boolean $targetBlank Whether to add target=_blank to outgoing links + * + * @return string + */ + public function __invoke($html, $targetBlank = false): string + { + if (!str_contains($html, '<')) { + return $html; + } + if (null === ($this->purifier[$targetBlank] ?? null)) { + $this->purifier[$targetBlank] = ($this->purifierFactory)(compact('targetBlank')); + } + return $this->purifier[$targetBlank]->purify($html); + } +} diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php new file mode 100644 index 00000000000..561a5f0f91c --- /dev/null +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php @@ -0,0 +1,147 @@ + + * @author Aleksi Peebles + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ + +namespace VuFind\View\Helper\Root; + +use Closure; +use HTMLPurifier; +use HTMLPurifier_Config; +use Laminas\ServiceManager\Exception\ServiceNotCreatedException; +use Laminas\ServiceManager\Exception\ServiceNotFoundException; +use Laminas\ServiceManager\Factory\FactoryInterface; +use Psr\Container\ContainerExceptionInterface as ContainerException; +use Psr\Container\ContainerInterface; + +/** + * CleanHtml helper factory. + * + * @category VuFind + * @package View_Helpers + * @author Ere Maijala + * @author Aleksi Peebles + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ +class CleanHtmlFactory implements FactoryInterface +{ + /** + * Service manager + * + * @var ContainerInterface + */ + protected ContainerInterface $container; + + /** + * Create an object + * + * @param ContainerInterface $container Service manager + * @param string $requestedName Service being created + * @param null|array $options Extra options (optional) + * + * @return object + * + * @throws ServiceNotFoundException if unable to resolve the service. + * @throws ServiceNotCreatedException if an exception is raised when + * creating a service. + * @throws ContainerException if any other error occurs + */ + public function __invoke( + ContainerInterface $container, + $requestedName, + array $options = null + ) { + if (!empty($options)) { + throw new \Exception('Unexpected options sent to factory.'); + } + + $this->container = $container; + return new $requestedName(Closure::fromCallable([$this, 'createPurifier'])); + } + + /** + * Create a purifier instance. + * + * N.B. This is a relatively slow method. + * + * @param array $options Additional options. Currently supported: + * targetBlank true/false Whether to add target="_blank" to external links + * + * @return HTMLPurifier + */ + protected function createPurifier(array $options): HTMLPurifier + { + $config = \HTMLPurifier_Config::createDefault(); + // Set cache path to the object cache + $cacheDir + = $this->container->get(\VuFind\Cache\Manager::class)->getCache('object')->getOptions()->getCacheDir(); + if ($cacheDir) { + $config->set('Cache.SerializerPath', $cacheDir); + } + if ($options['targetBlank'] ?? false) { + $config->set('HTML.Nofollow', 1); + $config->set('HTML.TargetBlank', 1); + } + + // Setting the following option makes purifier’s DOMLex pass the + // LIBXML_PARSEHUGE option to DOMDocument::loadHtml method. This in turn + // ensures that PHP calls htmlCtxtUseOptions (see + // github.com/php/php-src/blob/PHP-8.1.14/ext/dom/document.c#L1870), + // which ensures that the libxml2 options (namely keepBlanks) are set up + // properly, and whitespace nodes are preserved. This should not be an + // issue from libxml2 version 2.9.5, but during testing the issue was + // still intermittently present. Regardless of that, CentOS 7.x have an + // older libxml2 that exhibits the issue. + $config->set('Core.AllowParseManyTags', true); + + $this->setAdditionalConfiguration($config); + return new \HTMLPurifier($config); + } + + /** + * Sets additional configuration + * + * @param HTMLPurifier_Config $config Configuration + * + * @return void + */ + protected function setAdditionalConfiguration(HTMLPurifier_Config $config) + { + // Add support for deprecated tags: + $definition = $config->getHTMLDefinition(true); + $definition->addElement( + 'details', + 'Block', + 'Flow', + 'Common', + ['open' => new \HTMLPurifier_AttrDef_HTML_Bool(true)] + ); + $definition->addElement('summary', 'Block', 'Flow', 'Common'); + } +} diff --git a/module/VuFind/src/VuFind/View/Helper/Root/RecordDataFormatter.php b/module/VuFind/src/VuFind/View/Helper/Root/RecordDataFormatter.php index 37405783a3a..9889e90a15c 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/RecordDataFormatter.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/RecordDataFormatter.php @@ -33,6 +33,7 @@ use Laminas\View\Helper\AbstractHelper; use VuFind\RecordDriver\AbstractBase as RecordDriver; +use VuFind\String\PropertyStringInterface; use function call_user_func; use function count; @@ -125,6 +126,9 @@ protected function sortCallback($a, $b) */ protected function allowValue($value, $options, $ignoreCombineAlt = false) { + if ($value instanceof PropertyStringInterface) { + $value = (string)$value; + } if (!empty($value) || ($ignoreCombineAlt && ($options['renderType'] ?? 'Simple') == 'CombineAlt')) { return true; } diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml index bd4333cb4e5..206db205499 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -1,10 +1,16 @@ driver->getSummary() as $summary): ?> - escapeHtml($summary) ?>
+ escapeHtmlExt($summary, allowHtml: true) ?>
driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> - - '); ?> - escapeHtml($summary) . '
')?> - + ', + array_map( + function ($summary) { + $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); + return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + }, + $content + ) +)?> diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml index bd4333cb4e5..206db205499 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -1,10 +1,16 @@ driver->getSummary() as $summary): ?> - escapeHtml($summary) ?>
+ escapeHtmlExt($summary, allowHtml: true) ?>
driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> - - '); ?> - escapeHtml($summary) . '
')?> - + ', + array_map( + function ($summary) { + $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); + return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + }, + $content + ) +)?> diff --git a/themes/root/theme.config.php b/themes/root/theme.config.php index 1858b5fbe5c..0e32664efbe 100644 --- a/themes/root/theme.config.php +++ b/themes/root/theme.config.php @@ -17,6 +17,7 @@ 'VuFind\View\Helper\Root\Captcha' => 'VuFind\View\Helper\Root\CaptchaFactory', 'VuFind\View\Helper\Root\Cart' => 'VuFind\View\Helper\Root\CartFactory', 'VuFind\View\Helper\Root\Citation' => 'VuFind\View\Helper\Root\CitationFactory', + 'VuFind\View\Helper\Root\CleanHtml' => 'VuFind\View\Helper\Root\CleanHtmlFactory', 'VuFind\View\Helper\Root\Component' => 'Laminas\ServiceManager\Factory\InvokableFactory', 'VuFind\View\Helper\Root\Config' => 'VuFind\View\Helper\Root\ConfigFactory', 'VuFind\View\Helper\Root\Content' => 'VuFind\View\Helper\Root\ContentFactory', @@ -30,6 +31,7 @@ 'VuFind\View\Helper\Root\DateTime' => 'VuFind\View\Helper\Root\DateTimeFactory', 'VuFind\View\Helper\Root\DisplayLanguageOption' => 'VuFind\View\Helper\Root\DisplayLanguageOptionFactory', 'VuFind\View\Helper\Root\Doi' => 'VuFind\View\Helper\Root\DoiFactory', + 'VuFind\View\Helper\Root\EscapeHtmlExt' => 'VuFind\View\Helper\Root\EscapeHtmlExtFactory', 'VuFind\View\Helper\Root\ExplainElement' => 'Laminas\ServiceManager\Factory\InvokableFactory', 'VuFind\View\Helper\Root\Export' => 'VuFind\View\Helper\Root\ExportFactory', 'VuFind\View\Helper\Root\Feedback' => 'VuFind\View\Helper\Root\FeedbackFactory', @@ -115,6 +117,7 @@ 'captcha' => 'VuFind\View\Helper\Root\Captcha', 'cart' => 'VuFind\View\Helper\Root\Cart', 'citation' => 'VuFind\View\Helper\Root\Citation', + 'cleanHtml' => 'VuFind\View\Helper\Root\CleanHtml', 'component' => 'VuFind\View\Helper\Root\Component', 'config' => 'VuFind\View\Helper\Root\Config', 'content' => 'VuFind\View\Helper\Root\Content', @@ -128,6 +131,7 @@ 'dateTime' => 'VuFind\View\Helper\Root\DateTime', 'displayLanguageOption' => 'VuFind\View\Helper\Root\DisplayLanguageOption', 'doi' => 'VuFind\View\Helper\Root\Doi', + 'escapeHtmlExt' => 'VuFind\View\Helper\Root\EscapeHtmlExt', 'explainElement' => 'VuFind\View\Helper\Root\ExplainElement', 'export' => 'VuFind\View\Helper\Root\Export', 'feedback' => 'VuFind\View\Helper\Root\Feedback', @@ -196,6 +200,7 @@ 'truncate' => 'VuFind\View\Helper\Root\Truncate', 'userlist' => 'VuFind\View\Helper\Root\UserList', 'usertags' => 'VuFind\View\Helper\Root\UserTags', + 'Laminas\View\Helper\Url' => 'VuFind\View\Helper\Root\Url', ], ], From be7162995977285dcb6ec40ade6ba0107609306b Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 11 Oct 2024 15:30:57 +0300 Subject: [PATCH 02/12] Add EscapeHtmlExt. --- .../VuFind/View/Helper/Root/EscapeHtmlExt.php | 92 +++++++++++++++++++ .../View/Helper/Root/EscapeHtmlExtFactory.php | 76 +++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php create mode 100644 module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php new file mode 100644 index 00000000000..01fbb39d528 --- /dev/null +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php @@ -0,0 +1,92 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ + +namespace VuFind\View\Helper\Root; + +use Laminas\Escaper\Escaper; +use VuFind\String\PropertyStringInterface; + +/** + * Extended Escape HTML view helper + * + * @category VuFind + * @package View_Helpers + * @author Ere Maijala + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ +class EscapeHtmlExt extends \Laminas\View\Helper\Escaper\AbstractHelper +{ + /** + * Constructor + * + * @param Escaper $escaper Escaper + * @param CleanHtml $cleanHtml Clean HTML helper + */ + public function __construct(Escaper $escaper, protected CleanHtml $cleanHtml) + { + $this->escaper = $escaper; + } + + /** + * Invoke this helper: escape a value + * + * @param mixed $value Value to escape + * @param int $recurse Expects one of the recursion constants; + * used to decide whether or not to recurse the given value when escaping + * @param bool $allowHtml Whether to allow sanitized HTML if passed a PropertyString + * + * @return mixed Given a scalar, a scalar value is returned. Given an object, with the $recurse flag not + * allowing object recursion, returns a string. Otherwise, returns an array. + * + * @throws Exception\InvalidArgumentException + */ + public function __invoke($value, $recurse = self::RECURSE_NONE, bool $allowHtml = false) + { + if ($value instanceof PropertyStringInterface) { + if ($allowHtml && $html = $value->getHtml()) { + return ($this->cleanHtml)($html); + } + $value = (string)$value; + } + return $this->escape($value); + } + + /** + * Escape a string + * + * @param string $value String to escape + * + * @return string + */ + protected function escape($value) + { + return $this->escaper->escapeHtml($value); + } +} diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php new file mode 100644 index 00000000000..aecb8c94634 --- /dev/null +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php @@ -0,0 +1,76 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ + +namespace VuFind\View\Helper\Root; + +use Laminas\Escaper\Escaper; +use Laminas\ServiceManager\Exception\ServiceNotCreatedException; +use Laminas\ServiceManager\Exception\ServiceNotFoundException; +use Laminas\ServiceManager\Factory\FactoryInterface; +use Psr\Container\ContainerExceptionInterface as ContainerException; +use Psr\Container\ContainerInterface; + +/** + * Extended Escape HTML helper factory. + * + * @category VuFind + * @package View_Helpers + * @author Ere Maijala + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ +class EscapeHtmlExtFactory implements FactoryInterface +{ + /** + * Create an object + * + * @param ContainerInterface $container Service manager + * @param string $requestedName Service being created + * @param null|array $options Extra options (optional) + * + * @return object + * + * @throws ServiceNotFoundException if unable to resolve the service. + * @throws ServiceNotCreatedException if an exception is raised when + * creating a service. + * @throws ContainerException&\Throwable if any other error occurs + */ + public function __invoke( + ContainerInterface $container, + $requestedName, + array $options = null + ) { + if (!empty($options)) { + throw new \Exception('Unexpected options sent to factory.'); + } + + $helpers = $container->get('ViewHelperManager'); + return new $requestedName(new Escaper(), $helpers->get('cleanHtml')); + } +} From d5c0ca5cd4c236d69f460bf20ae6a5ee9cfc120e Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 11 Oct 2024 20:20:50 +0300 Subject: [PATCH 03/12] Fixes. --- module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php | 4 +++- module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php b/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php index 5b72982acfa..de6e7f77e12 100644 --- a/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php +++ b/module/VuFind/src/VuFind/RecordDriver/DefaultRecord.php @@ -1826,11 +1826,13 @@ public function getCoordinateLabels() */ protected function getFieldAsArray(string $field): array { - // Make sure to return only non-empty values and avoid casting since description can be a PropertyString too: + // Make sure to return only non-empty values: $value = $this->fields['description'] ?? ''; if ('' === $value) { return []; } + // Avoid casting since description can be a PropertyString too (and casting would return an array of object + // properties): return is_array($value) ? $value : [$value]; } } diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php index 01fbb39d528..7720ba37db6 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php @@ -51,7 +51,7 @@ class EscapeHtmlExt extends \Laminas\View\Helper\Escaper\AbstractHelper */ public function __construct(Escaper $escaper, protected CleanHtml $cleanHtml) { - $this->escaper = $escaper; + parent::__construct($escaper); } /** From c1ff4e9be466972ea83067f37712b9f86e32fb77 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 15 Oct 2024 11:07:44 +0300 Subject: [PATCH 04/12] Add a static factory method. --- module/VuFind/src/VuFind/String/PropertyString.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/module/VuFind/src/VuFind/String/PropertyString.php b/module/VuFind/src/VuFind/String/PropertyString.php index 3cddb283301..0497072500e 100644 --- a/module/VuFind/src/VuFind/String/PropertyString.php +++ b/module/VuFind/src/VuFind/String/PropertyString.php @@ -55,6 +55,19 @@ public function __construct(protected string $string, protected array $propertie { } + /** + * Create a PropertyString from an HTML string + * + * @param string $html HTML + * @param array $properties Any additional properties (see __construct) + * + * @return static + */ + public static function fromHtml(string $html, array $properties = []): static + { + return (new static(strip_tags($html), $properties))->setHtml($html); + } + /** * Set string value * From da9165f30ea20560e24c5f0910ce6fca026bb3c9 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 15 Oct 2024 11:13:17 +0300 Subject: [PATCH 05/12] Don't try to be too clever. --- module/VuFind/src/VuFind/String/PropertyString.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/VuFind/src/VuFind/String/PropertyString.php b/module/VuFind/src/VuFind/String/PropertyString.php index 0497072500e..c2da2519bb2 100644 --- a/module/VuFind/src/VuFind/String/PropertyString.php +++ b/module/VuFind/src/VuFind/String/PropertyString.php @@ -61,11 +61,11 @@ public function __construct(protected string $string, protected array $propertie * @param string $html HTML * @param array $properties Any additional properties (see __construct) * - * @return static + * @return PropertyString */ - public static function fromHtml(string $html, array $properties = []): static + public static function fromHtml(string $html, array $properties = []): PropertyString { - return (new static(strip_tags($html), $properties))->setHtml($html); + return (new PropertyString(strip_tags($html), $properties))->setHtml($html); } /** From 61ef47ee5654da32ffaabf5676d875e00529c5fe Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 15 Oct 2024 11:13:26 +0300 Subject: [PATCH 06/12] Fix comment. --- module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php index 561a5f0f91c..7f5ee309c46 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php @@ -133,7 +133,7 @@ protected function createPurifier(array $options): HTMLPurifier */ protected function setAdditionalConfiguration(HTMLPurifier_Config $config) { - // Add support for deprecated tags: + // Add support for details and summary elements: $definition = $config->getHTMLDefinition(true); $definition->addElement( 'details', From 9bcfe47c62ba4bdb4ca788808254dd5716979dca Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 15 Oct 2024 12:03:49 +0300 Subject: [PATCH 07/12] Fix indentation. Co-authored-by: Demian Katz --- .../templates/RecordDriver/DefaultRecord/data-summary.phtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml index 206db205499..92ea3ddf7e9 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -7,8 +7,8 @@ '
', array_map( function ($summary) { - $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); + return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); }, $content ) From 1461a008cfda9698694ab76eb5c819fddcf88f70 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 15 Oct 2024 12:11:45 +0300 Subject: [PATCH 08/12] Fix indentation more. --- .../DefaultRecord/data-summary.phtml | 20 ++++++++++--------- .../DefaultRecord/data-summary.phtml | 20 ++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml index 206db205499..a5d8e73fb4e 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -3,14 +3,16 @@ driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> - ', - array_map( - function ($summary) { - $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); - }, - $content + ', + array_map( + function ($summary) { + $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); + return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + }, + $content + ) ) -)?> + ?> diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml index 92ea3ddf7e9..a5d8e73fb4e 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -3,14 +3,16 @@ driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> - ', - array_map( - function ($summary) { - $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); - }, - $content + ', + array_map( + function ($summary) { + $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); + return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + }, + $content + ) ) -)?> + ?> From efa191a02d749dcabc4213202c765d7b63eddf4f Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 8 Nov 2024 11:20:35 +0200 Subject: [PATCH 09/12] Rename helper, fix truncate, add configuration for HTML fields. --- config/vufind/config.ini | 7 ++++ .../View/Helper/Root/CleanHtmlFactory.php | 3 +- ...scapeHtmlExt.php => EscapeOrCleanHtml.php} | 40 +++++++++++-------- ...ctory.php => EscapeOrCleanHtmlFactory.php} | 11 ++--- .../src/VuFind/View/Helper/Root/Truncate.php | 18 ++++----- .../RecordDriver/DefaultRecord/core.phtml | 26 ++++++------ .../DefaultRecord/data-summary.phtml | 4 +- .../RecordDriver/DefaultRecord/core.phtml | 26 ++++++------ .../DefaultRecord/data-summary.phtml | 4 +- themes/root/theme.config.php | 4 +- 10 files changed, 80 insertions(+), 63 deletions(-) rename module/VuFind/src/VuFind/View/Helper/Root/{EscapeHtmlExt.php => EscapeOrCleanHtml.php} (62%) rename module/VuFind/src/VuFind/View/Helper/Root/{EscapeHtmlExtFactory.php => EscapeOrCleanHtmlFactory.php} (88%) diff --git a/config/vufind/config.ini b/config/vufind/config.ini index 0d2d33fa780..7ed4ec87e5b 100644 --- a/config/vufind/config.ini +++ b/config/vufind/config.ini @@ -2610,3 +2610,10 @@ description = "The REST API provides access to search functions and records cont ; By default, VuFind sorts text in a locale-agnostic way; if this setting is ; turned on, the current user-selected locale will impact sort order. ;use_locale_sorting = true + +; By default HTML is not allowed in record fields, but support for sanitized HTML +; can be enabled per field type. +[HTML_Fields] +;title = false +;title-alt = false +;summary = false diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php index 7f5ee309c46..22acf8ef1e6 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php @@ -116,8 +116,7 @@ protected function createPurifier(array $options): HTMLPurifier // which ensures that the libxml2 options (namely keepBlanks) are set up // properly, and whitespace nodes are preserved. This should not be an // issue from libxml2 version 2.9.5, but during testing the issue was - // still intermittently present. Regardless of that, CentOS 7.x have an - // older libxml2 that exhibits the issue. + // still intermittently present, and this setting should remain in place. $config->set('Core.AllowParseManyTags', true); $this->setAdditionalConfiguration($config); diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php similarity index 62% rename from module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php rename to module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php index 7720ba37db6..adcf5936af3 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExt.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php @@ -1,11 +1,11 @@ htmlFields = (array)($config['HTML_Fields'] ?? []); } /** * Invoke this helper: escape a value * - * @param mixed $value Value to escape - * @param int $recurse Expects one of the recursion constants; - * used to decide whether or not to recurse the given value when escaping - * @param bool $allowHtml Whether to allow sanitized HTML if passed a PropertyString - * - * @return mixed Given a scalar, a scalar value is returned. Given an object, with the $recurse flag not - * allowing object recursion, returns a string. Otherwise, returns an array. + * @param ?string $value Value to escape + * @param ?string $fieldType Field type (for fields that allow sanitized HTML) + * @param ?bool $allowHtml Whether to allow sanitized HTML if passed a PropertyString * - * @throws Exception\InvalidArgumentException + * @return mixed Given a string, returns an escaped string, otherwise returns self */ - public function __invoke($value, $recurse = self::RECURSE_NONE, bool $allowHtml = false) + public function __invoke($value = null, ?string $fieldType = null, ?bool $allowHtml = null) { + if (null === $value) { + return $this; + } if ($value instanceof PropertyStringInterface) { - if ($allowHtml && $html = $value->getHtml()) { + if (($allowHtml ?? ($fieldType && ($this->htmlFields[$fieldType] ?? false))) && $html = $value->getHtml()) { return ($this->cleanHtml)($html); } $value = (string)$value; diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php similarity index 88% rename from module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php rename to module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php index aecb8c94634..f0db9891adc 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/EscapeHtmlExtFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php @@ -1,11 +1,11 @@ get('ViewHelperManager'); - return new $requestedName(new Escaper(), $helpers->get('cleanHtml')); + $config = $container->get(\VuFind\Config\PluginManager::class)->get('config'); + return new $requestedName(new Escaper(), $helpers->get('cleanHtml'), $config->toArray()); } } diff --git a/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php b/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php index 4bf37e258a5..4751507fd57 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php @@ -30,6 +30,8 @@ namespace VuFind\View\Helper\Root; use Laminas\View\Helper\AbstractHelper; +use VuFind\String\PropertyString; +use VuFind\String\PropertyStringInterface; use function function_exists; use function strlen; @@ -48,23 +50,19 @@ class Truncate extends AbstractHelper /** * Truncate a string * - * @param string $str the string to be truncated - * @param string $len how long the truncated string will be - * @param string $append what to add to the end of the string to + * @param string|PropertyStringInterface $str the string to be truncated + * @param string $len how long the truncated string will be + * @param string $append what to add to the end of the string to * indicate it's been truncated * - * @return string + * @return string|PropertyStringInterface */ public function __invoke($str, $len, $append = '...') { if ($len == 0) { return ''; - } elseif (strlen($str) > $len) { - if (function_exists('mb_substr')) { - return trim(mb_substr($str, 0, $len, 'UTF-8')) . $append; - } else { - return trim(substr($str, 0, $len)) . $append; - } + } elseif (mb_strlen((string)$str, 'UTF-8') > $len) { + return trim(mb_substr((string)$str, 0, $len, 'UTF-8')) . $append; } return $str; } diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml index d9c5409ee40..031f4ccbab3 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml @@ -3,13 +3,14 @@ ?>
schemaOrg()->getAttributes(['vocab' => 'http://schema.org/', 'resource' => '#record', 'typeof' => $this->schemaOrg()->getRecordTypes($this->driver)])?>> record($this->driver)->getQRCode('core'); - $largeImage = $this->record($this->driver)->getThumbnail('large'); + $recordHelper = $this->record($this->driver); + $QRCode = $recordHelper->getQRCode('core'); + $largeImage = $recordHelper->getThumbnail('large'); $linkAttributes = $largeImage ? ['href' => $largeImage, 'data-lightbox-image' => 'true'] : []; - $coverDetails = $this->record($this->driver)->getCoverDetails('core', 'medium', $linkAttributes); + $coverDetails = $recordHelper->getCoverDetails('core', 'medium', $linkAttributes); $cover = $coverDetails['html']; $preview = ($this->previewOverride ?? false) - ? $this->previewOverride : $this->record($this->driver)->getPreviews(); + ? $this->previewOverride : $recordHelper->getPreviews(); $rating = $this->driver->isRatingAllowed(); ?> @@ -28,7 +29,7 @@ - record($this->driver)->renderTemplate('rating.phtml')?> + renderTemplate('rating.phtml')?>
- schemaOrg()->getAttributes(['property' => 'name'])?>>escapeHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection())?> + schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title')?> driver->getExtraDetail('cached_record') && !$this->translationEmpty('cached_record_warning')): ?>
@@ -54,7 +55,7 @@ driver->tryMethod('getFullTitlesAltScript', [], []) as $altTitle): ?>
- escapeHtml($altTitle)?> + escapeOrCleanHtml($altTitle, 'title-alt')?>
@@ -64,14 +65,15 @@ searchOptions($this->driver->getSourceIdentifier())->getVersionsAction()): ?> - record($this->driver)->renderTemplate('versions-link.phtml')?> + renderTemplate('versions-link.phtml')?> driver->getSummary(); ?> - escapeHtml($summary[0]) : false): ?> -

truncate($summary, 300)?>

+ + truncate($summary, 300); ?> +

escapeOrCleanHtml($shortSummary, $summary, 'summary')?>

- 300): ?> +

transEsc('Full description')?>

@@ -85,7 +87,7 @@ record($this->driver)->renderTemplate( + $recordHelper->renderTemplate( 'core-fields.phtml', [ 'driver' => $this->driver, diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml index a5d8e73fb4e..f53d90974b7 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -1,5 +1,5 @@ driver->getSummary() as $summary): ?> - escapeHtmlExt($summary, allowHtml: true) ?>
+ escapeOrCleanHtml($summary, allowHtml: true) ?>
driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> @@ -9,7 +9,7 @@ array_map( function ($summary) { $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + return $htmlContent ? $summary : ($this->escapeOrCleanHtml($summary, allowHtml: true)); }, $content ) diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml index d9c5409ee40..031f4ccbab3 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml @@ -3,13 +3,14 @@ ?>
schemaOrg()->getAttributes(['vocab' => 'http://schema.org/', 'resource' => '#record', 'typeof' => $this->schemaOrg()->getRecordTypes($this->driver)])?>> record($this->driver)->getQRCode('core'); - $largeImage = $this->record($this->driver)->getThumbnail('large'); + $recordHelper = $this->record($this->driver); + $QRCode = $recordHelper->getQRCode('core'); + $largeImage = $recordHelper->getThumbnail('large'); $linkAttributes = $largeImage ? ['href' => $largeImage, 'data-lightbox-image' => 'true'] : []; - $coverDetails = $this->record($this->driver)->getCoverDetails('core', 'medium', $linkAttributes); + $coverDetails = $recordHelper->getCoverDetails('core', 'medium', $linkAttributes); $cover = $coverDetails['html']; $preview = ($this->previewOverride ?? false) - ? $this->previewOverride : $this->record($this->driver)->getPreviews(); + ? $this->previewOverride : $recordHelper->getPreviews(); $rating = $this->driver->isRatingAllowed(); ?> @@ -28,7 +29,7 @@ - record($this->driver)->renderTemplate('rating.phtml')?> + renderTemplate('rating.phtml')?>
- schemaOrg()->getAttributes(['property' => 'name'])?>>escapeHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection())?> + schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title')?> driver->getExtraDetail('cached_record') && !$this->translationEmpty('cached_record_warning')): ?>
@@ -54,7 +55,7 @@ driver->tryMethod('getFullTitlesAltScript', [], []) as $altTitle): ?>
- escapeHtml($altTitle)?> + escapeOrCleanHtml($altTitle, 'title-alt')?>
@@ -64,14 +65,15 @@ searchOptions($this->driver->getSourceIdentifier())->getVersionsAction()): ?> - record($this->driver)->renderTemplate('versions-link.phtml')?> + renderTemplate('versions-link.phtml')?> driver->getSummary(); ?> - escapeHtml($summary[0]) : false): ?> -

truncate($summary, 300)?>

+ + truncate($summary, 300); ?> +

escapeOrCleanHtml($shortSummary, $summary, 'summary')?>

- 300): ?> +

transEsc('Full description')?>

@@ -85,7 +87,7 @@ record($this->driver)->renderTemplate( + $recordHelper->renderTemplate( 'core-fields.phtml', [ 'driver' => $this->driver, diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml index a5d8e73fb4e..f53d90974b7 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -1,5 +1,5 @@ driver->getSummary() as $summary): ?> - escapeHtmlExt($summary, allowHtml: true) ?>
+ escapeOrCleanHtml($summary, allowHtml: true) ?>
driver->getCleanISBN(); ?> summaries($isbn) as $provider => $content): ?> @@ -9,7 +9,7 @@ array_map( function ($summary) { $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeHtmlExt($summary, allowHtml: true)); + return $htmlContent ? $summary : ($this->escapeOrCleanHtml($summary, allowHtml: true)); }, $content ) diff --git a/themes/root/theme.config.php b/themes/root/theme.config.php index 38e159d00fe..2cb294ab533 100644 --- a/themes/root/theme.config.php +++ b/themes/root/theme.config.php @@ -31,7 +31,7 @@ 'VuFind\View\Helper\Root\DateTime' => 'VuFind\View\Helper\Root\DateTimeFactory', 'VuFind\View\Helper\Root\DisplayLanguageOption' => 'VuFind\View\Helper\Root\DisplayLanguageOptionFactory', 'VuFind\View\Helper\Root\Doi' => 'VuFind\View\Helper\Root\DoiFactory', - 'VuFind\View\Helper\Root\EscapeHtmlExt' => 'VuFind\View\Helper\Root\EscapeHtmlExtFactory', + 'VuFind\View\Helper\Root\EscapeOrCleanHtml' => 'VuFind\View\Helper\Root\EscapeOrCleanHtmlFactory', 'VuFind\View\Helper\Root\ExplainElement' => 'Laminas\ServiceManager\Factory\InvokableFactory', 'VuFind\View\Helper\Root\Export' => 'VuFind\View\Helper\Root\ExportFactory', 'VuFind\View\Helper\Root\Feedback' => 'VuFind\View\Helper\Root\FeedbackFactory', @@ -136,7 +136,7 @@ 'dateTime' => 'VuFind\View\Helper\Root\DateTime', 'displayLanguageOption' => 'VuFind\View\Helper\Root\DisplayLanguageOption', 'doi' => 'VuFind\View\Helper\Root\Doi', - 'escapeHtmlExt' => 'VuFind\View\Helper\Root\EscapeHtmlExt', + 'escapeOrCleanHtml' => 'VuFind\View\Helper\Root\EscapeOrCleanHtml', 'explainElement' => 'VuFind\View\Helper\Root\ExplainElement', 'export' => 'VuFind\View\Helper\Root\Export', 'feedback' => 'VuFind\View\Helper\Root\Feedback', From 5975e484ab7abea1c010693117994c992c47d0d9 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 3 Jan 2025 14:04:55 +0200 Subject: [PATCH 10/12] Fix code style. --- .../VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php | 2 +- .../src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php | 2 +- module/VuFind/src/VuFind/View/Helper/Root/Truncate.php | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php index 22acf8ef1e6..be4b209293f 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php @@ -75,7 +75,7 @@ class CleanHtmlFactory implements FactoryInterface public function __invoke( ContainerInterface $container, $requestedName, - array $options = null + ?array $options = null ) { if (!empty($options)) { throw new \Exception('Unexpected options sent to factory.'); diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php index f0db9891adc..01c58c788db 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtmlFactory.php @@ -64,7 +64,7 @@ class EscapeOrCleanHtmlFactory implements FactoryInterface public function __invoke( ContainerInterface $container, $requestedName, - array $options = null + ?array $options = null ) { if (!empty($options)) { throw new \Exception('Unexpected options sent to factory.'); diff --git a/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php b/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php index 4751507fd57..d9dfc2623e2 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/Truncate.php @@ -30,12 +30,8 @@ namespace VuFind\View\Helper\Root; use Laminas\View\Helper\AbstractHelper; -use VuFind\String\PropertyString; use VuFind\String\PropertyStringInterface; -use function function_exists; -use function strlen; - /** * Truncate view helper * From 9f520cfccd936e9d1f57fecca678f1e24bffcc63 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 3 Jan 2025 15:02:08 +0200 Subject: [PATCH 11/12] Improve summaries. --- .../VuFind/Content/Summaries/Syndetics.php | 4 ++- .../src/VuFind/String/PropertyString.php | 26 ++++++++++++++++++- .../VuFind/String/PropertyStringInterface.php | 19 +++++++++++++- .../View/Helper/Root/EscapeOrCleanHtml.php | 11 +++++--- .../DefaultRecord/data-summary.phtml | 3 +-- .../DefaultRecord/data-summary.phtml | 3 +-- 6 files changed, 56 insertions(+), 10 deletions(-) diff --git a/module/VuFind/src/VuFind/Content/Summaries/Syndetics.php b/module/VuFind/src/VuFind/Content/Summaries/Syndetics.php index 35fe21a4f36..022585b3bd9 100644 --- a/module/VuFind/src/VuFind/Content/Summaries/Syndetics.php +++ b/module/VuFind/src/VuFind/Content/Summaries/Syndetics.php @@ -29,6 +29,8 @@ namespace VuFind\Content\Summaries; +use VuFind\String\PropertyString; + /** * Syndetics Summaries content loader. * @@ -110,7 +112,7 @@ public function loadByIsbn($key, \VuFindCode\ISBN $isbnObj) // If we have syndetics plus, we don't actually want the content // we'll just stick in the relevant div if ($this->usePlus) { - $summaries[] = $sourceInfo['div']; + $summaries[] = PropertyString::fromHtml($sourceInfo['div'])->setHtmlTrusted(true); } else { // Get the marc field for summaries. (520) $nodes = $xmldoc2->GetElementsbyTagName('Fld520'); diff --git a/module/VuFind/src/VuFind/String/PropertyString.php b/module/VuFind/src/VuFind/String/PropertyString.php index c2da2519bb2..5341f8dda05 100644 --- a/module/VuFind/src/VuFind/String/PropertyString.php +++ b/module/VuFind/src/VuFind/String/PropertyString.php @@ -107,7 +107,8 @@ public function setHtml(string $html): static /** * Get HTML string * - * Note: This could contain anything and must be sanitized for display + * Note: This could contain anything and must be sanitized for display unless marked trusted + * (see setHtmlTrusted/getHtmlTrusted). * * @return ?string */ @@ -116,6 +117,29 @@ public function getHtml(): ?string return $this['__html']; } + /** + * Set flag for trusted HTML + * + * @param bool $trusted Is the HTML content trusted? + * + * @return static + */ + public function setHtmlTrusted(bool $trusted): static + { + $this['__trustedHtml'] = $trusted; + return $this; + } + + /** + * Get flag for trusted HTML + * + * @return ?bool + */ + public function getHtmlTrusted(): ?bool + { + return $this['__trustedHtml']; + } + /** * Set identifiers * diff --git a/module/VuFind/src/VuFind/String/PropertyStringInterface.php b/module/VuFind/src/VuFind/String/PropertyStringInterface.php index 81f6673a645..0e2aec82b64 100644 --- a/module/VuFind/src/VuFind/String/PropertyStringInterface.php +++ b/module/VuFind/src/VuFind/String/PropertyStringInterface.php @@ -68,12 +68,29 @@ public function setHtml(string $html): static; /** * Get HTML string * - * Note: This could contain anything and must be sanitized for display + * Note: This could contain anything and must be sanitized for display unless marked trusted + * (see setHtmlTrusted/getHtmlTrusted). * * @return ?string */ public function getHtml(): ?string; + /** + * Set flag for trusted HTML + * + * @param bool $trusted Is the HTML content trusted? + * + * @return static + */ + public function setHtmlTrusted(bool $trusted): static; + + /** + * Get flag for trusted HTML + * + * @return ?bool + */ + public function getHtmlTrusted(): ?bool; + /** * Set identifiers * diff --git a/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php index adcf5936af3..63b8015ad51 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/EscapeOrCleanHtml.php @@ -69,17 +69,22 @@ public function __construct(protected Escaper $escaper, protected CleanHtml $cle * @param ?string $value Value to escape * @param ?string $fieldType Field type (for fields that allow sanitized HTML) * @param ?bool $allowHtml Whether to allow sanitized HTML if passed a PropertyString + * @param string $context Rendering context for cleaning HTML * * @return mixed Given a string, returns an escaped string, otherwise returns self */ - public function __invoke($value = null, ?string $fieldType = null, ?bool $allowHtml = null) - { + public function __invoke( + $value = null, + ?string $fieldType = null, + ?bool $allowHtml = null, + string $context = 'default' + ) { if (null === $value) { return $this; } if ($value instanceof PropertyStringInterface) { if (($allowHtml ?? ($fieldType && ($this->htmlFields[$fieldType] ?? false))) && $html = $value->getHtml()) { - return ($this->cleanHtml)($html); + return $value->getHtmlTrusted() ? $html : ($this->cleanHtml)($html, context: $context); } $value = (string)$value; } diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml index f53d90974b7..99b13a9bc1c 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -8,8 +8,7 @@ '
', array_map( function ($summary) { - $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeOrCleanHtml($summary, allowHtml: true)); + return $this->escapeOrCleanHtml($summary, allowHtml: true); }, $content ) diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml index f53d90974b7..99b13a9bc1c 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/data-summary.phtml @@ -8,8 +8,7 @@ '
', array_map( function ($summary) { - $htmlContent = str_starts_with($summary, '<') && str_ends_with($summary, '>'); - return $htmlContent ? $summary : ($this->escapeOrCleanHtml($summary, allowHtml: true)); + return $this->escapeOrCleanHtml($summary, allowHtml: true); }, $content ) From 3d5df2612a21e84dd504d0848b666fad1083464b Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 3 Jan 2025 15:03:48 +0200 Subject: [PATCH 12/12] Add context support for cleaning HTML. --- .../src/VuFind/View/Helper/Root/CleanHtml.php | 20 +++++----- .../View/Helper/Root/CleanHtmlFactory.php | 38 ++++++++++++------- .../RecordDriver/DefaultRecord/core.phtml | 2 +- .../RecordDriver/DefaultRecord/core.phtml | 2 +- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php index c728fcf731e..fea6d838946 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtml.php @@ -43,11 +43,11 @@ class CleanHtml extends \Laminas\View\Helper\AbstractHelper { /** - * Purifier + * Purifiers * - * @var \HTMLPurifier + * @var \HTMLPurifier[] */ - protected $purifier = null; + protected $purifiers = []; /** * Constructor @@ -61,19 +61,21 @@ public function __construct(protected Closure $purifierFactory) /** * Clean up HTML * - * @param string $html HTML - * @param boolean $targetBlank Whether to add target=_blank to outgoing links + * @param string $html HTML + * @param bool $targetBlank Whether to add target=_blank to outgoing links + * @param string $context Display context (e.g. 'heading') * * @return string */ - public function __invoke($html, $targetBlank = false): string + public function __invoke(string $html, bool $targetBlank = false, string $context = 'default'): string { if (!str_contains($html, '<')) { return $html; } - if (null === ($this->purifier[$targetBlank] ?? null)) { - $this->purifier[$targetBlank] = ($this->purifierFactory)(compact('targetBlank')); + $key = ($targetBlank ? 'blank' : 'noblank') . "_$context"; + if (!isset($this->purifiers[$key])) { + $this->purifiers[$key] = ($this->purifierFactory)(compact('targetBlank', 'context')); } - return $this->purifier[$targetBlank]->purify($html); + return $this->purifiers[$key]->purify($html); } } diff --git a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php index be4b209293f..abefb146acb 100644 --- a/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php +++ b/module/VuFind/src/VuFind/View/Helper/Root/CleanHtmlFactory.php @@ -91,7 +91,8 @@ public function __invoke( * N.B. This is a relatively slow method. * * @param array $options Additional options. Currently supported: - * targetBlank true/false Whether to add target="_blank" to external links + * bool targetBlank Whether to add target="_blank" to external links + * string context Rendering context ('default' or 'heading') * * @return HTMLPurifier */ @@ -119,28 +120,37 @@ protected function createPurifier(array $options): HTMLPurifier // still intermittently present, and this setting should remain in place. $config->set('Core.AllowParseManyTags', true); - $this->setAdditionalConfiguration($config); + $this->setAdditionalConfiguration($config, $options); return new \HTMLPurifier($config); } /** * Sets additional configuration * - * @param HTMLPurifier_Config $config Configuration + * @param HTMLPurifier_Config $config Configuration + * @param array $options Additional options * * @return void */ - protected function setAdditionalConfiguration(HTMLPurifier_Config $config) + protected function setAdditionalConfiguration(HTMLPurifier_Config $config, array $options): void { - // Add support for details and summary elements: - $definition = $config->getHTMLDefinition(true); - $definition->addElement( - 'details', - 'Block', - 'Flow', - 'Common', - ['open' => new \HTMLPurifier_AttrDef_HTML_Bool(true)] - ); - $definition->addElement('summary', 'Block', 'Flow', 'Common'); + $context = $options['context'] ?? 'default'; + if ('heading' === $context) { + // Limit elements allowed in headings + // (see https://developer.mozilla.org/en-US/docs/Web/HTML/Content_categories#phrasing_content for more + // information): + $config->set('HTML.AllowedElements', 'a,b,br,em,i,span'); + } else { + // Add support for details and summary elements: + $definition = $config->getHTMLDefinition(true); + $definition->addElement( + 'details', + 'Block', + 'Flow', + 'Common', + ['open' => new \HTMLPurifier_AttrDef_HTML_Bool(true)] + ); + $definition->addElement('summary', 'Block', 'Flow', 'Common'); + } } } diff --git a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml index 6a3fcf08827..5d33519deae 100644 --- a/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml +++ b/themes/bootstrap3/templates/RecordDriver/DefaultRecord/core.phtml @@ -45,7 +45,7 @@
- schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title')?> + schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title', context: 'heading')?> driver->getExtraDetail('cached_record') && !$this->translationEmpty('cached_record_warning')): ?>
diff --git a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml index 6a3fcf08827..5d33519deae 100644 --- a/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml +++ b/themes/bootstrap5/templates/RecordDriver/DefaultRecord/core.phtml @@ -45,7 +45,7 @@
- schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title')?> + schemaOrg()->getAttributes(['property' => 'name'])?>>escapeOrCleanHtml($this->driver->getShortTitle() . ' ' . $this->driver->getSubtitle() . ' ' . $this->driver->getTitleSection(), 'title', context: 'heading')?> driver->getExtraDetail('cached_record') && !$this->translationEmpty('cached_record_warning')): ?>