Skip to content

Commit

Permalink
Add hocr patch.
Browse files Browse the repository at this point in the history
  • Loading branch information
rosiel committed Oct 27, 2023
1 parent 5788e85 commit d89a575
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 0 deletions.
176 changes: 176 additions & 0 deletions assets/patches/hocr.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
diff --git a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
index 804153ab..fc31c36a 100644
--- a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
+++ b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
@@ -11,6 +11,7 @@ use Drupal\Core\Field\FieldItemInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Url;
+use Drupal\islandora\IslandoraUtils;
use Drupal\views\Plugin\views\style\StylePluginBase;
use Drupal\views\ResultRow;
use GuzzleHttp\Client;
@@ -35,6 +36,13 @@ use Symfony\Component\HttpFoundation\Request;
*/
class IIIFManifest extends StylePluginBase {

+ /**
+ * Islandora utility functions.
+ *
+ * @var \Drupal\islandora\IslandoraUtils
+ */
+ protected $utils;
+
/**
* {@inheritdoc}
*/
@@ -108,10 +116,17 @@ class IIIFManifest extends StylePluginBase {
*/
protected $moduleHandler;

+ /**
+ * The media use term for structured OCR text.
+ *
+ * @var \Drupal\taxonomy\TermInterface|null
+ */
+ protected $structuredTextTerm;
+
/**
* {@inheritdoc}
*/
- public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) {
+ public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils) {
parent::__construct($configuration, $plugin_id, $plugin_definition);

$this->serializer = $serializer;
@@ -122,6 +137,10 @@ class IIIFManifest extends StylePluginBase {
$this->httpClient = $http_client;
$this->messenger = $messenger;
$this->moduleHandler = $moduleHandler;
+ $this->utils = $utils;
+ $this->structured_text_term = isset($this->options['structured_text_term_uri'])
+ ? $this->utils->getTermForUri($this->options['structured_text_term_uri'])
+ : FALSE;
}

/**
@@ -139,7 +158,8 @@ class IIIFManifest extends StylePluginBase {
$container->get('file_system'),
$container->get('http_client'),
$container->get('messenger'),
- $container->get('module_handler')
+ $container->get('module_handler'),
+ $container->get('islandora.utils')
);
}

@@ -157,6 +177,7 @@ class IIIFManifest extends StylePluginBase {
* {@inheritdoc}
*/
public function render() {
+ $this->structuredTextTerm = $this->utils->getTermForUri($this->options['structured_text_term_uri']);
$json = [];
$iiif_address = $this->iiifConfig->get('iiif_server');
if (!is_null($iiif_address) && !empty($iiif_address)) {
@@ -282,7 +303,7 @@ class IIIFManifest extends StylePluginBase {
],
];

- if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) {
+ if ($ocr_url = $this->getOcrUrl($entity)) {
$tmp_canvas['seeAlso'] = [
'@id' => $ocr_url,
'format' => 'text/vnd.hocr+html',
@@ -362,28 +383,34 @@ class IIIFManifest extends StylePluginBase {
*
* @param \Drupal\Core\Entity\EntityInterface $entity
* The entity at the current row.
- * @param \Drupal\views\ResultRow $row
- * Result row.
- * @param int $delta
- * The delta in case there are multiple canvases on one media.
*
* @return string|false
* The absolute URL of the current row's structured text,
* or FALSE if none.
*/
- protected function getOcrUrl(EntityInterface $entity, ResultRow $row, $delta) {
+ protected function getOcrUrl(EntityInterface $entity) {
$ocr_url = FALSE;
$iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : [];
$ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL;
if ($ocrField) {
- $ocr_entity = $ocrField->getEntity($row);
+ $ocr_entity = $entity;
$ocr_field_name = $ocrField->definition['field_name'];
- if (!is_null($ocr_field_name)) {
+ if (!empty($ocr_field_name)) {
$ocrs = $ocr_entity->{$ocr_field_name};
- $ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE;
- if ($ocr) {
- $ocr_url = $ocr->entity->createFileUrl(FALSE);
- }
+ // No delta is passed any more: use the first item, and stay FALSE if it has no file.
+ $ocr_url = (isset($ocrs[0]) && $ocrs[0]->entity) ? $ocrs[0]->entity->createFileUrl(FALSE) : FALSE;
+ }
+ }
+ elseif ($this->structuredTextTerm) {
+ $parent_node = $this->utils->getParentNode($entity);
+ $ocr_entity_array = $parent_node ? $this->utils->getMediaReferencingNodeAndTerm($parent_node, $this->structuredTextTerm) : NULL;
+ $ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL;
+ $ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL;
+ if ($ocr_entity) {
+ $ocr_fid = $ocr_entity->getSource()->getSourceFieldValue($ocr_entity);
+ $ocr_file = $ocr_fid ? $this->entityTypeManager->getStorage('file')->load($ocr_fid) : NULL;
+ // A media without a loadable source file yields no OCR URL instead of a fatal error.
+ $ocr_url = $ocr_file ? $ocr_file->createFileUrl(FALSE) : FALSE;
}
}

@@ -486,10 +513,19 @@ class IIIFManifest extends StylePluginBase {
'#title' => $this->t('Structured OCR data file field'),
'#type' => 'checkboxes',
'#default_value' => $this->options['iiif_ocr_file_field'],
- '#description' => $this->t('The source of structured OCR text for each entity.'),
+ '#description' => $this->t('The source of structured OCR text for each entity. If the term setting below is left blank, it will be the same entity as the source image'),
'#options' => $field_options,
'#required' => FALSE,
];
+ $form['structured_text_term'] = [
+ '#type' => 'entity_autocomplete',
+ '#target_type' => 'taxonomy_term',
+ '#title' => $this->t('Structured OCR text term'),
+ '#default_value' => $this->utils->getTermForUri($this->options['structured_text_term_uri']),
+ '#required' => FALSE,
+ '#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'),
+ ];
+
}

/**
@@ -502,4 +538,25 @@ class IIIFManifest extends StylePluginBase {
return ['json' => 'json'];
}

+ /**
+ * Submit handler for options form.
+ *
+ * Stores the structured text term as a URI, or NULL if no term was chosen.
+ *
+ * @param array $form
+ * The form.
+ * @param \Drupal\Core\Form\FormStateInterface $form_state
+ * The form state object.
+ */
+ // @codingStandardsIgnoreStart
+ public function submitOptionsForm(&$form, FormStateInterface $form_state) {
+ // @codingStandardsIgnoreEnd
+ $style_options = $form_state->getValue('style_options');
+ // The term is optional, so only load it (and derive a URI) when a tid was submitted.
+ $term = empty($style_options['structured_text_term']) ? NULL : $this->entityTypeManager->getStorage('taxonomy_term')->load($style_options['structured_text_term']);
+ $style_options['structured_text_term_uri'] = $term ? $this->utils->getUriForTerm($term) : NULL;
+ $form_state->setValue('style_options', $style_options);
+ parent::submitOptionsForm($form, $form_state);
+ }
+
}
3 changes: 3 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@
"patches": {
"drupal/matomo": {
"https://www.drupal.org/project/matomo/issues/3363521": "https://www.drupal.org/files/issues/2023-06-29/matomo-3363521-7.patch"
},
"drupal/islandora": {
"OCR from media: https://github.com/Islandora/islandora/pull/953": "assets/patches/hocr.patch"
}
}
},
Expand Down

0 comments on commit d89a575

Please sign in to comment.