Merge pull request #75 from UCLALibrary/update-oai-feed

update-oai-feed
UCLALibrary · Feb 29, 2024 · 0f6abdc · 0f6abdc
2 parents 4093ddb + 7edf6ee
commit 0f6abdc
Show file tree

Hide file tree

Showing 11 changed files with 121 additions and 57 deletions.
diff --git a/.env b/.env
@@ -1,13 +1,15 @@
+# UCLALibrary - .env setup -- Start
 DEPLOY_HOOK=CHANGEME
 DOCKER_PORTS=80
-MAKE_WAVES=
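+MAKE_WAVES=true # Enables waveform generation. The import code only tests whether this variable is set, so the value false does not disable it -- remove or comment out this line instead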
 NEGATIVE_CAPTCHA_SECRET=64fe54311a8e54b637a1da1ff993b560ff5c742211f645f35b8b9bd8b3d2e4015e95dea8db4dc235df0396ddd94d21d18d0c787bcaa5b579cb5f6f2aac90e601
 SECRET_KEY_BASE=CHANGEME
 PASSENGER_APP_ENV=development
 POSTGRES_DB=oral_history
 POSTGRES_HOST=postgres
 POSTGRES_PASSWORD=DatabaseFTW
 POSTGRES_USER=postgres
+# Commented out for development purposes @SoftServ
 REGISTRY_HOST=index.docker.io/
 REGISTRY_URI=uclalibrary
 SITE_URI=oralhistory-test.library.ucla.edu
@@ -20,3 +22,11 @@ SOLR_PORT=8983
 SOLR_URL="http://${SOLR_ADMIN_USER}:${SOLR_ADMIN_PASSWORD}@${SOLR_HOST}:${SOLR_PORT}/solr/blacklight-core"
 TAG=dev
 TEST_DB=oral_history_test
+# UCLALibrary - .env setup -- End
+
+# SoftServ - .env additions/alterations --- Start
+# REGISTRY_HOST=ghcr.io
+# REGISTRY_URI=oral-history
+# ADMIN_EMAIL=admin@example.com
+# ADMIN_PASSWORD=testing123
+# SoftServ - .env additions/alterations --- End
diff --git a/.github/workflows/build-dockerhub.yml b/.github/workflows/build-dockerhub.yml
@@ -1,13 +1,24 @@
 name: Build Oral History Web for Docker Hub
 on:
   push:
-    # branches:
-    #   - main
-  workflow_dispatch:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
 jobs:
   build-for-docker-hub:
     runs-on: ubuntu-latest
     steps:
+      - name: Set env
+        run: >-
+          echo "TAG=${HEAD_TAG::8}" >> ${GITHUB_ENV};
+          echo ${HEAD_TAG::8}
+        env:
+          HEAD_TAG: ${{ github.event.pull_request.head.sha || github.sha }}
+        shell: bash
+
       - name: Check out code
         uses: actions/checkout@v3
 
@@ -20,19 +31,20 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
-      - name: Read chart yaml to get version for docker tag
-        id: image-tag
-        uses: KJ002/read-yaml@1.6
+      - name: Retag latest if merge to main action
+        id: meta-web
+        uses: docker/metadata-action@v4.1.1
         with:
-          file: 'charts/prod-oralhistory-values.yaml'
-          key-path: '["image", "tag"]'
-
-      - name: Report image tag
-        run: echo "${{ steps.image-tag.outputs.data }}"
+          images: |
+            name=uclalibrary/oral-history
+          tags: |
+            type=raw,value=latest,enable={{is_default_branch}}
 
       - name: Build and push
         uses: docker/build-push-action@v3
         with:
           context: .
           push: true
-          tags: uclalibrary/oral-history:${{ steps.image-tag.outputs.data }}
+          tags: |
+            ${{ steps.meta-web.outputs.tags }}
+            uclalibrary/oral-history:${{ env.TAG }}
diff --git a/app/controllers/admin_controller.rb b/app/controllers/admin_controller.rb
@@ -21,4 +21,9 @@ def delete_jobs
   def logs
     send_file(Rails.root.join('log/indexing.log'))
   end
+
+  def destroy_all_delayed_jobs
+    Delayed::Job.destroy_all
+    redirect_to root_path, notice: 'All Delayed::Jobs have been destroyed.'
+  end
 end
diff --git a/app/models/oral_history_item.rb b/app/models/oral_history_item.rb
@@ -26,35 +26,31 @@ def self.index_logger
 
 
   def self.client(args)
-    url = args[:url] || "https://webservices.library.ucla.edu/dldataprovider/oai2_0.do"
-    OAI::Client.new url, :headers => { "From" => "rob@notch8.com" }, :parser => 'rexml', metadata_prefix: 'mods'
+    url = args[:url] || "https://oh-staff.library.ucla.edu/oai/"
+
+    OAI::Client.new(url, http: Faraday.new {|c| c.options.timeout = 300})
   end
 
   def self.fetch(args)
-    set = args[:set] || "oralhistory"
-    response = client(args).list_records(set: set, metadata_prefix: 'mods')
+    response = client(args).list_records
   end
 
   def self.get(args)
-    response = client(args).get_record(identifier: args[:identifier], metadata_prefix: 'mods', )
+    response = client(args).get_record(identifier: args[:identifier] )
   end
 
 
   def self.fetch_first_id
-    response = self.fetch({progress: false, limit:1})
-    response.full&.first&.header&.identifier&.split('/')&.last
+    response = self.fetch({limit:1})
+    response.full&.first&.header&.identifier
   end
 
   def self.import(args)
     return false if !args[:override] && check_for_tmp_file
     begin
       create_import_tmp_file
-      progress = args[:progress] || true
       limit = args[:limit] || 20000000  # essentially no limit
       response = self.fetch(args)
-      if progress
-        bar = ProgressBar.new(response.doc.elements['//resumptionToken'].attributes['completeListSize'].to_i)
-      end
       total = 0
       new_record_ids = []
 
@@ -78,9 +74,6 @@ def self.import(args)
           yield(total) if block_given?
         end
 
-        if progress
-          bar.increment!
-        end
         total += 1
         break if total >= limit
       end
@@ -100,7 +93,8 @@ def self.import(args)
   end
 
   def self.import_single(id)
-    record = self.get(identifier: id)&.record
+    converted_id = id.gsub('-','/')
+    record = self.get(identifier: converted_id)&.record
     history = process_record(record)
     history.index_record
     if ENV['MAKE_WAVES'] && history.attributes["audio_b"] && history.should_process_peaks?
@@ -116,8 +110,8 @@ def self.process_record(record)
     if record.header.blank? || record.header.identifier.blank?
       return false
     end
-
-    history = OralHistoryItem.find_or_new(record.header.identifier.split('/').last) #Digest::MD5.hexdigest(record.header.identifier).to_i(16))
+    record_id = record.header.identifier.gsub('/','-')
+    history = OralHistoryItem.find_or_new(record_id) #Digest::MD5.hexdigest(record.header.identifier).to_i(16))
     history.attributes['id_t'] = history.id
     if record.header.datestamp
       history.attributes[:timestamp] = Time.parse(record.header.datestamp)
@@ -140,7 +134,8 @@ def self.process_record(record)
         history.attributes['links_t'] = []
         set.children.each do |child|
         next if child.class == REXML::Text
-          if child.name == "titleInfo"
+
+          if child.name == "titleInfo" # <mods:titleInfo>
             child.elements.each('mods:title') do |title|
               title_text = title.text.to_s.strip
               if(child.attributes["type"] == "alternative") && title_text.size > 0
@@ -157,36 +152,52 @@ def self.process_record(record)
                 end
               end
             end
-          elsif child.name == "typeOfResource"
-            history.attributes["type_of_resource_display"] = child.text
-            history.attributes["type_of_resource_t"] ||= []
-            history.attributes["type_of_resource_t"] << child.text
-            history.attributes["type_of_resource_facet"] ||= []
-            history.attributes["type_of_resource_facet"] << child.text
+
+          # typeOfResource is not present in the new OAI feed -- remove this branch?
+          # elsif child.name == "typeOfResource"
+          #   history.attributes["type_of_resource_display"] = child.text
+          #   history.attributes["type_of_resource_t"] ||= []
+          #   history.attributes["type_of_resource_t"] << child.text
+          #   history.attributes["type_of_resource_facet"] ||= []
+          #   history.attributes["type_of_resource_facet"] << child.text
+
+          # <mods:accessCondition>
           elsif child.name == "accessCondition"
             history.attributes["rights_display"] = [child.text]
             history.attributes["rights_t"] = []
             history.attributes["rights_t"] << child.text
+
+          # <mods:language>
           elsif child.name == 'language'
             child.elements.each('mods:languageTerm') do |e|
               history.attributes["language_facet"] = LanguageList::LanguageInfo.find(e.text).try(:name)
               history.attributes["language_sort"] = LanguageList::LanguageInfo.find(e.text).try(:name)
               history.attributes["language_t"] = [LanguageList::LanguageInfo.find(e.text).try(:name)]
             end
+
+          # <mods:subject>
           elsif child.name == "subject"
             child.elements.each('mods:topic') do |e|
               history.attributes["subject_topic_facet"] ||= []
               history.attributes["subject_topic_facet"] << e.text
               history.attributes["subject_t"] ||= []
               history.attributes["subject_t"] << e.text
             end
+
+          # <mods:name>
           elsif child.name == "name"
+
+            # <mods:role>
+            #   <mods:roleTerm type="text">interviewer</mods:roleTerm>
             if child.elements['mods:role/mods:roleTerm'].text == "interviewer"
               history.attributes["author_display"] = child.elements['mods:namePart'].text
               history.attributes["author_t"] ||= []
               if !history.attributes["author_t"].include?(child.elements['mods:namePart'].text)
                 history.attributes["author_t"] << child.elements['mods:namePart'].text
               end
+
+            # <mods:role>
+            #   <mods:roleTerm type="text">interviewee</mods:roleTerm>
             elsif child.elements['mods:role/mods:roleTerm'].text == "interviewee"
               history.attributes["interviewee_display"] = child.elements['mods:namePart'].text
               history.attributes["interviewee_t"] ||= []
@@ -195,10 +206,11 @@ def self.process_record(record)
               end
               history.attributes["interviewee_sort"] = child.elements['mods:namePart'].text
             end
+
+          # <mods:relatedItem type="constituent">
           elsif child.name == "relatedItem" && child.attributes['type'] == "constituent"
             time_log_url = ''
             order = child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1
-
             if child.elements['mods:location/mods:url[@usage="timed log"]'].present?
               time_log_url = child.elements['mods:location/mods:url[@usage="timed log"]'].text
               transcript = self.generate_xml_transcript(time_log_url)
@@ -228,51 +240,71 @@ def self.process_record(record)
             end
             history.attributes["peaks_t"] ||= []
             child_doc_json = child_document.to_json
-            history.attributes["peaks_t"] <<  child_doc_json unless history.attributes["peaks_t"].include? child_doc_json
+            history.attributes["peaks_t"] << child_doc_json unless history.attributes["peaks_t"].include? child_doc_json
             history.attributes["children_t"] << child_doc_json
+
+          # <mods:relatedItem type="series">
           elsif child.name == "relatedItem" && child.attributes['type'] == "series"
             history.attributes["series_facet"] = child.elements['mods:titleInfo/mods:title'].text
             history.attributes["series_t"] = child.elements['mods:titleInfo/mods:title'].text
             history.attributes["series_sort"] = child.elements['mods:titleInfo/mods:title'].text
-            history.attributes["abstract_display"] = child.elements['mods:abstract'].text
+            history.attributes["abstract_display"] = child.elements['mods:abstract']&.text
             history.attributes["abstract_t"] = []
-            history.attributes["abstract_t"] << child.elements['mods:abstract'].text
+            history.attributes["abstract_t"] << child.elements['mods:abstract']&.text
+
+          # <mods:note>
           elsif child.name == "note"
             if child.attributes == {}
               history.attributes["admin_note_display"] = child.text
               history.attributes["admin_note_t"] = []
               history.attributes["admin_note_t"] << child.text
             end
+
+            # <mods:note type="biographical">
             if child.attributes['type'].to_s.match('biographical')
               history.attributes["biographical_display"] = child.text
               history.attributes["biographical_t"] = []
               history.attributes["biographical_t"] << child.text
             end
+
+            # <mods:note type="personpresent">
             if child.attributes['type'].to_s.match('personpresent')
               history.attributes['person_present_display'] = child.text
               history.attributes['person_present_t'] << child.text
             end
+
+            # <mods:note type="place">
             if child.attributes['type'].to_s.match('place')
               history.attributes['place_display'] = child.text
               history.attributes['place_t'] << child.text
             end
+
+            # <mods:note type="supportingdocuments">
             if child.attributes['type'].to_s.match('supportingdocuments')
               history.attributes['supporting_documents_display'] = child.text
               history.attributes['supporting_documents_t'] << child.text
             end
+
+            # <mods:note type="interviewerhistory">
             if child.attributes['type'].to_s.match('interviewerhistory')
               history.attributes['interviewer_history_display'] = child.text
               history.attributes['interviewer_history_t'] << child.text
             end
+
+            # <mods:note type="processinterview">
             if child.attributes['type'].to_s.match('processinterview')
               history.attributes['process_interview_display'] = child.text
               history.attributes['process_interview_t'] << child.text
             end
             history.attributes["description_t"] << child.text
+
+          # <mods:location>
           elsif child.name == 'location'
             child.elements.each do |f|
               history.attributes['links_t'] << [f.text, f.attributes['displayLabel']].to_json
               order = child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1
+
+              # child element of <mods:location> that carries a displayLabel attribute
               if f.attributes['displayLabel'] &&
                 has_xml_transcripts == false &&
                 history.attributes["transcripts_t"].blank? &&
@@ -285,10 +317,14 @@ def self.process_record(record)
                 }.to_json
               end
             end
+
+          # <mods:physicalDescription>
           elsif child.name == 'physicalDescription'
             history.attributes["extent_display"] = child.elements['mods:extent'].text
             history.attributes['extent_t'] = []
             history.attributes['extent_t'] << child.elements['mods:extent'].text
+
+          # <mods:abstract>
           elsif child.name == 'abstract'
             history.attributes['interview_abstract_display'] = child.text
             history.attributes["interview_abstract_t"] = []
@@ -381,11 +417,9 @@ def self.generate_xml_transcript(url)
   end
 
   def self.total_records(args = {})
-    url = args[:url] || "https://webservices.library.ucla.edu/dldataprovider/oai2_0.do"
-    set = args[:set] || "oralhistory"
-    client = OAI::Client.new url, :headers => { "From" => "rob@notch8.com" }, :parser => 'rexml', metadata_prefix: 'mods'
-    response = client.list_records(set: set, metadata_prefix: 'mods')
-    response.doc.elements['//resumptionToken'].attributes['completeListSize'].to_i
+    url = args[:url] || "https://oh-staff.library.ucla.edu/oai/"
+
+    OAI::Client.new(url, http: Faraday.new {|c| c.options.timeout = 300})
   end
 
   def has_peaks?
@@ -418,10 +452,11 @@ def self.create_import_tmp_file
   end
 
   def self.remove_import_tmp_file
-    FileUtils.rm(Rails.root.join('tmp/importer.tmp'))
+    tmp_file = Rails.root.join('tmp/importer.tmp')
+    FileUtils.rm(tmp_file) if File.exist?(tmp_file)
   end
 
   def self.check_for_tmp_file
     File.exist?(File.join('tmp/importer.tmp'))
   end
-end
+end
diff --git a/app/views/admin/index.html.erb b/app/views/admin/index.html.erb
@@ -40,7 +40,7 @@
   <% end %>
   <%= link_to "Background Jobs", delayed_job_web_path, class: "btn btn-lg btn-primary" %>
   <%= link_to "Download Logs", admin_logs_path, class: "btn btn-lg btn-primary" %>
-
+  <%= button_to "Destroy All Delayed Jobs", destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-lg btn-danger' %>
   <div class="g-mt-40 g-mb-40">
     <%= link_to t('blacklight.header_links.logout'), destroy_user_session_path %>
   </div>

diff --git a/app/views/delayed_jobs/destroy_all.html.erb b/app/views/delayed_jobs/destroy_all.html.erb
@@ -0,0 +1,4 @@
+<!-- app/views/delayed_jobs/destroy_all.html.erb -->
+<h1>Destroy All Delayed Jobs</h1>
+<p>Are you sure you want to destroy all Delayed Jobs?</p>
+<%= button_to 'Destroy All', destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-danger' %>
diff --git a/charts/Chart.yaml b/charts/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 appVersion: "0.0.2"
 description: Chart for Oral History Public-Facing App
 name: oralhistory
-version: 1.0.3
+version: 1.0.0
 
 # The `appVersion` is not a required field whereas `version` is required. If
 # you’re making changes to a helm chart template file and/or the default values