Skip to content

Commit

Permalink
Merge pull request #75 from UCLALibrary/update-oai-feed
Browse files Browse the repository at this point in the history
update-oai-feed
  • Loading branch information
aprilrieger authored Feb 29, 2024
2 parents 4093ddb + 7edf6ee commit 0f6abdc
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 57 deletions.
12 changes: 11 additions & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# UCLALibrary - .env setup -- Start
DEPLOY_HOOK=CHANGEME
DOCKER_PORTS=80
MAKE_WAVES=
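MAKE_WAVES=true # Enables waveform generation. To disable, comment this line out: the import code only checks that the variable is set, so the value false still enables it.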
NEGATIVE_CAPTCHA_SECRET=64fe54311a8e54b637a1da1ff993b560ff5c742211f645f35b8b9bd8b3d2e4015e95dea8db4dc235df0396ddd94d21d18d0c787bcaa5b579cb5f6f2aac90e601
SECRET_KEY_BASE=CHANGEME
PASSENGER_APP_ENV=development
POSTGRES_DB=oral_history
POSTGRES_HOST=postgres
POSTGRES_PASSWORD=DatabaseFTW
POSTGRES_USER=postgres
# Commented out for development purposes @SoftServ
REGISTRY_HOST=index.docker.io/
REGISTRY_URI=uclalibrary
SITE_URI=oralhistory-test.library.ucla.edu
Expand All @@ -20,3 +22,11 @@ SOLR_PORT=8983
SOLR_URL="http://${SOLR_ADMIN_USER}:${SOLR_ADMIN_PASSWORD}@${SOLR_HOST}:${SOLR_PORT}/solr/blacklight-core"
TAG=dev
TEST_DB=oral_history_test
# UCLALibrary - .env setup -- End

# SoftServ - .env additions/alterations --- Start
# REGISTRY_HOST=ghcr.io
# REGISTRY_URI=oral-history
# ADMIN_EMAIL=admin@example.com
# ADMIN_PASSWORD=testing123
# SoftServ - .env additions/alterations --- End
36 changes: 24 additions & 12 deletions .github/workflows/build-dockerhub.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
name: Build Oral History Web for Docker Hub
on:
push:
# branches:
# - main
workflow_dispatch:
branches:
- main
pull_request:
branches:
- main

jobs:
build-for-docker-hub:
runs-on: ubuntu-latest
steps:
- name: Set env
run: >-
echo "TAG=${HEAD_TAG::8}" >> ${GITHUB_ENV};
echo ${HEAD_TAG::8}
env:
HEAD_TAG: ${{ github.event.pull_request.head.sha || github.sha }}
shell: bash

- name: Check out code
uses: actions/checkout@v3

Expand All @@ -20,19 +31,20 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Read chart yaml to get version for docker tag
id: image-tag
uses: KJ002/read-yaml@1.6
- name: Retag latest if merge to main action
id: meta-web
uses: docker/metadata-action@v4.1.1
with:
file: 'charts/prod-oralhistory-values.yaml'
key-path: '["image", "tag"]'

- name: Report image tag
run: echo "${{ steps.image-tag.outputs.data }}"
images: |
name=uclalibrary/oral-history
tags: |
type=raw,value=latest,enable={{is_default_branch}}
- name: Build and push
uses: docker/build-push-action@v3
with:
context: .
push: true
tags: uclalibrary/oral-history:${{ steps.image-tag.outputs.data }}
tags: |
${{ steps.meta-web.outputs.tags }}
uclalibrary/oral-history:${{ env.TAG }}
5 changes: 5 additions & 0 deletions app/controllers/admin_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,9 @@ def delete_jobs
# Sends the indexing log (log/indexing.log under the Rails root) to the
# client via send_file.
def logs
  indexing_log = Rails.root.join('log/indexing.log')
  send_file(indexing_log)
end

# Destroys every Delayed::Job record, then redirects to the root path with a
# notice confirming the deletion.
def destroy_all_delayed_jobs
  Delayed::Job.destroy_all
  confirmation = 'All Delayed::Jobs have been destroyed.'
  redirect_to(root_path, notice: confirmation)
end
end
105 changes: 70 additions & 35 deletions app/models/oral_history_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,31 @@ def self.index_logger


# Builds the OAI client used to talk to the oral history feed.
#
# args - Hash of options; args[:url] overrides the default feed endpoint.
#
# Returns an OAI::Client whose HTTP layer is a Faraday connection configured
# with a 300 second timeout.
def self.client(args)
  url = args[:url] || "https://oh-staff.library.ucla.edu/oai/"

  OAI::Client.new(url, http: Faraday.new { |c| c.options.timeout = 300 })
end

# Requests the record list from the OAI feed with a single list_records call.
#
# args - Hash of options handed to .client (for example :url).
#
# Returns whatever the client's list_records call returns.
def self.fetch(args)
  client(args).list_records
end

# Fetches one record from the OAI feed with a single get_record call.
#
# args - Hash of options; args[:identifier] is the record identifier to look
#        up, and the whole hash is also handed to .client (for example :url).
#
# Returns whatever the client's get_record call returns.
def self.get(args)
  client(args).get_record(identifier: args[:identifier])
end


# Looks up the identifier of the first record in the feed.
#
# Calls .fetch once and walks response.full -> first record -> header ->
# identifier, returning nil as soon as any link in that chain is nil.
#
# Returns the header identifier of the first record, or nil.
def self.fetch_first_id
  response = fetch({ limit: 1 })
  response.full&.first&.header&.identifier
end

def self.import(args)
return false if !args[:override] && check_for_tmp_file
begin
create_import_tmp_file
progress = args[:progress] || true
limit = args[:limit] || 20000000 # essentially no limit
response = self.fetch(args)
if progress
bar = ProgressBar.new(response.doc.elements['//resumptionToken'].attributes['completeListSize'].to_i)
end
total = 0
new_record_ids = []

Expand All @@ -78,9 +74,6 @@ def self.import(args)
yield(total) if block_given?
end

if progress
bar.increment!
end
total += 1
break if total >= limit
end
Expand All @@ -100,7 +93,8 @@ def self.import(args)
end

def self.import_single(id)
record = self.get(identifier: id)&.record
converted_id = id.gsub('-','/')
record = self.get(identifier: converted_id)&.record
history = process_record(record)
history.index_record
if ENV['MAKE_WAVES'] && history.attributes["audio_b"] && history.should_process_peaks?
Expand All @@ -116,8 +110,8 @@ def self.process_record(record)
if record.header.blank? || record.header.identifier.blank?
return false
end

history = OralHistoryItem.find_or_new(record.header.identifier.split('/').last) #Digest::MD5.hexdigest(record.header.identifier).to_i(16))
record_id = record.header.identifier.gsub('/','-')
history = OralHistoryItem.find_or_new(record_id) #Digest::MD5.hexdigest(record.header.identifier).to_i(16))
history.attributes['id_t'] = history.id
if record.header.datestamp
history.attributes[:timestamp] = Time.parse(record.header.datestamp)
Expand All @@ -140,7 +134,8 @@ def self.process_record(record)
history.attributes['links_t'] = []
set.children.each do |child|
next if child.class == REXML::Text
if child.name == "titleInfo"

if child.name == "titleInfo" # <mods:titleInfo>
child.elements.each('mods:title') do |title|
title_text = title.text.to_s.strip
if(child.attributes["type"] == "alternative") && title_text.size > 0
Expand All @@ -157,36 +152,52 @@ def self.process_record(record)
end
end
end
elsif child.name == "typeOfResource"
history.attributes["type_of_resource_display"] = child.text
history.attributes["type_of_resource_t"] ||= []
history.attributes["type_of_resource_t"] << child.text
history.attributes["type_of_resource_facet"] ||= []
history.attributes["type_of_resource_facet"] << child.text

# not in new oai feed remove?
# elsif child.name == "typeOfResource"
# history.attributes["type_of_resource_display"] = child.text
# history.attributes["type_of_resource_t"] ||= []
# history.attributes["type_of_resource_t"] << child.text
# history.attributes["type_of_resource_facet"] ||= []
# history.attributes["type_of_resource_facet"] << child.text

# <mods:accessCondition>
elsif child.name == "accessCondition"
history.attributes["rights_display"] = [child.text]
history.attributes["rights_t"] = []
history.attributes["rights_t"] << child.text

# <mods:language>
elsif child.name == 'language'
child.elements.each('mods:languageTerm') do |e|
history.attributes["language_facet"] = LanguageList::LanguageInfo.find(e.text).try(:name)
history.attributes["language_sort"] = LanguageList::LanguageInfo.find(e.text).try(:name)
history.attributes["language_t"] = [LanguageList::LanguageInfo.find(e.text).try(:name)]
end

# <mods:subject>
elsif child.name == "subject"
child.elements.each('mods:topic') do |e|
history.attributes["subject_topic_facet"] ||= []
history.attributes["subject_topic_facet"] << e.text
history.attributes["subject_t"] ||= []
history.attributes["subject_t"] << e.text
end

# <mods:name>
elsif child.name == "name"

# <mods:role>
# <mods:roleTerm type="text">interviewer</mods:roleTerm>
if child.elements['mods:role/mods:roleTerm'].text == "interviewer"
history.attributes["author_display"] = child.elements['mods:namePart'].text
history.attributes["author_t"] ||= []
if !history.attributes["author_t"].include?(child.elements['mods:namePart'].text)
history.attributes["author_t"] << child.elements['mods:namePart'].text
end

# <mods:role>
# <mods:roleTerm type="text">interviewee</mods:roleTerm>
elsif child.elements['mods:role/mods:roleTerm'].text == "interviewee"
history.attributes["interviewee_display"] = child.elements['mods:namePart'].text
history.attributes["interviewee_t"] ||= []
Expand All @@ -195,10 +206,11 @@ def self.process_record(record)
end
history.attributes["interviewee_sort"] = child.elements['mods:namePart'].text
end

# <mods:relatedItem type="constituent">
elsif child.name == "relatedItem" && child.attributes['type'] == "constituent"
time_log_url = ''
order = child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1

if child.elements['mods:location/mods:url[@usage="timed log"]'].present?
time_log_url = child.elements['mods:location/mods:url[@usage="timed log"]'].text
transcript = self.generate_xml_transcript(time_log_url)
Expand Down Expand Up @@ -228,51 +240,71 @@ def self.process_record(record)
end
history.attributes["peaks_t"] ||= []
child_doc_json = child_document.to_json
history.attributes["peaks_t"] << child_doc_json unless history.attributes["peaks_t"].include? child_doc_json
history.attributes["peaks_t"] << child_doc_json unless history.attributes["peaks_t"].include? child_doc_json
history.attributes["children_t"] << child_doc_json

# <mods:relatedItem type="series">
elsif child.name == "relatedItem" && child.attributes['type'] == "series"
history.attributes["series_facet"] = child.elements['mods:titleInfo/mods:title'].text
history.attributes["series_t"] = child.elements['mods:titleInfo/mods:title'].text
history.attributes["series_sort"] = child.elements['mods:titleInfo/mods:title'].text
history.attributes["abstract_display"] = child.elements['mods:abstract'].text
history.attributes["abstract_display"] = child.elements['mods:abstract']&.text
history.attributes["abstract_t"] = []
history.attributes["abstract_t"] << child.elements['mods:abstract'].text
history.attributes["abstract_t"] << child.elements['mods:abstract']&.text

# <mods:note>
elsif child.name == "note"
if child.attributes == {}
history.attributes["admin_note_display"] = child.text
history.attributes["admin_note_t"] = []
history.attributes["admin_note_t"] << child.text
end

# <mods:note type="biographical">
if child.attributes['type'].to_s.match('biographical')
history.attributes["biographical_display"] = child.text
history.attributes["biographical_t"] = []
history.attributes["biographical_t"] << child.text
end

# <mods:note type="personpresent">
if child.attributes['type'].to_s.match('personpresent')
history.attributes['person_present_display'] = child.text
history.attributes['person_present_t'] << child.text
end

# <mods:note type="place">
if child.attributes['type'].to_s.match('place')
history.attributes['place_display'] = child.text
history.attributes['place_t'] << child.text
end

# <mods:note type="supportingdocuments">
if child.attributes['type'].to_s.match('supportingdocuments')
history.attributes['supporting_documents_display'] = child.text
history.attributes['supporting_documents_t'] << child.text
end

# <mods:note type="interviewerhistory">
if child.attributes['type'].to_s.match('interviewerhistory')
history.attributes['interviewer_history_display'] = child.text
history.attributes['interviewer_history_t'] << child.text
end

# <mods:note type="processinterview">
if child.attributes['type'].to_s.match('processinterview')
history.attributes['process_interview_display'] = child.text
history.attributes['process_interview_t'] << child.text
end
history.attributes["description_t"] << child.text

# <mods:location>
elsif child.name == 'location'
child.elements.each do |f|
history.attributes['links_t'] << [f.text, f.attributes['displayLabel']].to_json
order = child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1

# <mods:location displayLabel=
if f.attributes['displayLabel'] &&
has_xml_transcripts == false &&
history.attributes["transcripts_t"].blank? &&
Expand All @@ -285,10 +317,14 @@ def self.process_record(record)
}.to_json
end
end

# <mods:physicalDescription>
elsif child.name == 'physicalDescription'
history.attributes["extent_display"] = child.elements['mods:extent'].text
history.attributes['extent_t'] = []
history.attributes['extent_t'] << child.elements['mods:extent'].text

# <mods:abstract>
elsif child.name == 'abstract'
history.attributes['interview_abstract_display'] = child.text
history.attributes["interview_abstract_t"] = []
Expand Down Expand Up @@ -381,11 +417,9 @@ def self.generate_xml_transcript(url)
end

# NOTE(review): despite its name, this method returns an OAI::Client rather
# than a record count -- confirm what callers expect before relying on it.
#
# args - Hash of options (default: {}); args[:url] overrides the default feed
#        endpoint.
#
# Returns an OAI::Client whose HTTP layer is a Faraday connection configured
# with a 300 second timeout.
def self.total_records(args = {})
  url = args[:url] || "https://oh-staff.library.ucla.edu/oai/"

  OAI::Client.new(url, http: Faraday.new { |c| c.options.timeout = 300 })
end

def has_peaks?
Expand Down Expand Up @@ -418,10 +452,11 @@ def self.create_import_tmp_file
end

# Deletes the importer temp file (tmp/importer.tmp under the Rails root).
#
# The removal is guarded by an existence check so that calling this when the
# file is already gone does not raise.
def self.remove_import_tmp_file
  tmp_file = Rails.root.join('tmp/importer.tmp')
  FileUtils.rm(tmp_file) if File.exist?(tmp_file)
end

# Reports whether the importer temp file exists.
#
# The path is resolved against Rails.root, the same location
# remove_import_tmp_file deletes from, so the answer does not depend on the
# process's current working directory.
#
# Returns true when tmp/importer.tmp exists under the Rails root, else false.
def self.check_for_tmp_file
  File.exist?(Rails.root.join('tmp/importer.tmp'))
end
end
end
2 changes: 1 addition & 1 deletion app/views/admin/index.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
<% end %>
<%= link_to "Background Jobs", delayed_job_web_path, class: "btn btn-lg btn-primary" %>
<%= link_to "Download Logs", admin_logs_path, class: "btn btn-lg btn-primary" %>

<%= button_to "Destroy All Delayed Jobs", destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-lg btn-danger' %>
<div class="g-mt-40 g-mb-40">
<%= link_to t('blacklight.header_links.logout'), destroy_user_session_path %>
</div>
Expand Down
4 changes: 4 additions & 0 deletions app/views/delayed_jobs/destroy_all.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!-- app/views/delayed_jobs/destroy_all.html.erb -->
<h1>Destroy All Delayed Jobs</h1>
<p>Are you sure you want to destroy all Delayed Jobs?</p>
<%= button_to 'Destroy All', destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-danger' %>
2 changes: 1 addition & 1 deletion charts/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
appVersion: "0.0.2"
description: Chart for Oral History Public-Facing App
name: oralhistory
version: 1.0.3
version: 1.0.0

# The `appVersion` is not a required field whereas `version` is required. If
# you’re making changes to a helm chart template file and/or the default values
Expand Down
Loading

0 comments on commit 0f6abdc

Please sign in to comment.