Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assets investigation - data extraction rake tasks #9626

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions lib/tasks/attachments/check_assets.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Records assets whose Asset Manager replacement is still a draft.
#
# Input:  ./lib/tasks/attachments/ads_to_check.txt, one
#         "attachment_data_id,asset_manager_id[,...]" entry per line.
# Output: one comma-separated row appended to the report file (and a summary
#         line on stdout) for every asset whose replacement has a truthy
#         "draft" attribute; a "." is printed for every other checked asset.
# Assets (or replacements) that Asset Manager reports as not found are skipped.
desc "Check assets"
task check_assets: :environment do
  input_path = "./lib/tasks/attachments/ads_to_check.txt"
  output_path = "./lib/tasks/attachments/attachment_data_deleted_in_wh_replaced_in_both_but_replacements_are_draft.txt"

  # Block form closes (and flushes) the report even if a lookup raises
  # part-way through the input.
  File.open(output_path, "a") do |file|
    File.readlines(input_path, chomp: true).each do |line|
      attachment_data_id, asset_manager_id = line.split(",")

      # Blank or malformed line: there is no asset ID to look up.
      next if asset_manager_id.nil?

      begin
        asset_manager_response = GdsApi.asset_manager.asset(asset_manager_id).to_h
        replacement_id = asset_manager_response["replacement_id"]

        # Without a replacement there is nothing further to inspect; skip
        # explicitly instead of issuing a request for a nil ID.
        next if replacement_id.nil?

        rep_response = GdsApi.asset_manager.asset(replacement_id).to_h
      rescue GdsApi::HTTPNotFound
        next
      end

      unless rep_response["draft"]
        print "."
        next
      end

      replacement_deleted = rep_response["deleted"]
      replacement_replaced = !rep_response["replacement_id"].nil?

      puts "ad: #{attachment_data_id}, am_id: #{asset_manager_id}, rep_id: #{replacement_id}, rep_del: #{replacement_deleted}, rep_rep: #{replacement_replaced}"
      file << "#{attachment_data_id},#{asset_manager_id},#{replacement_id},#{replacement_deleted},#{replacement_replaced}" << "\n"
    end
  end
end
19 changes: 19 additions & 0 deletions lib/tasks/attachments/check_attachables.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# For every attachment data ID listed in ads_to_check.txt, collects the state
# of each attachment's attachable (nil when the attachable is missing) and
# prints "NOT OK" with the states when valid_state rejects them, or "."
# otherwise.
desc "Check attachables"
task check_attachables: :environment do
  input_path = "./lib/tasks/attachments/ads_to_check.txt"

  File.readlines(input_path, chomp: true).each do |row|
    attachment_data_id = row.split(",").first
    attachments = AttachmentData.find(attachment_data_id).attachments
    attachable_states = attachments.map { |attachment| attachment.attachable&.state }

    if valid_state(attachable_states)
      print "."
    else
      puts "NOT OK: #{attachment_data_id}: [#{attachable_states.join(', ')}]"
    end
  end
end

# Decides whether a list of attachable states is acceptable.
#
# @param states [Array<String, nil>] states in attachment order; nil stands
#   for an attachment whose attachable is missing
# @return [Boolean] true only when the final entry is nil (an empty list
#   counts) and every entry is either nil or "superseded"
def valid_state(states)
  last_entry_missing = states.last.nil?

  last_entry_missing && states.all? { |state| state.nil? || state == "superseded" }
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Reports on AttachmentData that is not deleted although every attachable it
# belongs to is a superseded Edition.
#
# For each asset of such a record the Asset Manager entry is fetched and one
# line is printed and appended to one of four report files:
#   * replaced in Whitehall and in Asset Manager, replacement is a draft
#   * replaced in Whitehall and in Asset Manager, replacement is not a draft
#   * replaced in Whitehall but no replacement_id in Asset Manager
#   * not replaced in Whitehall
# Assets that Asset Manager reports as not found are skipped.
desc "Report on AttachmentData in WH that is not deleted"
task attachment_data_not_deleted: :environment do
  report_paths = {
    replaced_ok: "./lib/tasks/attachments/ad_not_deleted_but_replaced_OK.txt",
    draft_replacement: "./lib/tasks/attachments/ad_not_deleted_but_replaced_with_draft_replacement.txt",
    missing_in_am: "./lib/tasks/attachments/ad_not_deleted_replaced_in_wh_but_not_in_am.txt",
    not_replaced: "./lib/tasks/attachments/ad_not_deleted_not_replaced_in_wh.txt",
  }
  reports = {}

  # Echo a line to stdout and append it to the matching report file.
  emit = lambda do |report_key, line|
    puts line
    reports.fetch(report_key) << line << "\n"
  end

  begin
    report_paths.each { |key, path| reports[key] = File.open(path, "a") }

    # Plain iteration: the block is run for its side effects only, so there is
    # no reason to collect a result per record as `find_each.map` would.
    AttachmentData.find_each do |attachment_data|
      attachables = attachment_data.attachments.map(&:attachable).compact

      next unless attachables.any?
      next unless attachables.all? { |attachable| attachable.is_a?(Edition) }
      next if (attachables.map(&:state) - %w[superseded]).any?
      next if attachment_data.deleted?

      document_id = attachables.last.document.id

      attachment_data.assets.each do |asset|
        begin
          am_response = GdsApi.asset_manager.asset(asset.asset_manager_id).to_h
        rescue GdsApi::HTTPNotFound
          next
        end

        # Fragments shared by the report lines below.
        ids = "ad: #{attachment_data.id}, d_id: #{document_id}, am_id: #{asset.asset_manager_id}"
        variant = "v: #{asset.variant}"
        status = "deleted: #{am_response['deleted']}, draft: #{am_response['draft']}, redirect: #{!am_response['redirect_url'].nil?}, #{attachment_data.created_at.year}"

        if attachment_data.replaced_by_id
          if am_response["replacement_id"]
            replacement = get_rep(am_response["replacement_id"])

            # get_rep yields nil when the replacement cannot be fetched; skip
            # the asset rather than fail on nil["draft"].
            next if replacement.nil?

            if replacement["draft"]
              emit.call(:draft_replacement, "draft_rep - #{ids}, #{variant}, #{status}, rep_del: #{replacement['deleted']}, rep_rep: #{!replacement['replacement_id'].nil?}")
            else
              # NOTE(review): this line has never included the variant, unlike
              # the other three; kept as-is so existing reports stay comparable.
              emit.call(:replaced_ok, "all OK - #{ids}, #{status}")
            end
          else
            emit.call(:missing_in_am, "missing rep am - #{ids}, #{variant}, #{status}")
          end
        else
          emit.call(:not_replaced, "nothing - #{ids}, #{variant}, #{status}, AL: #{am_response['access_limited'].present?}, ALO: #{am_response['access_limited_organisation_ids'].present?}")
        end
      end
    end
  ensure
    # Close whatever was opened, even if the run aborted part-way through.
    reports.each_value(&:close)
  end
end

# Fetches a replacement asset's attributes from Asset Manager.
#
# @param replacement_id [String] Asset Manager ID of the replacement asset
# @return [Hash, nil] the asset attributes, or nil (after printing a notice)
#   when Asset Manager raises GdsApi::HTTPNotFound for the ID
def get_rep(replacement_id)
  GdsApi.asset_manager.asset(replacement_id).to_h
rescue GdsApi::HTTPNotFound
  # Only replacement_id is in scope here; the caller's attachment_data and
  # asset locals are not visible inside this method, and referencing them
  # raised NameError instead of reporting the missing replacement.
  puts "#{replacement_id}, no replacement found for given ID"

  nil
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Lists assets from attachments_on_unpublished_editions.csv that have no
# redirect_url in Asset Manager.
#
# Input:  CSV with headers, using the "asset_manager_id" and
#         "attachment_data_id" columns.
# Output: one comma-separated row appended to the report file (and a summary
#         line on stdout) per asset whose Asset Manager record has a nil
#         "redirect_url". Assets that Asset Manager reports as not found are
#         skipped.
desc "Identify all AttachmentData instances where attachable is unpublished but there are missing redirects in AM"
task find_attachments_unpublished_in_whitehall_with_missing_redirects_in_asset_manager: :environment do
  # Loaded here so the task does not depend on something else requiring CSV.
  require "csv"

  input_path = "./lib/tasks/attachments/attachments_on_unpublished_editions.csv"
  output_path = "./lib/tasks/attachments/attachments_without_redirects_in_am.txt"

  # Block form closes the report even if a lookup raises part-way through.
  File.open(output_path, "a") do |file|
    CSV.foreach(input_path, headers: true) do |row|
      asset_manager_id = row["asset_manager_id"]
      attachment_data_id = row["attachment_data_id"]

      begin
        am_response = GdsApi.asset_manager.asset(asset_manager_id).to_h
      rescue GdsApi::HTTPNotFound
        next
      end

      next unless am_response["redirect_url"].nil?

      # `find` raises when the record is missing, so attachment_data is never
      # nil below; only the first attachment and its attachable may be absent.
      attachment_data = AttachmentData.find(attachment_data_id)
      document_id = attachment_data.attachments.first&.attachable&.document_id

      # The CSV may reference an asset that is no longer attached to this
      # record; report a blank variant instead of failing on nil.
      matching_asset = attachment_data.assets.detect { |a| a["asset_manager_id"] == asset_manager_id }
      variant = matching_asset&.variant

      draft = am_response["draft"]
      deleted = am_response["deleted"]
      replaced = !am_response["replacement_id"].nil?
      year = attachment_data.created_at.year

      puts "ad: #{attachment_data_id}, am_id: #{asset_manager_id}, d_id: #{document_id}, var: #{variant}, draft: #{draft}, deleted: #{deleted}, replaced: #{replaced}, year: #{year}"
      file << "#{attachment_data_id},#{asset_manager_id},#{document_id},#{variant},#{draft},#{deleted},#{replaced},#{year}" << "\n"
    end
  end
end
Loading
Loading