From ce39494f8490955b2bdd453ade6733b47f638de4 Mon Sep 17 00:00:00 2001 From: DocuSeal Date: Sun, 22 Oct 2023 01:54:20 +0300 Subject: [PATCH] optimize for large files --- .rubocop.yml | 3 ++ .../api/templates_documents_controller.rb | 1 + .../preview_document_page_controller.rb | 52 +++++++++++++++++++ app/controllers/submissions_controller.rb | 14 ++++- app/controllers/submit_form_controller.rb | 23 ++++++-- app/controllers/templates_controller.rb | 8 +++ app/javascript/template_builder/document.vue | 20 ++++++- app/javascript/template_builder/page.vue | 9 +++- app/javascript/template_builder/preview.vue | 2 +- app/mailers/submitter_mailer.rb | 22 ++++++-- app/views/start_form/show.html.erb | 2 +- app/views/submissions/show.html.erb | 8 ++- app/views/submit_form/show.html.erb | 8 ++- app/views/templates/edit.html.erb | 2 +- config/routes.rb | 1 + lib/submissions/ensure_result_generated.rb | 2 +- lib/templates/clone_attachments.rb | 16 +++--- lib/templates/create_attachments.rb | 9 +++- lib/templates/process_document.rb | 36 ++++++++++++- 19 files changed, 206 insertions(+), 32 deletions(-) create mode 100644 app/controllers/preview_document_page_controller.rb diff --git a/.rubocop.yml b/.rubocop.yml index 623c3db2f..bcb65cd46 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -67,6 +67,9 @@ RSpec/MultipleMemoizedHelpers: Rails/I18nLocaleTexts: Enabled: false +Rails/FindEach: + Enabled: false + Rails/SkipsModelValidations: Enabled: false diff --git a/app/controllers/api/templates_documents_controller.rb b/app/controllers/api/templates_documents_controller.rb index 1657e70eb..04fdb12c6 100644 --- a/app/controllers/api/templates_documents_controller.rb +++ b/app/controllers/api/templates_documents_controller.rb @@ -16,6 +16,7 @@ def create render json: { schema:, documents: documents.as_json( + methods: [:metadata], include: { preview_images: { methods: %i[url metadata filename] } } diff --git a/app/controllers/preview_document_page_controller.rb b/app/controllers/preview_document_page_controller.rb new file mode 100644 index 000000000..f49a81285 --- /dev/null +++ b/app/controllers/preview_document_page_controller.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +class PreviewDocumentPageController < ActionController::API + include ActiveStorage::SetCurrent + + FORMAT = Templates::ProcessDocument::FORMAT + + def show + if Docuseal.multitenant? + Rollbar.warning('load page') + + return head :not_found + end + + attachment = ActiveStorage::Attachment.find_by(uuid: params[:attachment_uuid]) + + return head :not_found unless attachment + + preview_image = attachment.preview_images.joins(:blob).find_by(blob: { filename: "#{params[:id]}#{FORMAT}" }) + + return redirect_to preview_image.url, allow_other_host: true if preview_image + + file_path = + if attachment.service.name == :disk + ActiveStorage::Blob.service.path_for(attachment.key) + else + find_or_create_document_tempfile_path(attachment) + end + + io = Templates::ProcessDocument.generate_pdf_preview_from_file(attachment, file_path, params[:id].to_i) + + render plain: io.tap(&:rewind).read + end + + def find_or_create_document_tempfile_path(attachment) + file_path = "#{Dir.tmpdir}/#{attachment.uuid}" + + File.open(file_path, File::RDWR | File::CREAT, 0o644) do |f| + f.flock(File::LOCK_EX) + + # rubocop:disable Style/ZeroLengthPredicate + if f.size.zero? + f.binmode + + f.write(attachment.download) + end + # rubocop:enable Style/ZeroLengthPredicate + end + + file_path + end +end diff --git a/app/controllers/submissions_controller.rb b/app/controllers/submissions_controller.rb index 311efb196..a327d56e3 100644 --- a/app/controllers/submissions_controller.rb +++ b/app/controllers/submissions_controller.rb @@ -6,12 +6,24 @@ class SubmissionsController < ApplicationController load_and_authorize_resource :submission, only: %i[show destroy] + PRELOAD_ALL_PAGES_AMOUNT = 200 + def show ActiveRecord::Associations::Preloader.new( records: [@submission], - associations: [:template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] }] + associations: [:template, { template_schema_documents: :blob }] ).call + total_pages = + @submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i } + + if total_pages < PRELOAD_ALL_PAGES_AMOUNT + ActiveRecord::Associations::Preloader.new( + records: @submission.template_schema_documents, + associations: [:blob, { preview_images_attachments: :blob }] + ).call + end + render :show, layout: 'plain' end diff --git a/app/controllers/submit_form_controller.rb b/app/controllers/submit_form_controller.rb index ea9e8597e..01fa5adc9 100644 --- a/app/controllers/submit_form_controller.rb +++ b/app/controllers/submit_form_controller.rb @@ -6,15 +6,28 @@ class SubmitFormController < ApplicationController skip_before_action :authenticate_user! skip_authorization_check + PRELOAD_ALL_PAGES_AMOUNT = 200 + def show - @submitter = - Submitter.preload(submission: [ - :template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] } - ]) - .find_by!(slug: params[:slug]) + @submitter = Submitter.find_by!(slug: params[:slug]) return redirect_to submit_form_completed_path(@submitter.slug) if @submitter.completed_at? + ActiveRecord::Associations::Preloader.new( + records: [@submitter], + associations: [submission: [:template, { template_schema_documents: :blob }]] + ).call + + total_pages = + @submitter.submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i } + + if total_pages < PRELOAD_ALL_PAGES_AMOUNT + ActiveRecord::Associations::Preloader.new( + records: @submitter.submission.template_schema_documents, + associations: [:blob, { preview_images_attachments: :blob }] + ).call + end + Submitters::MaybeUpdateDefaultValues.call(@submitter, current_user) cookies[:submitter_sid] = @submitter.signed_id diff --git a/app/controllers/templates_controller.rb b/app/controllers/templates_controller.rb index e1be3c3b0..c1e2c99ee 100644 --- a/app/controllers/templates_controller.rb +++ b/app/controllers/templates_controller.rb @@ -30,6 +30,14 @@ def edit associations: [schema_documents: { preview_images_attachments: :blob }] ).call + @template_data = + @template.as_json.merge( + documents: @template.schema_documents.as_json( + methods: [:metadata], + include: { preview_images: { methods: %i[url metadata filename] } } + ) + ).to_json + render :edit, layout: 'plain' end diff --git a/app/javascript/template_builder/document.vue b/app/javascript/template_builder/document.vue index cc48defb2..e7d3195b5 100644 --- a/app/javascript/template_builder/document.vue +++ b/app/javascript/template_builder/document.vue @@ -56,8 +56,26 @@ export default { } }, computed: { + numberOfPages () { + return this.document.metadata?.pdf?.number_of_pages || this.document.preview_images.length + }, sortedPreviewImages () { - return [...this.document.preview_images].sort((a, b) => parseInt(a.filename) - parseInt(b.filename)) + const lazyloadMetadata = this.document.preview_images[this.document.preview_images.length - 1].metadata + + return [...Array(this.numberOfPages).keys()].map((i) => { + return this.previewImagesIndex[i] || { + metadata: lazyloadMetadata, + id: Math.random().toString(), + url: `/preview/${this.document.uuid}/${i}.jpg` + } + }) + }, + previewImagesIndex () { + return this.document.preview_images.reduce((acc, e) => { + acc[parseInt(e.filename)] = e + + return acc + }, {}) } }, beforeUpdate () { diff --git a/app/javascript/template_builder/page.vue b/app/javascript/template_builder/page.vue index 2fea16560..97725d9db 100644 --- a/app/javascript/template_builder/page.vue +++ b/app/javascript/template_builder/page.vue @@ -5,11 +5,12 @@ >
parseInt(a.filename) - parseInt(b.filename))[0] } }, mounted () { diff --git a/app/mailers/submitter_mailer.rb b/app/mailers/submitter_mailer.rb index 0661b705d..99b7e76a8 100644 --- a/app/mailers/submitter_mailer.rb +++ b/app/mailers/submitter_mailer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class SubmitterMailer < ApplicationMailer + MAX_ATTACHMENTS_SIZE = 10.megabytes + def invitation_email(submitter, body: nil, subject: nil) @current_account = submitter.submission.template.account @submitter = submitter @@ -75,14 +77,26 @@ def documents_copy_email(submitter, to: nil) def add_completed_email_attachments!(submitter) documents = Submitters.select_attachments_for_download(submitter) - documents.each do |attachment| - attachments[attachment.filename.to_s] = attachment.download - end + total_size = 0 + audit_trail_data = nil if submitter.submission.audit_trail.present? - attachments[submitter.submission.audit_trail.filename.to_s] = submitter.submission.audit_trail.download + audit_trail_data = submitter.submission.audit_trail.download + + total_size = audit_trail_data.size + end + + documents.each do |attachment| + data = attachment.download + total_size += data.size + + break if total_size >= MAX_ATTACHMENTS_SIZE + + attachments[attachment.filename.to_s] = data end + attachments[submitter.submission.audit_trail.filename.to_s] = audit_trail_data if audit_trail_data + documents end diff --git a/app/views/start_form/show.html.erb b/app/views/start_form/show.html.erb index dd69cd359..a0512105a 100644 --- a/app/views/start_form/show.html.erb +++ b/app/views/start_form/show.html.erb @@ -24,7 +24,7 @@
<% unless @template.deleted_at? %> - <%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4' } do |f| %> + <%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4', onsubmit: 'event.submitter.disabled = true' } do |f| %>
<%= f.label :email, class: 'label' %> <%= f.email_field :email, value: current_user&.email, required: true, class: 'base-input', placeholder: 'Provide your email to start' %> diff --git a/app/views/submissions/show.html.erb b/app/views/submissions/show.html.erb index 100de45b9..ad198a432 100644 --- a/app/views/submissions/show.html.erb +++ b/app/views/submissions/show.html.erb @@ -44,12 +44,16 @@ <% fields_index = Templates.build_field_areas_index(@submission.template_fields || @submission.template.fields) %> <% values = @submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %> <% attachments_index = ActiveStorage::Attachment.where(record: @submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %> + <% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %> <% (@submission.template_schema || @submission.template.schema).each do |item| %> <% document = @submission.template_schema_documents.find { |e| e.uuid == item['attachment_uuid'] } %> <% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %> - <% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %> + <% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %> + <% lazyload_metadata = document.preview_images.first.metadata %> + <% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %> + <% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
" class="relative"> - +
<% document_annots_index[index]&.each do |annot| %> <%= render 'submissions/annotation', annot: %> diff --git a/app/views/submit_form/show.html.erb b/app/views/submit_form/show.html.erb index 88316310e..ffff02305 100644 --- a/app/views/submit_form/show.html.erb +++ b/app/views/submit_form/show.html.erb @@ -1,6 +1,7 @@ <% fields_index = Templates.build_field_areas_index(@submitter.submission.template_fields || @submitter.submission.template.fields) %> <% values = @submitter.submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %> <% attachments_index = ActiveStorage::Attachment.where(record: @submitter.submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %> +<% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %>
@@ -10,9 +11,12 @@ <% (@submitter.submission.template_schema || @submitter.submission.template.schema).each do |item| %> <% document = @submitter.submission.template_schema_documents.find { |a| a.uuid == item['attachment_uuid'] } %> <% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %> - <% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %> + <% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %> + <% lazyload_metadata = document.preview_images.last.metadata %> + <% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %> + <% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
- +
<% document_annots_index[index]&.each do |annot| %> <%= render 'submissions/annotation', annot: %> diff --git a/app/views/templates/edit.html.erb b/app/views/templates/edit.html.erb index fc88f8919..31cae9563 100644 --- a/app/views/templates/edit.html.erb +++ b/app/views/templates/edit.html.erb @@ -1 +1 @@ - + diff --git a/config/routes.rb b/config/routes.rb index 7ca4e82b9..571f8eed0 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -67,6 +67,7 @@ resource :folder, only: %i[edit update], controller: 'templates_folders' resources :submissions_export, only: %i[index new] end + resources :preview_document_page, only: %i[show], path: '/preview/:attachment_uuid' resources :start_form, only: %i[show update], path: 'd', param: 'slug' do get :completed diff --git a/lib/submissions/ensure_result_generated.rb b/lib/submissions/ensure_result_generated.rb index a30a6a7e2..cbc610285 100644 --- a/lib/submissions/ensure_result_generated.rb +++ b/lib/submissions/ensure_result_generated.rb @@ -4,7 +4,7 @@ module Submissions module EnsureResultGenerated WAIT_FOR_RETRY = 2.seconds CHECK_EVENT_INTERVAL = 1.second - CHECK_COMPLETE_TIMEOUT = 20.seconds + CHECK_COMPLETE_TIMEOUT = 90.seconds WaitForCompleteTimeout = Class.new(StandardError) diff --git a/lib/templates/clone_attachments.rb b/lib/templates/clone_attachments.rb index 14927a1c1..8c797bbcc 100644 --- a/lib/templates/clone_attachments.rb +++ b/lib/templates/clone_attachments.rb @@ -13,13 +13,15 @@ def call(template:, original_template:) record: template ) - document.preview_images_attachments.each do |preview_image| - ActiveStorage::Attachment.create!( - uuid: preview_image.uuid, - blob_id: preview_image.blob_id, - name: 'preview_images', - record: new_document - ) + ApplicationRecord.no_touching do + document.preview_images_attachments.each do |preview_image| + ActiveStorage::Attachment.create!( + uuid: preview_image.uuid, + blob_id: preview_image.blob_id, + name: 'preview_images', + record: new_document + ) + end end end end diff --git a/lib/templates/create_attachments.rb b/lib/templates/create_attachments.rb index 5301c7ec3..863bc77c5 100644 --- a/lib/templates/create_attachments.rb +++ b/lib/templates/create_attachments.rb @@ -3,6 +3,7 @@ module Templates module CreateAttachments PDF_CONTENT_TYPE = 'application/pdf' + ANNOTATIONS_SIZE_LIMIT = 6.megabytes InvalidFileType = Class.new(StandardError) module_function @@ -18,7 +19,10 @@ def call(template, params) document = template.documents.create!(blob:) if blob.content_type == PDF_CONTENT_TYPE && blob.metadata['pdf'].nil? - blob.metadata['pdf'] = { 'annotations' => Templates::BuildAnnotations.call(document_data) } + annotations = + document_data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(document_data) + + blob.metadata['pdf'] = { 'annotations' => annotations } blob.metadata['sha256'] = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data)) end @@ -37,9 +41,10 @@ def find_or_create_blobs(params) data = file.read if file.content_type == PDF_CONTENT_TYPE + annotations = data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(data) metadata = { 'identified' => true, 'analyzed' => true, 'sha256' => Base64.urlsafe_encode64(Digest::SHA256.digest(data)), - 'pdf' => { 'annotations' => Templates::BuildAnnotations.call(data) } } + 'pdf' => { 'annotations' => annotations } } end ActiveStorage::Blob.create_and_upload!( diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb index b5ee7c3cd..133e06b39 100644 --- a/lib/templates/process_document.rb +++ b/lib/templates/process_document.rb @@ -9,6 +9,12 @@ module ProcessDocument PDF_CONTENT_TYPE = 'application/pdf' Q = 35 MAX_WIDTH = 1400 + MAX_NUMBER_OF_PAGES_PROCESSED = + if Docuseal.multitenant? + 70 + else + 40 + end module_function @@ -42,14 +48,38 @@ def generate_preview_image(attachment, data) def generate_pdf_preview_images(attachment, data) ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all - number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size - 1 + number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size + + (attachment.metadata['pdf'] ||= {})[:number_of_pages] = number_of_pages + + attachment.save! - (0..number_of_pages).each do |page_number| + (0..[number_of_pages - 1, MAX_NUMBER_OF_PAGES_PROCESSED].min).each do |page_number| page = Vips::Image.new_from_buffer(data, '', dpi: DPI, page: page_number) page = page.resize(MAX_WIDTH / page.width.to_f) io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true)) + ApplicationRecord.no_touching do + ActiveStorage::Attachment.create!( + blob: ActiveStorage::Blob.create_and_upload!( + io:, filename: "#{page_number}#{FORMAT}", + metadata: { analyzed: true, identified: true, width: page.width, height: page.height } + ), + name: ATTACHMENT_NAME, + record: attachment + ) + end + end + end + + def generate_pdf_preview_from_file(attachment, file_path, page_number) + page = Vips::Image.new_from_file(file_path, dpi: DPI, page: page_number) + page = page.resize(MAX_WIDTH / page.width.to_f) + + io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true)) + + ApplicationRecord.no_touching do ActiveStorage::Attachment.create!( blob: ActiveStorage::Blob.create_and_upload!( io:, filename: "#{page_number}#{FORMAT}", @@ -59,6 +89,8 @@ def generate_pdf_preview_images(attachment, data) record: attachment ) end + + io end end end