From 49c2e66ebdf178863c16e141c88d7b79e9d1cf2d Mon Sep 17 00:00:00 2001 From: Zeke Gabrielse Date: Fri, 8 Nov 2024 10:49:26 -0600 Subject: [PATCH] refactor npm worker to use enumeratorio --- app/workers/process_npm_package_worker.rb | 33 ++++++++++++++----- .../process_npm_package_worker_spec.rb | 15 +++++---- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/app/workers/process_npm_package_worker.rb b/app/workers/process_npm_package_worker.rb index ff1adfccef..61ba022e9b 100644 --- a/app/workers/process_npm_package_worker.rb +++ b/app/workers/process_npm_package_worker.rb @@ -20,14 +20,24 @@ def perform(artifact_id) raise PackageNotAcceptableError, 'unacceptable filesize' end - # download the package tarball + # download the package tarball in chunks to reduce memory footprint client = artifact.client - tgz = client.get_object(bucket: artifact.bucket, key: artifact.key) - .body + enum = Enumerator.new do |yielder| + client.get_object(bucket: artifact.bucket, key: artifact.key) do |chunk| + yielder << chunk + end + end - # unpack the package tarball + # wrap the enumerator to provide an IO-like interface + tgz = EnumeratorIO.new(enum) + + # gunzip the package tarball tar = gunzip(tgz) + # keep a ref to the manifest + manifest = nil + + # unpack the package tarball unpack tar do |archive| # NOTE(ezekg) npm prefixes everything in the archive with package/ entry = archive.find { _1.name in 'package/package.json' } @@ -42,21 +52,25 @@ def perform(artifact_id) entry.size > MAX_MANIFEST_SIZE # parse/validate and minify the manifest - json = JSON.parse(entry.read) - .to_json + content = JSON.parse(entry.read) + .to_json - ReleaseManifest.create!( + manifest = ReleaseManifest.create!( account_id: artifact.account_id, environment_id: artifact.environment_id, release_id: artifact.release_id, release_artifact_id: artifact.id, - content: json, + content:, ) end - # not sure why GzipReader#open doesn't take an io? + # not sure why GzipReader#open doesn't take an IO ala every other IO-like? tar.close + # we can assume package tarball is invalid if there's no manifest + raise PackageNotAcceptableError, 'manifest is missing' if + manifest.nil? + artifact.update!(status: 'UPLOADED') BroadcastEventService.call( @@ -68,6 +82,7 @@ def perform(artifact_id) ActiveRecord::RecordInvalid, JSON::ParserError, Zlib::Error, + Minitar::UnexpectedEOF, Minitar::Error, IOError => e Keygen.logger.warn { "[workers.process-npm-package-worker] Error: #{e.class.name} - #{e.message}" } diff --git a/spec/workers/process_npm_package_worker_spec.rb b/spec/workers/process_npm_package_worker_spec.rb index 2bc87968e0..79fbab5b57 100644 --- a/spec/workers/process_npm_package_worker_spec.rb +++ b/spec/workers/process_npm_package_worker_spec.rb @@ -15,15 +15,16 @@ after { Sidekiq::Testing.fake! } context 'when artifact is a valid package' do - let(:package) { file_fixture('hello-2.0.0.tgz').open } - let(:package_json) { - tar = Zlib::GzipReader.new(package) - json = Minitar::Reader.open tar do |archive| + let(:package_fixture) { 'hello-2.0.0.tgz' } + let(:package_tarball) { file_fixture(package_fixture).open } + let(:package_json) { + tgz = file_fixture(package_fixture).open + tar = Zlib::GzipReader.new(tgz) + + Minitar::Reader.open tar do |archive| archive.find { _1.file? && _1.name in 'package/package.json' } .read end - - json } let(:minified_package_json) { @@ -32,7 +33,7 @@ } before do - Aws.config = { s3: { stub_responses: { get_object: [{ body: package }] } } } + Aws.config = { s3: { stub_responses: { get_object: [{ body: package_tarball }] } } } end context 'when artifact is waiting' do