Skip to content

Commit

Permalink
add enumeratorio to improve memory efficiency
Browse files Browse the repository at this point in the history
  • Loading branch information
ezekg committed Nov 8, 2024
1 parent 6ae07aa commit 88ec961
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 11 deletions.
29 changes: 22 additions & 7 deletions app/workers/process_docker_image_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,19 @@ def perform(artifact_id)
raise ImageNotAcceptableError, 'unacceptable filesize'
end

# download the image tarball
# download the image tarball in chunks to reduce memory footprint
client = artifact.client
tar = client.get_object(bucket: artifact.bucket, key: artifact.key)
.body
enum = Enumerator.new do |yielder|
client.get_object(bucket: artifact.bucket, key: artifact.key) do |chunk|
yielder << chunk
end
end

# wrap the enumerator to provide an IO-like interface
tar = EnumeratorIO.new(enum)

# keep a ref to the manifest
manifest = nil

# unpack the package tarball
unpack tar do |archive|
Expand All @@ -39,15 +48,15 @@ def perform(artifact_id)
entry.size > MAX_MANIFEST_SIZE

# parse/validate and minify the manifest
json = JSON.parse(entry.read)
.to_json
content = JSON.parse(entry.read)
.to_json

ReleaseManifest.create!(
manifest = ReleaseManifest.create!(
account_id: artifact.account_id,
environment_id: artifact.environment_id,
release_id: artifact.release_id,
release_artifact_id: artifact.id,
content: json,
content:,
)
in %r{^blobs/sha256/} if entry.file?
key = artifact.key_for(entry.name)
Expand All @@ -67,6 +76,10 @@ def perform(artifact_id)
end
end

# we can assume image tarball is invalid if there's no manifest
raise ImageNotAcceptableError, 'manifest is missing' if
manifest.nil?

artifact.update!(status: 'UPLOADED')

BroadcastEventService.call(
Expand All @@ -76,6 +89,8 @@ def perform(artifact_id)
)
rescue ImageNotAcceptableError,
ActiveRecord::RecordInvalid,
JSON::ParserError,
Minitar::UnexpectedEOF,
Minitar::Error,
IOError => e
Keygen.logger.warn { "[workers.process-docker-image-worker] Error: #{e.class.name} - #{e.message}" }
Expand Down
3 changes: 3 additions & 0 deletions config/initializers/enumerator_io.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# frozen_string_literal: true

require_dependency Rails.root / 'lib' / 'enumerator_io'
1 change: 1 addition & 0 deletions config/initializers/zietwork.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Rails.autoloaders.each do |autoloader|
# FIXME(ezekg) Should we rename these to follow conventions?
autoloader.inflector.inflect(
"enumerator_io" => "EnumeratorIO",
"digest_io" => "DigestIO",
"jsonapi" => "JSONAPI",
"ee" => "EE",
Expand Down
37 changes: 37 additions & 0 deletions lib/enumerator_io.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

class EnumeratorIO
def initialize(enum)
@enum = enum
@buffer = ''.b
@eof = false
end

def eof? = @eof && @buffer.empty?
def read(length = nil, buffer = nil)
buffer ||= +''
buffer.clear

# fill the buffer until it has enough data or EOF
while !@eof && (length.nil? || @buffer.bytesize < length)
begin
@buffer << @enum.next
rescue StopIteration
@eof = true

break
end
end

# extract the requested amount of data from the buffer
if length
buffer << @buffer.slice!(0, length)
else
buffer << @buffer

@buffer.clear
end

buffer.empty? ? nil : buffer
end
end
19 changes: 15 additions & 4 deletions spec/workers/process_docker_image_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
after { Sidekiq::Testing.fake! }

context 'when artifact is a valid image' do
let(:image_tarball) { file_fixture('alpine-3.20.3.tar').open }
let(:image_fixture) { 'alpine-3.20.3.tar' }
let(:image_tarball) { file_fixture(image_fixture).open }
let(:manifest_json) {
Minitar::Reader.open image_tarball do |archive|
tarball = file_fixture(image_fixture).open

Minitar::Reader.open tarball do |archive|
archive.find { _1.file? && _1.name in 'manifest.json' }
.read
end
Expand Down Expand Up @@ -291,10 +294,18 @@

context 'when artifact filesize is unaccurate' do
let(:artifact) { create(:artifact, :processing, filesize: 1.kilobyte, account:) }
let(:file) { file_fixture('large.tar.gz').open }
let(:tgz) { file_fixture('large.tar.gz').open }
let(:tar) { Zlib::GzipReader.new(tgz).read }

before do
Aws.config = { s3: { stub_responses: { get_object: [{ body: file }] } } }
Aws.config = {
s3: {
stub_responses: {
head_object: [{ content_length: tar.size }],
get_object: [{ body: tar }],
},
},
}
end

it 'should not process file' do
Expand Down

0 comments on commit 88ec961

Please sign in to comment.