Skip to content

Commit

Permalink
RUBY-3429 Retry failed KMS requests (#2907)
Browse files Browse the repository at this point in the history
  • Loading branch information
comandeo-mongo authored Nov 19, 2024
1 parent 1924187 commit f1d1f91
Show file tree
Hide file tree
Showing 11 changed files with 244 additions and 35 deletions.
1 change: 1 addition & 0 deletions .evergreen/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ if test -n "$FLE"; then
python3 -u .evergreen/csfle/kms_http_server.py --ca_file .evergreen/x509gen/ca.pem --cert_file .evergreen/x509gen/server.pem --port 8002 --require_client_cert &
python3 -u .evergreen/csfle/kms_kmip_server.py &
python3 -u .evergreen/csfle/fake_azure.py &
python3 -u .evergreen/csfle/kms_failpoint_server.py --port 9003 &

# Obtain temporary AWS credentials
PYTHON=python3 . .evergreen/csfle/set-temp-creds.sh
Expand Down
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,6 @@ Style/TrailingCommaInArrayLiteral:

Style/TrailingCommaInHashLiteral:
Enabled: false

RSpec/ExampleLength:
Max: 10
2 changes: 1 addition & 1 deletion gemfiles/standard.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,6 @@ def standard_dependencies
gem 'ruby-lsp', platforms: :mri
end

gem 'libmongocrypt-helper', '~> 1.11.0' if ENV['FLE'] == 'helper'
gem 'libmongocrypt-helper', '~> 1.12.0' if ENV['FLE'] == 'helper'
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength
58 changes: 57 additions & 1 deletion lib/mongo/crypt/binding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class Binding
# will cause a `LoadError`.
#
# @api private
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.7.0")
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.12.0")

# @!method self.mongocrypt_version(len)
# @api private
Expand Down Expand Up @@ -1113,6 +1113,62 @@ def self.check_kms_ctx_status(kms_context)
end
end

# @!method self.mongocrypt_kms_ctx_usleep(ctx)
# @api private
#
# Indicates how long to sleep before sending KMS request.
#
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object.
# @return [ int64 ] A 64-bit encoded number of microseconds of how long to sleep.
attach_function :mongocrypt_kms_ctx_usleep, [:pointer], :int64

# Returns number of milliseconds to sleep before sending KMS request
# for the given KMS context.
#
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context we are going
# to send KMS request for.
# @return [ Integer ] A 64-bit encoded number of microseconds to sleep.
def self.kms_ctx_usleep(kms_context)
mongocrypt_kms_ctx_usleep(kms_context.kms_ctx_p)
end

# @!method self.mongocrypt_kms_ctx_fail(ctx)
# @api private
#
# Indicate a network-level failure.
#
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object.
# @return [ Boolean ] whether the failed request may be retried.
attach_function :mongocrypt_kms_ctx_fail, [:pointer], :bool

# Check whether the last failed request for the KMS context may be retried.
#
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context
# @return [ true, false ] whether the failed request may be retried.
def self.kms_ctx_fail(kms_context)
mongocrypt_kms_ctx_fail(kms_context.kms_ctx_p)
end

# @!method self.mongocrypt_setopt_retry_kms(crypt, enable)
# @api private
#
# Enable or disable KMS retry behavior.
#
# @param [ FFI::Pointer ] crypt A pointer to a mongocrypt_t object
# @param [ Boolean ] enable A boolean indicating whether to retry operations.
# @return [ Boolean ] indicating success.
attach_function :mongocrypt_setopt_retry_kms, [:pointer, :bool], :bool

# Enable or disable KMS retry behavior.
#
# @param [ Mongo::Crypt::Handle ] handle
# @param [ true, false ] value whether to retry operations.
# @return [ true, fale ] true is the option was set, otherwise false.
def self.kms_ctx_setopt_retry_kms(handle, value)
mongocrypt_setopt_retry_kms(handle.ref, value)
end


# @!method self.mongocrypt_kms_ctx_done(ctx)
# @api private
#
Expand Down
82 changes: 55 additions & 27 deletions lib/mongo/crypt/context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def initialize(mongocrypt_handle, io)
Binding.mongocrypt_ctx_new(@mongocrypt_handle.ref),
Binding.method(:mongocrypt_ctx_destroy)
)

@encryption_io = io
@cached_azure_token = nil
end
Expand Down Expand Up @@ -90,35 +89,13 @@ def run_state_machine(timeout_holder)
when :done
return nil
when :need_mongo_keys
filter = Binding.ctx_mongo_op(self)

@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
mongocrypt_feed(key) if key
end

mongocrypt_done
provide_keys(timeout_ms)
when :need_mongo_collinfo
filter = Binding.ctx_mongo_op(self)

result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
mongocrypt_feed(result) if result

mongocrypt_done
provide_collection_info(timeout_ms)
when :need_mongo_markings
cmd = Binding.ctx_mongo_op(self)

result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
mongocrypt_feed(result)

mongocrypt_done
provide_markings(timeout_ms)
when :need_kms
while kms_context = Binding.ctx_next_kms_ctx(self) do
provider = Binding.kms_ctx_get_kms_provider(kms_context)
tls_options = @mongocrypt_handle.kms_tls_options(provider)
@encryption_io.feed_kms(kms_context, tls_options)
end

Binding.ctx_kms_done(self)
feed_kms
when :need_kms_credentials
Binding.ctx_provide_kms_providers(
self,
Expand All @@ -134,6 +111,57 @@ def run_state_machine(timeout_holder)

private

def provide_markings(timeout_ms)
cmd = Binding.ctx_mongo_op(self)

result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
mongocrypt_feed(result)

mongocrypt_done
end

def provide_collection_info(timeout_ms)
filter = Binding.ctx_mongo_op(self)

result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
mongocrypt_feed(result) if result

mongocrypt_done
end

def provide_keys(timeout_ms)
filter = Binding.ctx_mongo_op(self)

@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
mongocrypt_feed(key) if key
end

mongocrypt_done
end

def feed_kms
while (kms_context = Binding.ctx_next_kms_ctx(self)) do
begin
delay = Binding.kms_ctx_usleep(kms_context)
sleep(delay / 1_000_000.0) unless delay.nil?
provider = Binding.kms_ctx_get_kms_provider(kms_context)
tls_options = @mongocrypt_handle.kms_tls_options(provider)
@encryption_io.feed_kms(kms_context, tls_options)
rescue Error::KmsError => e
if e.network_error?
if Binding.kms_ctx_fail(kms_context)
next
else
raise
end
else
raise
end
end
end
Binding.ctx_kms_done(self)
end

# Indicate that state machine is done feeding I/O responses back to libmongocrypt
def mongocrypt_done
Binding.mongocrypt_ctx_mongo_done(ctx_p)
Expand Down
6 changes: 4 additions & 2 deletions lib/mongo/crypt/encryption_io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,10 @@ def with_ssl_socket(endpoint, tls_options, timeout_ms: nil)
tls_options.merge(socket_options)
)
yield(mongo_socket.socket)
rescue => e
raise Error::KmsError, "Error when connecting to KMS provider: #{e.class}: #{e.message}"
rescue Error::KmsError
raise
rescue StandardError => e
raise Error::KmsError.new("Error when connecting to KMS provider: #{e.class}: #{e.message}", network_error: true)
ensure
mongo_socket&.close
end
Expand Down
2 changes: 1 addition & 1 deletion lib/mongo/crypt/handle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def initialize(kms_providers, kms_tls_options, options={})
Binding.mongocrypt_new,
Binding.method(:mongocrypt_destroy)
)

Binding.kms_ctx_setopt_retry_kms(self, true)
@kms_providers = kms_providers
@kms_tls_options = kms_tls_options

Expand Down
9 changes: 9 additions & 0 deletions lib/mongo/error/kms_error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ class Error

# A KMS-related error during client-side encryption.
class KmsError < CryptError
def initialize(message, code: nil, network_error: nil)
@network_error = network_error
super(message, code: code)
end
end

# @return [ true, false ] whether this error was caused by a network error.
def network_error?
@network_error == true
end
end
end
112 changes: 112 additions & 0 deletions spec/integration/client_side_encryption/kms_retry_prose_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# frozen_string_literal: true

require 'spec_helper'

def simulate_failure(type, times = 1)
url = URI.parse("https://localhost:9003/set_failpoint/#{type}")
data = { count: times }.to_json
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
http.ca_file = '.evergreen/x509gen/ca.pem'
request = Net::HTTP::Post.new(url.path, { 'Content-Type' => 'application/json' })
request.body = data
http.request(request)
end

describe 'KMS Retry Prose Spec' do
require_libmongocrypt
require_enterprise
min_server_version '4.2'

include_context 'define shared FLE helpers'

let(:key_vault_client) do
ClientRegistry.instance.new_local_client(SpecConfig.instance.addresses)
end

let(:client_encryption) do
Mongo::ClientEncryption.new(
key_vault_client,
kms_tls_options: {
aws: default_kms_tls_options_for_provider,
gcp: default_kms_tls_options_for_provider,
azure: default_kms_tls_options_for_provider,
},
key_vault_namespace: key_vault_namespace,
# For some reason libmongocrypt ignores custom endpoints for Azure and CGP
# kms_providers: aws_kms_providers.merge(azure_kms_providers).merge(gcp_kms_providers)
kms_providers: aws_kms_providers
)
end

shared_examples 'kms_retry prose spec' do
it 'createDataKey and encrypt with TCP retry' do
simulate_failure('network')
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
simulate_failure('network')
expect do
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
end.not_to raise_error
end

it 'createDataKey and encrypt with HTTP retry' do
simulate_failure('http')
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
simulate_failure('http')
expect do
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
end.not_to raise_error
end

it 'createDataKey fails after too many retries' do
simulate_failure('network', 4)
expect do
client_encryption.create_data_key(kms_provider, master_key: master_key)
end.to raise_error(Mongo::Error::KmsError)
end
end

context 'with AWS KMS provider' do
let(:kms_provider) { 'aws' }

let(:master_key) do
{
region: 'foo',
key: 'bar',
endpoint: '127.0.0.1:9003',
}
end

include_examples 'kms_retry prose spec'
end

context 'with GCP KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
let(:kms_provider) { 'gcp' }

let(:master_key) do
{
project_id: 'foo',
location: 'bar',
key_ring: 'baz',
key_name: 'qux',
endpoint: '127.0.0.1:9003'
}
end

include_examples 'kms_retry prose spec'
end

context 'with Azure KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
let(:kms_provider) { 'azure' }

let(:master_key) do
{
key_vault_endpoint: '127.0.0.1:9003',
key_name: 'foo',
}
end

include_examples 'kms_retry prose spec'
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@
}
)
rescue Mongo::Error::KmsError => exc
exc.message.should =~ /Error when connecting to KMS provider/
exc.message.should =~ /Error when connecting to KMS provider|Empty KMS response/
exc.message.should =~ /libmongocrypt error code/
exc.message.should_not =~ /CryptError/
else
Expand Down
2 changes: 0 additions & 2 deletions spec/integration/search_indexes_prose_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ def filter_results(result, names)
.first
end

# rubocop:disable RSpec/ExampleLength
it 'succeeds' do
expect(create_index).to be == name
helper.wait_for(name)
Expand All @@ -158,7 +157,6 @@ def filter_results(result, names)

expect(index['latestDefinition']).to be == new_definition
end
# rubocop:enable RSpec/ExampleLength
end

# Case 5: dropSearchIndex suppresses namespace not found errors
Expand Down

0 comments on commit f1d1f91

Please sign in to comment.