From c6c0c22244010ec6c6a0bc3b9b4f6b2a57154c64 Mon Sep 17 00:00:00 2001 From: Igor Zolotarev <63460867+yngvar-antonsson@users.noreply.github.com> Date: Sun, 7 Apr 2024 00:18:02 +0200 Subject: [PATCH] Add more strict validation in `cartridge.is_healthy` (#2210) --- CHANGELOG.rst | 6 ++++ cartridge/rpc.lua | 20 ++++--------- cartridge/topology.lua | 65 ++++++++++++++++++++++++++++++------------ 3 files changed, 57 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f259cec3d..e5e4fffe6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -20,6 +20,12 @@ Added - New GraphQL API ``failover_state_provider_status`` to ping state provider connection. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Changed +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- More strict validation for ``cartridge.is_healthy`` API function. + ------------------------------------------------------------------------------- [2.9.0] - 2024-03-06 ------------------------------------------------------------------------------- diff --git a/cartridge/rpc.lua b/cartridge/rpc.lua index aa8a5b031..8d1c439fe 100755 --- a/cartridge/rpc.lua +++ b/cartridge/rpc.lua @@ -52,21 +52,11 @@ local function call_local(role_name, fn_name, args) end local function member_is_healthy(uri, instance_uuid) - local member = membership.get_member(uri) - return ( - (member ~= nil) - and (member.status == 'alive' or member.status == 'suspect') - and (member.payload.uuid == instance_uuid) - and ( - member.payload.state_prev == nil or -- for backward compatibility with old versions - member.payload.state_prev == 'RolesConfigured' or - member.payload.state_prev == 'ConfiguringRoles' - ) - and ( - member.payload.state == 'ConfiguringRoles' or - member.payload.state == 'RolesConfigured' - ) - ) + local res, _ = topology.member_is_healthy(uri, instance_uuid) + if res == nil then + return false + end + return true end --- List candidates suitable for performing a remote call. diff --git a/cartridge/topology.lua b/cartridge/topology.lua index 47ab866eb..619d59d3e 100755 --- a/cartridge/topology.lua +++ b/cartridge/topology.lua @@ -1088,13 +1088,54 @@ local function refine_servers_uri(topology_cfg) return ret end +--- Check the instance health. +-- It is healthy if its state is OK. +-- +-- The function is designed mostly for testing purposes. +-- +-- @function member_is_healthy +-- @treturn[1] boolean `true` +-- @treturn[2] nil +-- @treturn[2] table Error description +local function member_is_healthy(uri, instance_uuid) + local member = membership.get_member(uri) or {} + + if (member.status ~= 'alive' and member.status ~= 'suspect') then + return nil, string.format( + '%s status is %s', + uri, member.status + ) + elseif (member.payload.uuid ~= instance_uuid) then + return nil, string.format( + '%s uuid mismatch: expected %s, have %s', + uri, instance_uuid, member.payload.uuid + ) + elseif member.payload.state_prev ~= nil + and member.payload.state_prev ~= 'ConfiguringRoles' + and member.payload.state_prev ~= 'RolesConfigured' then + return nil, string.format( + '%s previous state %s', + uri, member.payload.state_prev + ) + elseif member.payload.state ~= 'ConfiguringRoles' + and member.payload.state ~= 'RolesConfigured' then + return nil, string.format( + '%s state %s', + uri, member.payload.state + ) + end + return true +end + --- Check the cluster health. -- It is healthy if all instances are healthy. -- -- The function is designed mostly for testing purposes. -- -- @function cluster_is_healthy --- @treturn boolean true / false +-- @treturn[1] boolean `true` +-- @treturn[2] nil +-- @treturn[2] table Error description local function cluster_is_healthy() local confapplier = require('cartridge.confapplier') if confapplier.get_state() ~= 'RolesConfigured' then @@ -1104,24 +1145,9 @@ local function cluster_is_healthy() local topology_cfg = confapplier.get_readonly('topology') for _it, instance_uuid, server in fun.filter(not_disabled, topology_cfg.servers) do - local member = membership.get_member(server.uri) or {} - - if (member.status ~= 'alive') then - return nil, string.format( - '%s status is %s', - server.uri, member.status - ) - elseif (member.payload.uuid ~= instance_uuid) then - return nil, string.format( - '%s uuid mismatch: expected %s, have %s', - server.uri, instance_uuid, member.payload.uuid - ) - elseif member.payload.state ~= 'ConfiguringRoles' - and member.payload.state ~= 'RolesConfigured' then - return nil, string.format( - '%s state %s', - server.uri, member.payload.state - ) + local res, err = member_is_healthy(server.uri, instance_uuid) + if res == nil then + return nil, err end end @@ -1220,6 +1246,7 @@ return { get_failover_params = get_failover_params, get_leaders_order = get_leaders_order, + member_is_healthy = member_is_healthy, cluster_is_healthy = cluster_is_healthy, refine_servers_uri = refine_servers_uri, probe_missing_members = probe_missing_members,