Skip to content

Commit

Permalink
Add more strict validation in cartridge.is_healthy
Browse files Browse the repository at this point in the history
  • Loading branch information
yngvar-antonsson committed Apr 4, 2024
1 parent 8b54f07 commit caf271f
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 33 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ Added

- New GraphQL API ``failover_state_provider_status`` to ping state provider connection.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Changed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Stricter validation for the ``cartridge.is_healthy`` API function.

-------------------------------------------------------------------------------
[2.9.0] - 2024-03-06
-------------------------------------------------------------------------------
Expand Down
20 changes: 5 additions & 15 deletions cartridge/rpc.lua
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,11 @@ local function call_local(role_name, fn_name, args)
end

--- Tell whether a cluster member is healthy, as a plain boolean.
--
-- Delegates every check to `topology.member_is_healthy`, which returns
-- `true` on success and `nil, err` on failure. The error description is
-- intentionally dropped here: callers of this helper only need a yes/no
-- answer.
--
-- @function member_is_healthy
-- @local
-- @param uri member URI, passed through to `topology.member_is_healthy`
-- @param instance_uuid UUID the member is expected to have
-- @treturn boolean `false` if the topology check returned `nil`,
--   `true` otherwise
local function member_is_healthy(uri, instance_uuid)
    -- Keep only the first return value; the second one is the error text.
    local res = topology.member_is_healthy(uri, instance_uuid)
    return res ~= nil
end

--- List candidates suitable for performing a remote call.
Expand Down
59 changes: 41 additions & 18 deletions cartridge/topology.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,43 @@ local function refine_servers_uri(topology_cfg)
return ret
end

--- Check the instance health.
-- An instance is considered healthy when all of the following hold:
--
-- * its membership status is `alive` or `suspect`;
-- * the UUID in its membership payload equals `instance_uuid`;
-- * its previous state (`payload.state_prev`) is either absent or one of
--   `ConfiguringRoles` / `RolesConfigured`;
-- * its current state (`payload.state`) is `ConfiguringRoles` or
--   `RolesConfigured`.
--
-- The checks run in the order listed and the first failing one is reported.
--
-- @function member_is_healthy
-- @param uri URI used to look the member up in membership
-- @param instance_uuid UUID the member is expected to advertise
-- @treturn[1] boolean `true` if the instance is healthy
-- @treturn[2] nil
-- @treturn[2] string description of the first failed check
local function member_is_healthy(uri, instance_uuid)
    -- An unknown member becomes an empty table, so it is reported through
    -- the regular "status is nil" branch instead of raising an error.
    local member = membership.get_member(uri) or {}
    -- Do not assume the payload is present: without it the member simply
    -- fails the uuid check rather than crashing on a nil index.
    local payload = member.payload or {}

    if member.status ~= 'alive' and member.status ~= 'suspect' then
        return nil, string.format(
            '%s status is %s',
            uri, tostring(member.status)
        )
    elseif payload.uuid ~= instance_uuid then
        return nil, string.format(
            '%s uuid mismatch: expected %s, have %s',
            uri, tostring(instance_uuid), tostring(payload.uuid)
        )
    elseif payload.state_prev ~= nil
    and payload.state_prev ~= 'ConfiguringRoles'
    and payload.state_prev ~= 'RolesConfigured' then
        -- A missing state_prev is accepted on purpose (the check is
        -- skipped when the field is nil).
        return nil, string.format(
            '%s previous state %s',
            uri, tostring(payload.state_prev)
        )
    elseif payload.state ~= 'ConfiguringRoles'
    and payload.state ~= 'RolesConfigured' then
        return nil, string.format(
            '%s state %s',
            uri, tostring(payload.state)
        )
    end

    return true
end

--- Check the cluster health.
-- It is healthy if all instances are healthy.
--
Expand All @@ -1104,24 +1141,9 @@ local function cluster_is_healthy()
local topology_cfg = confapplier.get_readonly('topology')

for _it, instance_uuid, server in fun.filter(not_disabled, topology_cfg.servers) do
local member = membership.get_member(server.uri) or {}

if (member.status ~= 'alive') then
return nil, string.format(
'%s status is %s',
server.uri, member.status
)
elseif (member.payload.uuid ~= instance_uuid) then
return nil, string.format(
'%s uuid mismatch: expected %s, have %s',
server.uri, instance_uuid, member.payload.uuid
)
elseif member.payload.state ~= 'ConfiguringRoles'
and member.payload.state ~= 'RolesConfigured' then
return nil, string.format(
'%s state %s',
server.uri, member.payload.state
)
local res, err = member_is_healthy(server.uri, instance_uuid)
if res == nil then
return nil, err
end
end

Expand Down Expand Up @@ -1220,6 +1242,7 @@ return {

get_failover_params = get_failover_params,
get_leaders_order = get_leaders_order,
member_is_healthy = member_is_healthy,
cluster_is_healthy = cluster_is_healthy,
refine_servers_uri = refine_servers_uri,
probe_missing_members = probe_missing_members,
Expand Down

0 comments on commit caf271f

Please sign in to comment.