Skip to content

Commit

Permalink
Send SIGTERM twice before sending SIGKILL
Browse files Browse the repository at this point in the history
  • Loading branch information
akash-akya committed Dec 17, 2024
1 parent e3cb8a2 commit e949cbb
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 32 deletions.
4 changes: 3 additions & 1 deletion lib/exile/process.ex
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ defmodule Exile.Process do

@type exit_status :: non_neg_integer

@type os_pid :: pos_integer

@default_opts [env: [], stderr: :console]
@default_buffer_size 65_535
@os_signal_timeout 1000
Expand Down Expand Up @@ -595,7 +597,7 @@ defmodule Exile.Process do
This is meant only for debugging. Avoid interacting with the
external process directly
"""
@spec os_pid(t) :: pos_integer()
@spec os_pid(t) :: os_pid()
def os_pid(process) do
GenServer.call(process.pid, :os_pid, :infinity)
end
Expand Down
81 changes: 51 additions & 30 deletions lib/exile/watcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,15 @@ defmodule Exile.Watcher do
end

@impl true
def handle_info({:DOWN, ref, :process, pid, _reason}, %{
pid: pid,
socket_path: socket_path,
os_pid: os_pid,
ref: ref
}) do
def handle_info({:DOWN, ref, :process, pid, _reason}, %{pid: pid, ref: ref} = state) do
%{socket_path: socket_path, os_pid: os_pid} = state
_ = File.rm(socket_path)
# at max we wait for 50ms for program to exit
if process_exit?(os_pid, 50) do
:ok
else

# Ideally we should skip checking if program is alive if the Exile
# process is terminated with reason `:normal` But at the moment we
# unconditionally terminate, potentially with reason `:normal`
# when owner process exit expecting the watcher to do the cleanup
if !process_dead?(os_pid, 50) do
attempt_graceful_exit(os_pid)
end

Expand All @@ -46,37 +44,60 @@ defmodule Exile.Watcher do
# when watcher is attempted to be killed, we forcefully kill external os process.
# This can happen when beam receive SIGTERM
def handle_info({:EXIT, _, reason}, %{pid: pid, socket_path: socket_path, os_pid: os_pid}) do
Logger.debug(fn -> "Watcher exiting. reason: #{inspect(reason)}" end)
Logger.debug("Watcher exiting. reason: #{inspect(reason)}")
File.rm!(socket_path)
Elixir.Process.exit(pid, :watcher_exit)
attempt_graceful_exit(os_pid)
{:stop, reason, nil}
end

# certain programs such as older version of `parallel` expects
# multiple `SIGTERM` signals
@term_seq [sigterm: 150, sigterm: 100, sigkill: 100]

@spec attempt_graceful_exit(Exile.Process.os_pid()) :: :ok | no_return
defp attempt_graceful_exit(os_pid) do
Logger.debug("Failed to stop external program gracefully. attempting SIGTERM")
Nif.nif_kill(os_pid, :sigterm)
process_exit?(os_pid, 100) && throw(:done)

Logger.debug("Failed to stop external program with SIGTERM. attempting SIGKILL")
Nif.nif_kill(os_pid, :sigkill)
process_exit?(os_pid, 200) && throw(:done)

Logger.error("failed to kill external process")
raise "Failed to kill external process"
catch
:done ->
Logger.debug(fn -> "External program exited successfully" end)
end
Logger.debug("Starting graceful termination sequence: #{inspect(@term_seq)}")

dead? =
Enum.reduce_while(@term_seq, false, fn {signal, wait_time}, _dead? ->
Nif.nif_kill(os_pid, signal)

defp process_exit?(os_pid), do: !Nif.nif_is_os_pid_alive(os_pid)
# check process_status first before going to sleep
if process_dead?(os_pid, wait_time) do
Logger.debug("External program terminated successfully with signal: #{signal}")
{:halt, true}
else
Logger.debug("Failed to stop with signal: #{signal}, wait_time: #{wait_time}")
{:cont, false}
end
end)

defp process_exit?(os_pid, timeout) do
if process_exit?(os_pid) do
if dead? do
:ok
else
Logger.error("Failed to kill external process, os_pid #{os_pid}")
raise "Failed to kill external process"
end
end

@spec process_dead?(Exile.Process.os_pid(), pos_integer) :: boolean
defp process_dead?(os_pid, wait_time) do
# check process_status first before going to sleep
if process_status(os_pid) == :dead do
true
else
:timer.sleep(timeout)
process_exit?(os_pid)
Elixir.Process.sleep(wait_time)
process_status(os_pid) == :dead
end
end

@spec process_status(Exile.Process.os_pid()) :: :alive | :dead
defp process_status(os_pid) do
if Nif.nif_is_os_pid_alive(os_pid) do
:alive
else
:dead
end
end
end
2 changes: 1 addition & 1 deletion test/exile/process_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ defmodule Exile.ProcessTest do
assert os_process_alive?(os_pid)

# ensure the script set the correct signal handlers (handlers to ignore signal)
assert {:ok, "ignored signals\n"} = Process.read(s)
assert {:ok, "ignored signals\n" <> _} = Process.read(s)

# exit without waiting for the exile process
{os_pid, s}
Expand Down

0 comments on commit e949cbb

Please sign in to comment.