From a12998a52a7387936d96cdbe3d5a0e7266047487 Mon Sep 17 00:00:00 2001 From: Nikolay Shirokovskiy Date: Wed, 21 Feb 2024 13:36:23 +0300 Subject: [PATCH] iproto: fix assertion on dropping of a new connection We need to handle case of dropping new connection. When net_send_greeting() is executed the connection can be closed due to iproto_drop_connections() call. Note that in the test the Tarantool crashes for another reason. Due to access after sleep to the connection that is destroyed so its memory is poisoned. Yet we visit net_send_greeting() too in the test with patch so original issue is verified too. We also need to test that such a connection is closed. This will be done in EE version. Closes #9717 NO_DOC=bugfix --- ...h-9717-fix-crash-on-new-connection-drop.md | 3 ++ src/box/iproto.cc | 18 +++++++++++- test/box-luatest/shutdown_test.lua | 28 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/gh-9717-fix-crash-on-new-connection-drop.md diff --git a/changelogs/unreleased/gh-9717-fix-crash-on-new-connection-drop.md b/changelogs/unreleased/gh-9717-fix-crash-on-new-connection-drop.md new file mode 100644 index 000000000000..890e31db007f --- /dev/null +++ b/changelogs/unreleased/gh-9717-fix-crash-on-new-connection-drop.md @@ -0,0 +1,3 @@ +## bugfix/core + +* Fixed a crash on dropping a just accepted connection (gh-9717). diff --git a/src/box/iproto.cc b/src/box/iproto.cc index 9ab4b0d4b008..4cce543d9e7c 100644 --- a/src/box/iproto.cc +++ b/src/box/iproto.cc @@ -918,6 +918,8 @@ struct iproto_connection * connection. */ struct cmsg cancel_msg; + /** Set if connection is accepted in TX. */ + bool is_established; }; /** Returns a string suitable for logging. */ @@ -1663,6 +1665,7 @@ iproto_connection_new(struct iproto_thread *iproto_thread) con->session = NULL; con->is_in_replication = false; con->is_drop_pending = false; + con->is_established = false; rlist_create(&con->in_stop_list); rlist_create(&con->tx.inprogress); rlist_add_entry(&iproto_thread->connections, con, in_connections); @@ -3329,6 +3332,11 @@ net_send_greeting(struct cmsg *m) { struct iproto_msg *msg = (struct iproto_msg *) m; struct iproto_connection *con = msg->connection; + if (con->is_drop_pending) { + iproto_connection_close(con); + iproto_msg_delete(msg); + return; + } if (msg->close_connection) { struct obuf *out = msg->wpos.obuf; int64_t nwr = iostream_writev(&con->io, out->iov, @@ -3345,6 +3353,7 @@ net_send_greeting(struct cmsg *m) iproto_msg_delete(msg); return; } + con->is_established = true; con->wend = msg->wpos; /* * Connect is synchronous, so no one could have been @@ -3988,9 +3997,16 @@ iproto_do_cfg_f(struct cbus_call_msg *m) * cannot close them as usual. Anyway we cancel * replication fibers as well and close connection * after replication is breaked. + * + * Do not close connection that is not yet + * established. Otherwise session + * on_connect/on_disconnect callbacks may be + * executed in reverse order in case of yields + * in on_connect callbacks. */ if (!con->is_in_replication && - con->state == IPROTO_CONNECTION_ALIVE) + con->state == IPROTO_CONNECTION_ALIVE && + con->is_established) iproto_connection_close(con); /* * Do not wait deletion of connection that called diff --git a/test/box-luatest/shutdown_test.lua b/test/box-luatest/shutdown_test.lua index be2402f0311e..fd02c1a289aa 100644 --- a/test/box-luatest/shutdown_test.lua +++ b/test/box-luatest/shutdown_test.lua @@ -1,5 +1,6 @@ local server = require('luatest.server') local utils = require('luatest.utils') +local justrun = require('test.justrun') local fio = require('fio') local popen = require('popen') local fiber = require('fiber') @@ -74,6 +75,33 @@ g_crash.test_shutdown_during_snapshot_on_signal = function(cg) t.assert_equals(status.exit_code, 0) end +-- Test shutdown when new iproto connection is accepted but +-- not yet fully established. +g_crash.test_shutdown_during_new_connection = function(cg) + local script = [[ + local net = require('net.box') + local fiber = require('fiber') + + local sock = 'unix/:./iproto.sock' + box.cfg{listen = sock} + local cond = fiber.cond() + local in_trigger = false + box.session.on_connect(function() + in_trigger = true + cond:signal() + fiber.sleep(1) + end) + net.connect(sock, {wait_connected = false}) + while not in_trigger do + cond:wait() + end + os.exit() + ]] + local result = justrun.tarantool(cg.workdir, {}, {'-e', script}, + {nojson = true, quote_args = true}) + t.assert_equals(result.exit_code, 0) +end + local g = t.group() g.before_each(function(cg)