From 6a9a57e9004185b3aa0a937024d72012311237bd Mon Sep 17 00:00:00 2001 From: g97iulio1609 Date: Sat, 28 Feb 2026 19:28:35 +0100 Subject: [PATCH 1/3] fix(mcp): add timeout to background thread join in stop() to prevent hangs When an Agent holding an MCPClient goes out of scope inside a function, the Agent.__del__ finalizer calls MCPClient.stop() which calls _background_thread.join(). If the background thread cannot exit promptly (e.g. transport subprocess teardown is slow), join() blocks indefinitely, causing the entire process to hang on exit. Add a timeout (equal to startup_timeout, default 30s) to the join() call. If the thread does not exit within the timeout, log a warning and proceed with cleanup. The thread is already a daemon thread (daemon=True), so it will be cleaned up by the interpreter on process exit. Fixes #1732 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/strands/tools/mcp/mcp_client.py | 7 ++++- tests/strands/tools/mcp/test_mcp_client.py | 32 +++++++++++++++++++--- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/strands/tools/mcp/mcp_client.py b/src/strands/tools/mcp/mcp_client.py index f064f7def..66fc230a2 100644 --- a/src/strands/tools/mcp/mcp_client.py +++ b/src/strands/tools/mcp/mcp_client.py @@ -353,7 +353,12 @@ async def _set_close_event() -> None: asyncio.run_coroutine_threadsafe(coro=_set_close_event(), loop=self._background_thread_event_loop) self._log_debug_with_thread("waiting for background thread to join") - self._background_thread.join() + self._background_thread.join(timeout=self._startup_timeout) + if self._background_thread.is_alive(): + logger.warning( + "background thread did not exit within %d seconds, continuing cleanup", + self._startup_timeout, + ) if self._background_thread_event_loop is not None: self._background_thread_event_loop.close() diff --git a/tests/strands/tools/mcp/test_mcp_client.py b/tests/strands/tools/mcp/test_mcp_client.py index e477c64d5..5e86ff7f0 100644 --- a/tests/strands/tools/mcp/test_mcp_client.py +++ b/tests/strands/tools/mcp/test_mcp_client.py @@ -495,14 +495,15 @@ def test_stop_with_background_thread_but_no_event_loop(): # Mock a background thread without event loop mock_thread = MagicMock() mock_thread.join = MagicMock() + mock_thread.is_alive = MagicMock(return_value=False) client._background_thread = mock_thread client._background_thread_event_loop = None # Should not raise any exceptions and should join the thread client.stop(None, None, None) - # Verify thread was joined - mock_thread.join.assert_called_once() + # Verify thread was joined with timeout + mock_thread.join.assert_called_once_with(timeout=client._startup_timeout) # Verify cleanup occurred assert client._background_thread is None @@ -515,6 +516,7 @@ def test_stop_closes_event_loop(): # Mock a background thread with event loop mock_thread = MagicMock() mock_thread.join = MagicMock() + mock_thread.is_alive = MagicMock(return_value=False) mock_event_loop = MagicMock() mock_event_loop.close = MagicMock() @@ -524,8 +526,8 @@ def test_stop_closes_event_loop(): # Should close the event loop and join the thread client.stop(None, None, None) - # Verify thread was joined - mock_thread.join.assert_called_once() + # Verify thread was joined with timeout + mock_thread.join.assert_called_once_with(timeout=client._startup_timeout) # Verify event loop was closed mock_event_loop.close.assert_called_once() @@ -535,6 +537,28 @@ def test_stop_closes_event_loop(): assert client._background_thread_event_loop is None +def test_stop_does_not_hang_when_join_times_out(): + """Test that stop() completes even if the background thread doesn't exit in time.""" + client = MCPClient(MagicMock()) + + mock_thread = MagicMock() + # Simulate thread still alive after join (timed out) + mock_thread.is_alive = MagicMock(return_value=True) + mock_event_loop = MagicMock() + mock_event_loop.close = MagicMock() + + client._background_thread = mock_thread + client._background_thread_event_loop = mock_event_loop + + client.stop(None, None, None) + + # join should have been called with timeout + mock_thread.join.assert_called_once_with(timeout=client._startup_timeout) + # Cleanup should still proceed even though thread is alive + assert client._background_thread is None + assert client._background_thread_event_loop is None + + def test_mcp_client_state_reset_after_timeout(): """Test that all client state is properly reset after timeout.""" From 6605874712e7714174f17229d85cd04a1fedbb47 Mon Sep 17 00:00:00 2001 From: g97iulio1609 Date: Sun, 1 Mar 2026 04:36:03 +0100 Subject: [PATCH 2/3] fix: skip event loop close when background thread still alive after timeout When join() times out and the thread is still alive, return early without closing the event loop or resetting internal state. This prevents RuntimeError from closing a running loop and avoids allowing a second start() while teardown is still in progress. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/strands/tools/mcp/mcp_client.py | 4 +++- tests/strands/tools/mcp/test_mcp_client.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/strands/tools/mcp/mcp_client.py b/src/strands/tools/mcp/mcp_client.py index 66fc230a2..02de4b998 100644 --- a/src/strands/tools/mcp/mcp_client.py +++ b/src/strands/tools/mcp/mcp_client.py @@ -356,9 +356,11 @@ async def _set_close_event() -> None: self._background_thread.join(timeout=self._startup_timeout) if self._background_thread.is_alive(): logger.warning( - "background thread did not exit within %d seconds, continuing cleanup", + "background thread did not exit within %d seconds; " + "skipping event loop close and state reset", self._startup_timeout, ) + return if self._background_thread_event_loop is not None: self._background_thread_event_loop.close() diff --git a/tests/strands/tools/mcp/test_mcp_client.py b/tests/strands/tools/mcp/test_mcp_client.py index 5e86ff7f0..c0b1345e4 100644 --- a/tests/strands/tools/mcp/test_mcp_client.py +++ b/tests/strands/tools/mcp/test_mcp_client.py @@ -554,9 +554,10 @@ def test_stop_does_not_hang_when_join_times_out(): # join should have been called with timeout mock_thread.join.assert_called_once_with(timeout=client._startup_timeout) - # Cleanup should still proceed even though thread is alive - assert client._background_thread is None - assert client._background_thread_event_loop is None + # When thread is still alive, stop() returns early without closing loop or resetting state + mock_event_loop.close.assert_not_called() + assert client._background_thread is mock_thread + assert client._background_thread_event_loop is mock_event_loop def test_mcp_client_state_reset_after_timeout(): From 0e13d2e43bb4693a9e628a4d5898e5009223f4d6 Mon Sep 17 00:00:00 2001 From: giulio-leone Date: Sun, 1 Mar 2026 07:33:27 +0100 Subject: [PATCH 3/3] test: clarify docstring for join timeout test Expand the docstring to explicitly describe the expected behavior: skip event loop close and state reset when thread is still alive. --- tests/strands/tools/mcp/test_mcp_client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/strands/tools/mcp/test_mcp_client.py b/tests/strands/tools/mcp/test_mcp_client.py index c0b1345e4..d1a3f46d1 100644 --- a/tests/strands/tools/mcp/test_mcp_client.py +++ b/tests/strands/tools/mcp/test_mcp_client.py @@ -538,7 +538,11 @@ def test_stop_closes_event_loop(): def test_stop_does_not_hang_when_join_times_out(): - """Test that stop() completes even if the background thread doesn't exit in time.""" + """Test that stop() returns early when the background thread doesn't exit within the timeout. + + When join() times out (is_alive() returns True), stop() must skip event loop + close and state reset to avoid interacting with the still-running thread. + """ client = MCPClient(MagicMock()) mock_thread = MagicMock()