Fix stateless HTTP task accumulation causing memory leak (#756)

wiggzz · claude · wiggzz · commit 67d02d3ea292 · 2026-02-25T10:53:49.000-06:00
In stateless mode, each request spawned a `run_stateless_server` task into the manager's global `_task_group`. After `handle_request()` completed and `terminate()` was called, the task kept running inside `app.run()`, blocked on `async for message in session.incoming_messages`. These zombie tasks accumulated indefinitely, leaking memory. Replace the global task group spawn with a request-scoped task group so that the server task is automatically cancelled when its request completes. Add a regression test that runs a real `Server` with a slow tool handler, abandons each in-flight tool call to simulate a client disconnect, and verifies that no tasks linger in the global task group after the requests finish. Closes #756. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/mcp/server/streamable_http_manager.py b/src/mcp/server/streamable_http_manager.py
@@ -151,7 +151,12 @@ async def handle_request(self, scope: Scope, receive: Receive, send: Send) -> No
             await self._handle_stateful_request(scope, receive, send)
 
     async def _handle_stateless_request(self, scope: Scope, receive: Receive, send: Send) -> None:
-        """Process request in stateless mode - creating a new transport for each request."""
+        """Process request in stateless mode - creating a new transport for each request.
+
+        Uses a request-scoped task group so the server task is automatically
+        cancelled when the request completes, preventing task accumulation in
+        the manager's global task group.
+        """
         logger.debug("Stateless mode: Creating new transport for this request")
         # No session ID needed in stateless mode
         http_transport = StreamableHTTPServerTransport(
@@ -176,16 +181,23 @@ async def run_stateless_server(*, task_status: TaskStatus[None] = anyio.TASK_STA
                 except Exception:  # pragma: no cover
                     logger.exception("Stateless session crashed")
 
-        # Assert task group is not None for type checking
-        assert self._task_group is not None
-        # Start the server task
-        await self._task_group.start(run_stateless_server)
+        # Use a request-scoped task group instead of the global one.
+        # This ensures the server task is cancelled when the request
+        # finishes, preventing zombie tasks from accumulating.
+        # See: https://github.com/modelcontextprotocol/python-sdk/issues/756
+        async with anyio.create_task_group() as request_tg:
 
-        # Handle the HTTP request and return the response
-        await http_transport.handle_request(scope, receive, send)
+            async def run_request_handler(*, task_status: TaskStatus[None] = anyio.TASK_STATUS_IGNORED):
+                task_status.started()
+                # Handle the HTTP request and return the response
+                await http_transport.handle_request(scope, receive, send)
+                # Terminate the transport after the request is handled
+                await http_transport.terminate()
+                # Cancel the request-scoped task group to stop the server task
+                request_tg.cancel_scope.cancel()
 
-        # Terminate the transport after the request is handled
-        await http_transport.terminate()
+            await request_tg.start(run_stateless_server)
+            await request_tg.start(run_request_handler)
 
     async def _handle_stateful_request(self, scope: Scope, receive: Receive, send: Send) -> None:
         """Process request in stateful mode - maintaining session state between requests."""
diff --git a/tests/server/test_streamable_http_manager.py b/tests/server/test_streamable_http_manager.py
@@ -268,6 +268,97 @@ async def mock_receive():
             assert len(transport._request_streams) == 0, "Transport should have no active request streams"
 
 
+@pytest.mark.anyio
+async def test_stateless_requests_task_leak_on_client_disconnect():
+    """Test that stateless tasks don't leak when clients disconnect mid-request.
+
+    Regression test for https://github.com/modelcontextprotocol/python-sdk/issues/756
+
+    Reproduces the production memory leak: a client sends a tool call, the tool
+    handler takes some time, and the client disconnects before the response is
+    delivered. The SSE response pipeline detects the disconnect but app.run()
+    continues in the background. After the tool finishes, the response has
+    nowhere to go, and app.run() blocks on ``async for message in
+    session.incoming_messages`` forever — leaking the task in the global
+    task group.
+
+    The test uses real Server.run() with a real tool handler, real SSE streaming
+    via httpx.ASGITransport, and simulates client disconnect by cancelling the
+    request task.
+    """
+    from mcp.types import CallToolResult, TextContent, Tool
+
+    tool_started = anyio.Event()
+    tool_gate = anyio.Event()
+
+    async def handle_list_tools(
+        ctx: ServerRequestContext, params: PaginatedRequestParams | None
+    ) -> ListToolsResult:
+        return ListToolsResult(
+            tools=[Tool(name="slow_tool", description="A slow tool", inputSchema={"type": "object"})]
+        )
+
+    async def handle_call_tool(
+        ctx: ServerRequestContext, params: Any
+    ) -> CallToolResult:
+        tool_started.set()
+        # Simulate a slow tool: block until the test releases tool_gate
+        await tool_gate.wait()
+        return CallToolResult(content=[TextContent(type="text", text="done")])
+
+    app = Server(
+        "test-stateless-leak",
+        on_list_tools=handle_list_tools,
+        on_call_tool=handle_call_tool,
+    )
+
+    host = "testserver"
+    mcp_app = app.streamable_http_app(host=host, stateless_http=True)
+
+    async with (
+        mcp_app.router.lifespan_context(mcp_app),
+        httpx.ASGITransport(mcp_app) as transport,
+    ):
+        session_manager = app._session_manager
+
+        async def make_and_abandon_tool_call():
+            async with httpx.AsyncClient(
+                transport=transport, base_url=f"http://{host}", timeout=30.0
+            ) as http_client:
+                async with Client(
+                    streamable_http_client(f"http://{host}/mcp", http_client=http_client)
+                ) as client:
+                    # Start tool call — this blocks until the tool handler returns.
+                    # The enclosing task group is cancelled to simulate a disconnect.
+                    await client.call_tool("slow_tool", {})
+
+        num_requests = 3
+        for _ in range(num_requests):
+            async with anyio.create_task_group() as tg:
+                tg.start_soon(make_and_abandon_tool_call)
+                # Wait until handle_call_tool has actually started running
+                await tool_started.wait()
+                tool_started = anyio.Event()  # Fresh event for the next iteration (read via closure)
+                # Simulate client disconnect by cancelling the in-flight request task
+                tg.cancel_scope.cancel()
+
+            # Release the tool handler now (its response has nowhere to go)
+            tool_gate.set()
+            tool_gate = anyio.Event()  # Fresh event for the next iteration (read via closure)
+
+            # Give background tasks a chance to settle
+            await anyio.sleep(0.1)
+
+        # Count tasks still alive in the session manager's global task group
+        await anyio.sleep(0.1)
+        leaked = len(session_manager._task_group._tasks)  # NOTE(review): reads private _tasks — presumably backend-specific; confirm
+
+    assert leaked == 0, (
+        f"Expected 0 lingering tasks but found {leaked}. "
+        f"Stateless request tasks are leaking after client disconnect."
+    )
+
+
 @pytest.mark.anyio
 async def test_unknown_session_id_returns_404():
     """Test that requests with unknown session IDs return HTTP 404 per MCP spec."""