From 3a4a0853247ffe77cdfb9a6dba16fddcaef33570 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 14:28:40 +0100
Subject: [PATCH 1/2] test(openai-agents): Replace mocks with httpx in MCP tool
 tests

---
 .../openai_agents/test_openai_agents.py       | 479 +++++++++++-------
 1 file changed, 290 insertions(+), 189 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 1390455317..17f98f8d71 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,6 +5,7 @@
 import os
 import json
 import logging
+import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
@@ -314,6 +315,25 @@ def test_agent_custom_model():
     )
 
 
+@pytest.fixture
+def get_model_response():
+    """
+    Factory fixture: wrap a response model in a 200 ``httpx.Response``
+    whose body is the JSON-serialized model.
+    """
+
+    def inner(response_content):
+        # Request object the canned response is attached to.
+        request = httpx.Request("POST", "/responses")
+
+        # Body is the model's ``model_dump()`` output as UTF-8 encoded JSON.
+        body = json.dumps(response_content.model_dump()).encode("utf-8")
+
+        return httpx.Response(200, request=request, content=body)
+
+    return inner
+
+
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
     sentry_init, capture_events, test_agent, mock_model_response
@@ -1708,79 +1728,106 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
 
 
 @pytest.mark.asyncio
-async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
+async def test_mcp_tool_execution_spans(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a McpCall object
-            mcp_call = McpCall(
-                id="mcp_call_123",
-                name="test_mcp_tool",
-                arguments='{"query": "search term"}',
-                output="MCP tool executed successfully",
-                error=None,
-                type="mcp_call",
-                server_label="test_server",
-            )
-
-            # Create a ModelResponse with an McpCall in the output
-            mcp_response = ModelResponse(
-                output=[mcp_call],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=10,
-                    output_tokens=5,
-                    total_tokens=15,
+    mcp_response = get_model_response(
+        Response(
+            id="resp_mcp_123",
+            output=[
+                McpCall(
+                    id="mcp_call_123",
+                    name="test_mcp_tool",
+                    arguments='{"query": "search term"}',
+                    output="MCP tool executed successfully",
+                    error=None,
+                    type="mcp_call",
+                    server_label="test_server",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_mcp_123",
-            )
-
-            # Final response after MCP tool execution
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="Task completed using MCP tool",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=15,
-                    output_tokens=10,
-                    total_tokens=25,
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_123",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [mcp_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Task completed using MCP tool",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
+                ),
+                total_tokens=25,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        side_effect=[mcp_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            await agents.Runner.run(
-                test_agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
+        await agents.Runner.run(
+            test_agent,
+            "Please use MCP tool",
+            run_config=test_run_config,
+        )
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -1811,79 +1858,106 @@ async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent)
 
 
 @pytest.mark.asyncio
-async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
+async def test_mcp_tool_execution_with_error(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test that MCP tool calls with errors are tracked with error status.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a McpCall object with an error
-            mcp_call_with_error = McpCall(
-                id="mcp_call_error_123",
-                name="failing_mcp_tool",
-                arguments='{"query": "test"}',
-                output=None,
-                error="MCP tool execution failed",
-                type="mcp_call",
-                server_label="test_server",
-            )
-
-            # Create a ModelResponse with a failing McpCall
-            mcp_response = ModelResponse(
-                output=[mcp_call_with_error],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=10,
-                    output_tokens=5,
-                    total_tokens=15,
+    mcp_response = get_model_response(
+        Response(
+            id="resp_mcp_123",
+            output=[
+                McpCall(
+                    id="mcp_call_error_123",
+                    name="failing_mcp_tool",
+                    arguments='{"query": "test"}',
+                    output=None,
+                    error="MCP tool execution failed",
+                    type="mcp_call",
+                    server_label="test_server",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_mcp_error_123",
-            )
-
-            # Final response after error
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="The MCP tool encountered an error",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=15,
-                    output_tokens=10,
-                    total_tokens=25,
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_error_123",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [mcp_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Task completed using MCP tool",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
+                ),
+                total_tokens=25,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        side_effect=[mcp_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            await agents.Runner.run(
-                test_agent,
-                "Please use failing MCP tool",
-                run_config=test_run_config,
-            )
+        await agents.Runner.run(
+            test_agent,
+            "Please use failing MCP tool",
+            run_config=test_run_config,
+        )
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -1912,79 +1986,106 @@ async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_a
 
 
 @pytest.mark.asyncio
-async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
+async def test_mcp_tool_execution_without_pii(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test that MCP tool input/output are not included when send_default_pii is False.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a McpCall object
-            mcp_call = McpCall(
-                id="mcp_call_pii_123",
-                name="test_mcp_tool",
-                arguments='{"query": "sensitive data"}',
-                output="Result with sensitive info",
-                error=None,
-                type="mcp_call",
-                server_label="test_server",
-            )
-
-            # Create a ModelResponse with an McpCall
-            mcp_response = ModelResponse(
-                output=[mcp_call],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=10,
-                    output_tokens=5,
-                    total_tokens=15,
+    mcp_response = get_model_response(
+        Response(
+            id="resp_mcp_123",
+            output=[
+                McpCall(
+                    id="mcp_call_pii_123",
+                    name="test_mcp_tool",
+                    arguments='{"query": "sensitive data"}',
+                    output="Result with sensitive info",
+                    error=None,
+                    type="mcp_call",
+                    server_label="test_server",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_mcp_123",
-            )
-
-            # Final response
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="Task completed",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=15,
-                    output_tokens=10,
-                    total_tokens=25,
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_123",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [mcp_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Task completed",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=25,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,  # PII disabled
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        side_effect=[mcp_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=False,  # PII disabled
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            await agents.Runner.run(
-                test_agent,
-                "Please use MCP tool",
-                run_config=test_run_config,
-            )
+        await agents.Runner.run(
+            test_agent,
+            "Please use MCP tool",
+            run_config=test_run_config,
+        )
 
     (transaction,) = events
     spans = transaction["spans"]

From febf2fc7848a2101a1a777aa805a4ed40887dc5a Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 14:31:04 +0100
Subject: [PATCH 2/2] test(openai-agents): Run cloned agent in MCP tool tests

---
 tests/integrations/openai_agents/test_openai_agents.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 17f98f8d71..e651fcc66b 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -1824,7 +1824,7 @@ async def test_mcp_tool_execution_spans(
         events = capture_events()
 
         await agents.Runner.run(
-            test_agent,
+            agent,
             "Please use MCP tool",
             run_config=test_run_config,
         )
@@ -1954,7 +1954,7 @@ async def test_mcp_tool_execution_with_error(
         events = capture_events()
 
         await agents.Runner.run(
-            test_agent,
+            agent,
             "Please use failing MCP tool",
             run_config=test_run_config,
         )
@@ -2082,7 +2082,7 @@ async def test_mcp_tool_execution_without_pii(
         events = capture_events()
 
         await agents.Runner.run(
-            test_agent,
+            agent,
             "Please use MCP tool",
             run_config=test_run_config,
         )