From 3a4a0853247ffe77cdfb9a6dba16fddcaef33570 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 14:28:40 +0100 Subject: [PATCH 1/2] test(openai-agents): Replace mocks with httpx in MCP tool tests --- .../openai_agents/test_openai_agents.py | 479 +++++++++++------- 1 file changed, 290 insertions(+), 189 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 1390455317..17f98f8d71 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -5,6 +5,7 @@ import os import json import logging +import httpx import sentry_sdk from sentry_sdk import start_span @@ -314,6 +315,25 @@ def test_agent_custom_model(): ) +@pytest.fixture +def get_model_response(): + def inner(response_content): + model_request = httpx.Request( + "POST", + "/responses", + ) + + response = httpx.Response( + 200, + request=model_request, + content=json.dumps(response_content.model_dump()).encode("utf-8"), + ) + + return response + + return inner + + @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( sentry_init, capture_events, test_agent, mock_model_response @@ -1708,79 +1728,106 @@ async def test_span_status_error(sentry_init, capture_events, test_agent): @pytest.mark.asyncio -async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent): +async def test_mcp_tool_execution_spans( + sentry_init, capture_events, test_agent, get_model_response +): """ Test that MCP (Model Context Protocol) tool calls create execute_tool spans. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a McpCall object - mcp_call = McpCall( - id="mcp_call_123", - name="test_mcp_tool", - arguments='{"query": "search term"}', - output="MCP tool executed successfully", - error=None, - type="mcp_call", - server_label="test_server", - ) - - # Create a ModelResponse with an McpCall in the output - mcp_response = ModelResponse( - output=[mcp_call], - usage=Usage( - requests=1, - input_tokens=10, - output_tokens=5, - total_tokens=15, + mcp_response = get_model_response( + Response( + id="resp_mcp_123", + output=[ + McpCall( + id="mcp_call_123", + name="test_mcp_tool", + arguments='{"query": "search term"}', + output="MCP tool executed successfully", + error=None, + type="mcp_call", + server_label="test_server", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, ), - response_id="resp_mcp_123", - ) - - # Final response after MCP tool execution - final_response = ModelResponse( - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using MCP tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - usage=Usage( - requests=1, - input_tokens=15, - output_tokens=10, - total_tokens=25, + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, ), - response_id="resp_final_123", - ) + total_tokens=15, + ), + ) + ) - mock_get_response.side_effect = [mcp_response, final_response] + final_response = get_model_response( + Response( + id="resp_final_123", + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using MCP tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=25, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - await agents.Runner.run( - test_agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) (transaction,) = events spans = transaction["spans"] @@ -1811,79 +1858,106 @@ async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent) @pytest.mark.asyncio -async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent): +async def test_mcp_tool_execution_with_error( + sentry_init, capture_events, test_agent, get_model_response +): """ Test that MCP tool calls with errors are tracked with error status. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a McpCall object with an error - mcp_call_with_error = McpCall( - id="mcp_call_error_123", - name="failing_mcp_tool", - arguments='{"query": "test"}', - output=None, - error="MCP tool execution failed", - type="mcp_call", - server_label="test_server", - ) - - # Create a ModelResponse with a failing McpCall - mcp_response = ModelResponse( - output=[mcp_call_with_error], - usage=Usage( - requests=1, - input_tokens=10, - output_tokens=5, - total_tokens=15, + mcp_response = get_model_response( + Response( + id="resp_mcp_123", + output=[ + McpCall( + id="mcp_call_error_123", + name="failing_mcp_tool", + arguments='{"query": "test"}', + output=None, + error="MCP tool execution failed", + type="mcp_call", + server_label="test_server", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, ), - response_id="resp_mcp_error_123", - ) - - # Final response after error - final_response = ModelResponse( - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="The MCP tool encountered an error", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - usage=Usage( - requests=1, - input_tokens=15, - output_tokens=10, - total_tokens=25, + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, ), - response_id="resp_final_error_123", - ) + total_tokens=15, + ), + ) + ) - mock_get_response.side_effect = [mcp_response, final_response] + final_response = get_model_response( + Response( + id="resp_final_123", + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using MCP tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=25, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - ) + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) - events = capture_events() + events = capture_events() - await agents.Runner.run( - test_agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + test_agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) (transaction,) = events spans = transaction["spans"] @@ -1912,79 +1986,106 @@ async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_a @pytest.mark.asyncio -async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent): +async def test_mcp_tool_execution_without_pii( + sentry_init, capture_events, test_agent, get_model_response +): """ Test that MCP tool input/output are not included when send_default_pii is False. """ + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent = test_agent.clone(model=model) - with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): - with patch( - "agents.models.openai_responses.OpenAIResponsesModel.get_response" - ) as mock_get_response: - # Create a McpCall object - mcp_call = McpCall( - id="mcp_call_pii_123", - name="test_mcp_tool", - arguments='{"query": "sensitive data"}', - output="Result with sensitive info", - error=None, - type="mcp_call", - server_label="test_server", - ) - - # Create a ModelResponse with an McpCall - mcp_response = ModelResponse( - output=[mcp_call], - usage=Usage( - requests=1, - input_tokens=10, - output_tokens=5, - total_tokens=15, + mcp_response = get_model_response( + Response( + id="resp_mcp_123", + output=[ + McpCall( + id="mcp_call_pii_123", + name="test_mcp_tool", + arguments='{"query": "sensitive data"}', + output="Result with sensitive info", + error=None, + type="mcp_call", + server_label="test_server", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, ), - response_id="resp_mcp_123", - ) - - # Final response - final_response = ModelResponse( - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - usage=Usage( - requests=1, - input_tokens=15, - output_tokens=10, - total_tokens=25, + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, ), - response_id="resp_final_123", - ) + total_tokens=15, + ), + ) + ) - mock_get_response.side_effect = [mcp_response, final_response] + final_response = get_model_response( + Response( + id="resp_final_123", + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4.1-2025-04-14", + object="response", + usage=ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=25, + ), + ) + ) - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - ) + with patch.object( + agent.model._client._client, + "send", + side_effect=[mcp_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + ) - events = capture_events() + events = capture_events() - await agents.Runner.run( - test_agent, - "Please use MCP tool", - run_config=test_run_config, - ) + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) (transaction,) = events spans = transaction["spans"] From febf2fc7848a2101a1a777aa805a4ed40887dc5a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 6 Mar 2026 14:31:04 +0100 Subject: [PATCH 2/2] use correct agent --- tests/integrations/openai_agents/test_openai_agents.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 17f98f8d71..e651fcc66b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1824,7 +1824,7 @@ async def test_mcp_tool_execution_spans( events = capture_events() await agents.Runner.run( - test_agent, + agent, "Please use MCP tool", run_config=test_run_config, ) @@ -1954,7 +1954,7 @@ async def test_mcp_tool_execution_with_error( events = capture_events() await agents.Runner.run( - test_agent, + agent, "Please use failing MCP tool", run_config=test_run_config, ) @@ -2082,7 +2082,7 @@ async def test_mcp_tool_execution_without_pii( events = capture_events() await agents.Runner.run( - test_agent, + agent, "Please use MCP tool", run_config=test_run_config, )