Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 206 additions & 114 deletions tests/integrations/openai_agents/test_openai_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import os
import json
import logging
import httpx

import sentry_sdk
from sentry_sdk import start_span
from sentry_sdk.consts import SPANDATA
from sentry_sdk.consts import SPANDATA, OP
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
Expand Down Expand Up @@ -314,6 +315,25 @@ def test_agent_custom_model():
)


@pytest.fixture
def get_model_response():
    """
    Fixture returning a factory for canned HTTP responses.

    The returned callable takes an object exposing ``model_dump()`` and
    produces an ``httpx.Response`` with status 200 whose body is the
    UTF-8 encoded JSON serialization of that dump. The response is bound
    to a ``POST /responses`` request.
    """

    def build_response(response_content):
        # Attach a request to the response up front; presumably callers
        # need ``response.request`` to be populated -- TODO confirm.
        request = httpx.Request("POST", "/responses")
        body = json.dumps(response_content.model_dump()).encode("utf-8")

        return httpx.Response(200, request=request, content=body)

    return build_response


@pytest.mark.asyncio
async def test_agent_invocation_span_no_pii(
sentry_init, capture_events, test_agent, mock_model_response
Expand Down Expand Up @@ -917,85 +937,120 @@ def test_agent_invocation_span_sync(


@pytest.mark.asyncio
async def test_handoff_span(sentry_init, capture_events, mock_usage):
async def test_handoff_span(sentry_init, capture_events, get_model_response):
"""
Test that handoff spans are created when agents hand off to other agents.
"""
client = AsyncOpenAI(api_key="test-key")
model = OpenAIResponsesModel(model="gpt-4-mini", openai_client=client)

# Create two simple agents with a handoff relationship
secondary_agent = agents.Agent(
name="secondary_agent",
instructions="You are a secondary agent.",
model="gpt-4o-mini",
model=model,
)

primary_agent = agents.Agent(
name="primary_agent",
instructions="You are a primary agent that hands off to secondary agent.",
model="gpt-4o-mini",
model=model,
handoffs=[secondary_agent],
)

with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
# Mock two responses:
# 1. Primary agent calls handoff tool
# 2. Secondary agent provides final response
handoff_response = ModelResponse(
output=[
ResponseFunctionToolCall(
id="call_handoff_123",
call_id="call_handoff_123",
name="transfer_to_secondary_agent",
type="function_call",
arguments="{}",
)
],
usage=mock_usage,
response_id="resp_handoff_123",
)

final_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="I'm the specialist and I can help with that!",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=mock_usage,
response_id="resp_final_123",
)
handoff_response = get_model_response(
Response(
id="resp_tool_123",
output=[
ResponseFunctionToolCall(
id="call_handoff_123",
call_id="call_handoff_123",
name="transfer_to_secondary_agent",
type="function_call",
arguments="{}",
)
],
parallel_tool_calls=False,
tool_choice="none",
tools=[],
created_at=10000000,
model="gpt-4",
object="response",
usage=ResponseUsage(
input_tokens=10,
input_tokens_details=InputTokensDetails(
cached_tokens=0,
),
output_tokens=20,
output_tokens_details=OutputTokensDetails(
reasoning_tokens=5,
),
total_tokens=30,
),
)
)

mock_get_response.side_effect = [handoff_response, final_response]
final_response = get_model_response(
Response(
id="resp_final_123",
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="I'm the specialist and I can help with that!",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
parallel_tool_calls=False,
tool_choice="none",
tools=[],
created_at=10000000,
model="gpt-4",
object="response",
usage=ResponseUsage(
input_tokens=10,
input_tokens_details=InputTokensDetails(
cached_tokens=0,
),
output_tokens=20,
output_tokens_details=OutputTokensDetails(
reasoning_tokens=5,
),
total_tokens=30,
),
)
)

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
)
with patch.object(
primary_agent.model._client._client,
"send",
side_effect=[handoff_response, final_response],
) as _:
sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
)

events = capture_events()
events = capture_events()

result = await agents.Runner.run(
primary_agent,
"Please hand off to secondary agent",
run_config=test_run_config,
)
result = await agents.Runner.run(
primary_agent,
"Please hand off to secondary agent",
run_config=test_run_config,
)

assert result is not None
assert result is not None

(transaction,) = events
spans = transaction["spans"]
handoff_span = spans[2]
handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF)

# Verify handoff span was created
assert handoff_span is not None
Expand All @@ -1006,85 +1061,122 @@ async def test_handoff_span(sentry_init, capture_events, mock_usage):


@pytest.mark.asyncio
async def test_max_turns_before_handoff_span(sentry_init, capture_events, mock_usage):
async def test_max_turns_before_handoff_span(
sentry_init, capture_events, get_model_response
):
"""
Example raising agents.exceptions.AgentsException after the agent invocation span is complete.
"""
client = AsyncOpenAI(api_key="test-key")
model = OpenAIResponsesModel(model="gpt-4-mini", openai_client=client)

# Create two simple agents with a handoff relationship
secondary_agent = agents.Agent(
name="secondary_agent",
instructions="You are a secondary agent.",
model="gpt-4o-mini",
model=model,
)

primary_agent = agents.Agent(
name="primary_agent",
instructions="You are a primary agent that hands off to secondary agent.",
model="gpt-4o-mini",
model=model,
handoffs=[secondary_agent],
)

with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
with patch(
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
) as mock_get_response:
# Mock two responses:
# 1. Primary agent calls handoff tool
# 2. Secondary agent provides final response
handoff_response = ModelResponse(
output=[
ResponseFunctionToolCall(
id="call_handoff_123",
call_id="call_handoff_123",
name="transfer_to_secondary_agent",
type="function_call",
arguments="{}",
)
],
usage=mock_usage,
response_id="resp_handoff_123",
)

final_response = ModelResponse(
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="I'm the specialist and I can help with that!",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
usage=mock_usage,
response_id="resp_final_123",
)
handoff_response = get_model_response(
Response(
id="resp_tool_123",
output=[
ResponseFunctionToolCall(
id="call_handoff_123",
call_id="call_handoff_123",
name="transfer_to_secondary_agent",
type="function_call",
arguments="{}",
)
],
parallel_tool_calls=False,
tool_choice="none",
tools=[],
created_at=10000000,
model="gpt-4",
object="response",
usage=ResponseUsage(
input_tokens=10,
input_tokens_details=InputTokensDetails(
cached_tokens=0,
),
output_tokens=20,
output_tokens_details=OutputTokensDetails(
reasoning_tokens=5,
),
total_tokens=30,
),
)
)

mock_get_response.side_effect = [handoff_response, final_response]
final_response = get_model_response(
Response(
id="resp_final_123",
output=[
ResponseOutputMessage(
id="msg_final",
type="message",
status="completed",
content=[
ResponseOutputText(
text="I'm the specialist and I can help with that!",
type="output_text",
annotations=[],
)
],
role="assistant",
)
],
parallel_tool_calls=False,
tool_choice="none",
tools=[],
created_at=10000000,
model="gpt-4",
object="response",
usage=ResponseUsage(
input_tokens=10,
input_tokens_details=InputTokensDetails(
cached_tokens=0,
),
output_tokens=20,
output_tokens_details=OutputTokensDetails(
reasoning_tokens=5,
),
total_tokens=30,
),
)
)

sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
)
with patch.object(
primary_agent.model._client._client,
"send",
side_effect=[handoff_response, final_response],
) as _:
sentry_init(
integrations=[OpenAIAgentsIntegration()],
traces_sample_rate=1.0,
)

events = capture_events()
events = capture_events()

with pytest.raises(MaxTurnsExceeded):
await agents.Runner.run(
primary_agent,
"Please hand off to secondary agent",
run_config=test_run_config,
max_turns=1,
)
with pytest.raises(MaxTurnsExceeded):
await agents.Runner.run(
primary_agent,
"Please hand off to secondary agent",
run_config=test_run_config,
max_turns=1,
)

(error, transaction) = events
spans = transaction["spans"]
handoff_span = spans[2]
handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF)

# Verify handoff span was created
assert handoff_span is not None
Expand Down
Loading