diff --git a/openai_agents/otel_tracing/README.md b/openai_agents/otel_tracing/README.md new file mode 100644 index 00000000..701d0e99 --- /dev/null +++ b/openai_agents/otel_tracing/README.md @@ -0,0 +1,260 @@ +# OpenTelemetry (OTEL) Tracing + +Examples demonstrating OpenTelemetry tracing integration for OpenAI Agents workflows. + +*For background on OpenTelemetry integration, see the [SDK documentation](https://github.com/temporalio/sdk-python/blob/main/temporalio/contrib/openai_agents/README.md#opentelemetry-integration).* + +This example shows three progressive patterns: +1. **Basic**: Pure automatic instrumentation - plugin handles everything +2. **Custom Spans**: Automatic instrumentation + `custom_span()` for logical grouping +3. **Direct API**: `custom_span()` + direct OpenTelemetry API for detailed instrumentation + +## Prerequisites + +You need an OTEL-compatible backend running locally. For quick setup with Grafana Tempo: + +```bash +git clone https://github.com/grafana/tempo.git +cd tempo/example/docker-compose/local +mkdir tempo-data/ +docker compose up -d +``` + +View traces at: http://localhost:3000/explore + +Alternatively, use Jaeger at http://localhost:16686/ + +## Running the Examples + +First, start the worker: +```bash +uv run openai_agents/otel_tracing/run_worker.py +``` + +Then run examples in separate terminals: + +### 1. Basic Example - Pure Automatic Instrumentation +Shows automatic tracing without any manual code: +```bash +uv run openai_agents/otel_tracing/run_otel_basic_workflow.py +``` + +### 2. Custom Spans Example - Logical Grouping +Shows using `custom_span()` to group related operations: +```bash +uv run openai_agents/otel_tracing/run_otel_custom_spans_workflow.py +``` + +### 3. 
Direct API Example - Detailed Custom Instrumentation +Shows using the direct OpenTelemetry API for fine-grained custom instrumentation: +```bash +uv run openai_agents/otel_tracing/run_otel_direct_api_workflow.py +``` + +## Example Progression + +The three examples show increasing levels of instrumentation: + +| Example | Manual Code | Use Case | +|---------|-------------|----------| +| **1. Basic** | None | Just want automatic tracing | +| **2. Custom Spans** | `trace()` + `custom_span()` | Group related operations logically | +| **3. Direct API** | `trace()` + `custom_span()` + OTEL tracer | Add detailed spans with custom attributes | + +## What Gets Traced + +The integration automatically creates spans for: +- Agent execution +- Model invocations (as Temporal activities) +- Tool/activity calls +- Workflow lifecycle events (optional) + +On top of that, choose one of three instrumentation patterns: +1. **Pure Automatic** (example 1): No code needed - plugin handles everything +2. **Custom Spans** (example 2): `trace()` + `custom_span()` from Agents SDK for logical grouping +3. **Direct OTEL API** (example 3): `trace()` + `custom_span()` + OTEL tracer for detailed spans with attributes + +**Key Rule**: Never use `trace()` in client code. Only use it inside workflows when you need `custom_span()` (patterns 2 and 3). 
+ +## Key Configuration + +### Plugin Setup (Worker & Client) + +```python +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, ModelActivityParameters + +exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) + +client = await Client.connect( + "localhost:7233", + plugins=[ + OpenAIAgentsPlugin( + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=60) + ), + otel_exporters=[exporter], # Enable OTEL export + add_temporal_spans=False, # Optional: exclude Temporal internal spans + ), + ], +) +``` + +### Exporters + +Common OTEL exporters: +- `OTLPSpanExporter` - For Grafana Tempo, Jaeger, and most OTEL backends +- `ConsoleSpanExporter` - For debugging (prints to console) +- Multiple exporters can be used simultaneously + +### Environment Variables + +Optionally set the service name: +```bash +export OTEL_SERVICE_NAME=my-agent-service +``` + +## Understanding Trace Context Patterns + +The integration supports three patterns depending on your instrumentation needs: + +### Pattern 1: Pure Automatic Instrumentation (Basic Example) +No manual code - plugin creates root trace automatically: + +```python +@workflow.defn +class MyWorkflow: + @workflow.run + async def run(self): + # No trace(), no custom_span() needed + # Plugin automatically creates root trace and all spans + result = await Runner.run(agent, input=question) + return result +``` + +### Pattern 2: Logical Grouping with Custom Spans (Custom Spans Example) +Use `trace()` in workflow + `custom_span()` for logical grouping: + +```python +from agents import trace, custom_span + +@workflow.defn +class MyWorkflow: + @workflow.run + async def run(self): + # trace() in workflow establishes context for custom_span() + with trace("My workflow"): + with custom_span("Multi-city check"): + # Group related operations + for city in cities: + result = await Runner.run(agent, 
input=f"Check {city}") + return result +``` + +**IMPORTANT**: When using `custom_span()`, you must wrap it with `trace()` in the workflow. Never use `trace()` in client code - only in workflows. + +### Pattern 3: Direct OTEL API (Direct API Example) +Use `trace()` + `custom_span()` wrapper + direct OpenTelemetry API for detailed instrumentation: + +```python +from agents import trace, custom_span +import opentelemetry.trace + +@workflow.defn +class MyWorkflow: + @workflow.run + async def run(self): + # trace() establishes root context, custom_span() bridges to OTEL + with trace("My workflow"): + with custom_span("My workflow logic"): + tracer = opentelemetry.trace.get_tracer(__name__) + + with tracer.start_as_current_span("Data processing") as span: + span.set_attribute("my.attribute", "value") + data = await self.process_data() + + with tracer.start_as_current_span("Business logic") as span: + result = await self.execute_business_logic(data) + return result +``` + +**Why both are required**: When using `custom_span()`, you must wrap it with `trace()` in the workflow. The `custom_span()` then bridges to OpenTelemetry's context system for direct API calls. + +### Worker Configuration for Direct OTEL API + +When using direct OTEL API (Pattern 3), configure sandbox passthrough: + +```python +from temporalio.worker import Worker +from temporalio.worker.workflow_sandbox import SandboxedWorkflowRunner, SandboxRestrictions + +worker = Worker( + client, + task_queue="my-queue", + workflows=[MyWorkflow], + # Required ONLY for Pattern 3 (direct OTEL API usage) + workflow_runner=SandboxedWorkflowRunner( + SandboxRestrictions.default.with_passthrough_modules("opentelemetry") + ), +) +``` + +**Note**: Patterns 1 and 2 (automatic and custom_span only) don't require sandbox configuration. 
+ +## Troubleshooting + +### Multiple separate traces instead of one unified trace + +**For Custom Spans (Pattern 2)**: Ensure you wrap `custom_span()` with `trace()` in the workflow: +```python +from agents import trace, custom_span + +@workflow.defn +class MyWorkflow: + @workflow.run + async def run(self): + # ✅ CORRECT - trace() wraps custom_span() + with trace("My workflow"): + with custom_span("My grouping"): + # Related operations + pass +``` + +**For Direct OTEL API (Pattern 3)**: Ensure the workflow wraps all direct OTEL calls in `custom_span()`, which is itself wrapped in `trace()`: +```python +from agents import trace, custom_span +import opentelemetry.trace + +@workflow.defn +class MyWorkflow: + @workflow.run + async def run(self): + # ✅ CORRECT - trace() wraps custom_span(); all direct OTEL spans go inside both + with trace("My workflow"), custom_span("My workflow logic"): + tracer = opentelemetry.trace.get_tracer(__name__) + with tracer.start_as_current_span("span1"): + pass + with tracer.start_as_current_span("span2"): + pass +``` + +**NEVER use `trace()` in client code** - this creates disconnected traces. Only use `trace()` inside workflows. 
+ +### Spans not appearing in backend +- Verify the OTLP endpoint is accessible: `http://localhost:4317` +- Check the backend is running: `docker compose ps` +- Ensure the workflow completes (spans are only exported on completion) + +### Custom or direct OTEL spans are orphaned +- **For Pattern 2 (custom_span)**: Ensure you use the `trace()` wrapper in the workflow +- **For Pattern 3 (direct OTEL)**: Verify the workflow wraps ALL direct OTEL calls in `custom_span()` inside `trace()` +- Check sandbox passthrough is configured for the `opentelemetry` module (Pattern 3 only) + +## Dependencies + +Required packages (already in `openai-agents` dependency group): +```text +temporalio[openai-agents,opentelemetry] +openinference-instrumentation-openai-agents +opentelemetry-exporter-otlp-proto-grpc +``` diff --git a/openai_agents/otel_tracing/__init__.py b/openai_agents/otel_tracing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openai_agents/otel_tracing/run_otel_basic_workflow.py b/openai_agents/otel_tracing/run_otel_basic_workflow.py new file mode 100644 index 00000000..27480b3a --- /dev/null +++ b/openai_agents/otel_tracing/run_otel_basic_workflow.py @@ -0,0 +1,39 @@ +import asyncio +import uuid +from datetime import timedelta + +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from temporalio.client import Client +from temporalio.contrib.openai_agents import ModelActivityParameters, OpenAIAgentsPlugin + +from openai_agents.otel_tracing.workflows.otel_basic_workflow import OtelBasicWorkflow + + +async def main(): + exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) + + client = await Client.connect( + "localhost:7233", + plugins=[ + OpenAIAgentsPlugin( + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=60) + ), + otel_exporters=[exporter], + add_temporal_spans=False, + ), + ], + ) + + result = await client.execute_workflow( + OtelBasicWorkflow.run, + "What's the weather like in Tokyo?", + 
id=f"otel-basic-workflow-{uuid.uuid4()}", + task_queue="otel-task-queue", + ) + + print(f"Result: {result}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/openai_agents/otel_tracing/run_otel_custom_spans_workflow.py b/openai_agents/otel_tracing/run_otel_custom_spans_workflow.py new file mode 100644 index 00000000..7ece84f4 --- /dev/null +++ b/openai_agents/otel_tracing/run_otel_custom_spans_workflow.py @@ -0,0 +1,40 @@ +import asyncio +import uuid +from datetime import timedelta + +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from temporalio.client import Client +from temporalio.contrib.openai_agents import ModelActivityParameters, OpenAIAgentsPlugin + +from openai_agents.otel_tracing.workflows.otel_custom_spans_workflow import ( + OtelCustomSpansWorkflow, +) + + +async def main(): + exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) + + client = await Client.connect( + "localhost:7233", + plugins=[ + OpenAIAgentsPlugin( + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=60) + ), + otel_exporters=[exporter], + add_temporal_spans=False, + ), + ], + ) + + result = await client.execute_workflow( + OtelCustomSpansWorkflow.run, + id=f"otel-custom-spans-workflow-{uuid.uuid4()}", + task_queue="otel-task-queue", + ) + + print(f"Result:\n{result}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/openai_agents/otel_tracing/run_otel_direct_api_workflow.py b/openai_agents/otel_tracing/run_otel_direct_api_workflow.py new file mode 100644 index 00000000..31151592 --- /dev/null +++ b/openai_agents/otel_tracing/run_otel_direct_api_workflow.py @@ -0,0 +1,41 @@ +import asyncio +import uuid +from datetime import timedelta + +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from temporalio.client import Client +from temporalio.contrib.openai_agents import ModelActivityParameters, OpenAIAgentsPlugin + +from 
openai_agents.otel_tracing.workflows.otel_direct_api_workflow import ( + OtelDirectApiWorkflow, +) + + +async def main(): + exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) + + client = await Client.connect( + "localhost:7233", + plugins=[ + OpenAIAgentsPlugin( + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=60) + ), + otel_exporters=[exporter], + add_temporal_spans=False, + ), + ], + ) + + result = await client.execute_workflow( + OtelDirectApiWorkflow.run, + "Paris", + id=f"otel-direct-api-workflow-{uuid.uuid4()}", + task_queue="otel-task-queue", + ) + + print(f"Result:\n{result}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/openai_agents/otel_tracing/run_worker.py b/openai_agents/otel_tracing/run_worker.py new file mode 100755 index 00000000..a6a5756d --- /dev/null +++ b/openai_agents/otel_tracing/run_worker.py @@ -0,0 +1,55 @@ +import asyncio +from datetime import timedelta + +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from temporalio.client import Client +from temporalio.contrib.openai_agents import ModelActivityParameters, OpenAIAgentsPlugin +from temporalio.worker import Worker +from temporalio.worker.workflow_sandbox import ( + SandboxedWorkflowRunner, + SandboxRestrictions, +) + +from openai_agents.otel_tracing.workflows.otel_basic_workflow import ( + OtelBasicWorkflow, + get_weather, +) +from openai_agents.otel_tracing.workflows.otel_custom_spans_workflow import ( + OtelCustomSpansWorkflow, +) +from openai_agents.otel_tracing.workflows.otel_direct_api_workflow import ( + OtelDirectApiWorkflow, +) + + +async def main(): + exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) + + client = await Client.connect( + "localhost:7233", + plugins=[ + OpenAIAgentsPlugin( + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=60) + ), + otel_exporters=[exporter], + add_temporal_spans=False, 
+ ), + ], + ) + + worker = Worker( + client, + task_queue="otel-task-queue", + workflows=[OtelBasicWorkflow, OtelCustomSpansWorkflow, OtelDirectApiWorkflow], + activities=[get_weather], + workflow_runner=SandboxedWorkflowRunner( + SandboxRestrictions.default.with_passthrough_modules("opentelemetry") + ), + ) + + await worker.run() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/openai_agents/otel_tracing/workflows/__init__.py b/openai_agents/otel_tracing/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openai_agents/otel_tracing/workflows/otel_basic_workflow.py b/openai_agents/otel_tracing/workflows/otel_basic_workflow.py new file mode 100644 index 00000000..97ff9530 --- /dev/null +++ b/openai_agents/otel_tracing/workflows/otel_basic_workflow.py @@ -0,0 +1,58 @@ +"""Basic OTEL tracing workflow demonstrating automatic instrumentation. + +This workflow shows pure automatic instrumentation - the plugin handles all trace +creation and span instrumentation without any manual code. +""" + +from dataclasses import dataclass +from datetime import timedelta + +from agents import Agent, Runner +from temporalio import activity, workflow +from temporalio.contrib import openai_agents as temporal_agents + + +@dataclass +class Weather: + city: str + temperature_range: str + conditions: str + + +@activity.defn +async def get_weather(city: str) -> str: + """Get the weather for a given city.""" + weather = Weather( + city=city, temperature_range="14-20C", conditions="Sunny with wind." + ) + return f"{weather.city}: {weather.conditions}, {weather.temperature_range}" + + +@workflow.defn +class OtelBasicWorkflow: + """Workflow demonstrating automatic OTEL instrumentation. + + The OTEL integration automatically creates spans for: + - Workflow execution + - Agent runs + - Model invocations (as activities) + - Tool/activity calls + + No manual instrumentation needed - just configure the plugin! 
+ """ + + @workflow.run + async def run(self, question: str) -> str: + agent = Agent( + name="Weather Assistant", + instructions="You are a helpful weather assistant.", + tools=[ + temporal_agents.workflow.activity_as_tool( + get_weather, start_to_close_timeout=timedelta(seconds=10) + ) + ], + ) + + # All spans are automatically created - no manual instrumentation required! + result = await Runner.run(agent, input=question) + return result.final_output diff --git a/openai_agents/otel_tracing/workflows/otel_custom_spans_workflow.py b/openai_agents/otel_tracing/workflows/otel_custom_spans_workflow.py new file mode 100644 index 00000000..639a05e2 --- /dev/null +++ b/openai_agents/otel_tracing/workflows/otel_custom_spans_workflow.py @@ -0,0 +1,78 @@ +"""Custom spans workflow demonstrating logical grouping with trace() + custom_span(). + +This workflow shows how to use trace() wrapper with custom_span() to create logical +groupings of related operations, while still benefiting from automatic instrumentation +of agent/model/activity calls. + +IMPORTANT: When using custom_span(), wrap it with trace() in the workflow (not client). +""" + +from dataclasses import dataclass +from datetime import timedelta + +from agents import Agent, Runner, custom_span, trace +from temporalio import activity, workflow +from temporalio.contrib import openai_agents as temporal_agents + + +@dataclass +class Weather: + city: str + temperature_range: str + conditions: str + + +@activity.defn +async def get_weather(city: str) -> str: + """Get the weather for a given city.""" + weather = Weather( + city=city, temperature_range="14-20C", conditions="Sunny with wind." + ) + return f"{weather.city}: {weather.conditions}, {weather.temperature_range}" + + +@workflow.defn +class OtelCustomSpansWorkflow: + """Workflow demonstrating custom spans for logical grouping. + + This example shows how to use trace() + custom_span() to create logical + groupings of related operations. 
This pattern is useful when you want to: + - Group related operations under a single span + - Add meaningful structure to your traces + - Keep instrumentation simple while adding context + + IMPORTANT: When using custom_span(), you must wrap it with trace() in the + workflow to establish proper trace context. Never use trace() in client code. + + The OTEL integration still automatically creates spans for: + - Workflow execution + - Agent runs + - Model invocations (as activities) + - Tool/activity calls + """ + + @workflow.run + async def run(self) -> str: + with trace("Custom span sample"): + agent = Agent( + name="Weather Assistant", + instructions="You are a helpful weather assistant. Be concise.", + tools=[ + temporal_agents.workflow.activity_as_tool( + get_weather, start_to_close_timeout=timedelta(seconds=10) + ) + ], + ) + + # Use custom_span to group multiple related agent calls under one logical operation + # This makes it easy to see all weather checks for this request in your trace + with custom_span("Multi-city weather check"): + cities = ["Tokyo", "Paris", "New York"] + results = [] + for city in cities: + result = await Runner.run( + agent, input=f"What's the weather in {city}?" + ) + results.append(f"{city}: {result.final_output}") + + return "\n\n".join(results) diff --git a/openai_agents/otel_tracing/workflows/otel_direct_api_workflow.py b/openai_agents/otel_tracing/workflows/otel_direct_api_workflow.py new file mode 100644 index 00000000..6a869823 --- /dev/null +++ b/openai_agents/otel_tracing/workflows/otel_direct_api_workflow.py @@ -0,0 +1,136 @@ +"""Direct OTEL API usage workflow demonstrating custom instrumentation. + +This workflow shows how to use the OpenTelemetry API directly in workflows +to instrument custom business logic, add domain-specific spans, and set +custom attributes. + +CRITICAL REQUIREMENTS: +1. Use trace() wrapper with custom_span() from Agents SDK to establish context +2. 
Wrap direct OTEL tracer calls in custom_span() (establishes OTEL bridge) +3. Configure sandbox passthrough for opentelemetry module in worker + +Pattern: trace() -> custom_span() -> tracer.start_as_current_span() +""" + +from dataclasses import dataclass +from datetime import timedelta + +import opentelemetry.trace +from agents import Agent, Runner, custom_span, trace +from temporalio import activity, workflow +from temporalio.contrib import openai_agents as temporal_agents + + +@dataclass +class Weather: + city: str + temperature_range: str + conditions: str + air_quality: str = "Good" + + +@activity.defn +async def get_weather(city: str) -> str: + """Get the weather for a given city.""" + weather = Weather( + city=city, + temperature_range="14-20C", + conditions="Sunny with wind.", + air_quality="Good", + ) + return f"{weather.city}: {weather.conditions}, {weather.temperature_range}, Air Quality: {weather.air_quality}" + + +def validate_city_name(city: str) -> bool: + """Validate that city name is reasonable.""" + # Simple validation logic + return len(city) > 0 and len(city) < 100 and city.replace(" ", "").isalpha() + + +def calculate_travel_score(weather: str) -> int: + """Calculate a travel score based on weather conditions.""" + # Simple scoring logic + score = 50 + if "sunny" in weather.lower(): + score += 30 + if "wind" in weather.lower(): + score += 10 + if "good" in weather.lower(): + score += 10 + return score + + +@workflow.defn +class OtelDirectApiWorkflow: + """Workflow demonstrating direct OTEL API usage for custom instrumentation. 
+ + This workflow shows practical use cases for direct OTEL API: + - Instrumenting business logic validation + - Adding domain-specific spans with custom attributes + - Setting custom attributes for observability + - Creating detailed traces with business metrics + + IMPORTANT: When using direct OTEL API, wrap everything in trace() + custom_span(): + - trace() establishes the root trace context (required when using custom_span) + - custom_span() bridges to OpenTelemetry context for direct tracer calls + - Direct OTEL spans (tracer.start_as_current_span) go inside custom_span() + """ + + @workflow.run + async def run(self, city: str) -> str: + # trace() establishes the root context needed for custom_span() and direct OTEL API + with trace("Travel recommendation workflow"): + # custom_span() establishes OTEL context bridge for direct OTEL API calls + with custom_span("Travel recommendation processing"): + tracer = opentelemetry.trace.get_tracer(__name__) + + # Custom instrumentation: validate input + with tracer.start_as_current_span("validate-input") as span: + span.set_attribute("input.city", city) + is_valid = validate_city_name(city) + span.set_attribute( + "validation.result", "valid" if is_valid else "invalid" + ) + + if not is_valid: + span.set_attribute("error", "Invalid city name") + return "Invalid city name provided" + + # Agent execution with automatic instrumentation + agent = Agent( + name="Travel Weather Assistant", + instructions="You are a helpful travel weather assistant. Provide weather information in a friendly way.", + tools=[ + temporal_agents.workflow.activity_as_tool( + get_weather, start_to_close_timeout=timedelta(seconds=10) + ) + ], + ) + + with tracer.start_as_current_span("fetch-weather-info") as span: + span.set_attribute("request.city", city) + result = await Runner.run( + agent, input=f"What's the weather like in {city}?" 
+ ) + weather_info = result.final_output + span.set_attribute("response.length", len(weather_info)) + + # Custom instrumentation: calculate business metric + with tracer.start_as_current_span("calculate-travel-score") as span: + span.set_attribute("city", city) + travel_score = calculate_travel_score(weather_info) + span.set_attribute("travel.score", travel_score) + span.set_attribute( + "travel.recommendation", + "recommended" if travel_score > 70 else "not_recommended", + ) + + # Custom instrumentation: format final response + with tracer.start_as_current_span("format-response") as span: + span.set_attribute("include.score", True) + final_response = ( + f"{weather_info}\n\nTravel Score: {travel_score}/100" + ) + span.set_attribute("response.final_length", len(final_response)) + + return final_response diff --git a/pyproject.toml b/pyproject.toml index bf97ef04..9553e41d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,9 @@ open-telemetry = [ ] openai-agents = [ "openai-agents[litellm] == 0.3.2", - "temporalio[openai-agents] >= 1.18.0", + "temporalio[openai-agents,opentelemetry] >= 1.18.0", + "openinference-instrumentation-openai-agents>=0.1.0", + "opentelemetry-exporter-otlp-proto-grpc", "requests>=2.32.0,<3", ] pydantic-converter = ["pydantic>=2.10.6,<3"]