diff --git a/docs.json b/docs.json
index 13fe53d0..f9f6f3bb 100644
--- a/docs.json
+++ b/docs.json
@@ -42,6 +42,12 @@
         "docs/billing"
       ]
     },
+    {
+      "group": "Use cases",
+      "pages": [
+        "docs/use-cases/computer-use"
+      ]
+    },
     {
       "group": "Code Interpreting",
       "pages": [
diff --git a/docs/template/examples/desktop.mdx b/docs/template/examples/desktop.mdx
index 2be2e915..c080d71d 100644
--- a/docs/template/examples/desktop.mdx
+++ b/docs/template/examples/desktop.mdx
@@ -3,6 +3,17 @@ title: "Desktop"
 description: "Sandbox with Ubuntu Desktop and VNC access"
 ---
 
+This template creates a sandbox with a full Ubuntu 22.04 desktop environment, including the XFCE desktop, common applications, and VNC streaming for remote access. It's ideal for building AI agents that need to interact with graphical user interfaces.
+
+The template includes:
+- **Ubuntu 22.04** with the XFCE desktop environment
+- **VNC streaming** via [noVNC](https://novnc.com/) for browser-based access
+- **Pre-installed applications**: LibreOffice, text editors, a file manager, and common utilities
+- **Automation tools**: [xdotool](https://github.com/jordansissel/xdotool) and [scrot](https://github.com/resurrecting-open-source-projects/scrot) for programmatic desktop control
+
+## Template Definition
+
+The template installs the desktop environment, sets up VNC streaming via [x11vnc](https://github.com/LibVNC/x11vnc) and noVNC, and configures a startup script.
@@ -79,6 +90,7 @@ template = (
         "apt-get update",
         "apt-get install -y \
             xserver-xorg \
+            xorg \
             x11-xserver-utils \
             xvfb \
             x11-utils \
@@ -131,6 +143,9 @@ template = (
 
+## Startup Script
+
+The startup script initializes the virtual display using [Xvfb](https://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml) (X Virtual Framebuffer), launches the XFCE desktop session, starts the VNC server, and exposes the desktop via noVNC on port 6080. This script runs automatically when the sandbox starts.
 
 ```bash start_command.sh
 #!/bin/bash
@@ -156,6 +171,9 @@ cd /opt/noVNC/utils && ./novnc_proxy --vnc localhost:5900 --listen 6080 --web /o
 
 sleep 2
 ```
 
+## Building the Template
+
+Build the template with increased CPU and memory allocation to handle the desktop environment installation. The build process may take several minutes due to the size of the packages being installed.
diff --git a/docs/use-cases/computer-use.mdx b/docs/use-cases/computer-use.mdx
new file mode 100644
index 00000000..f50f312a
--- /dev/null
+++ b/docs/use-cases/computer-use.mdx
@@ -0,0 +1,1865 @@
---
title: "Computer Use"
description: "Build AI agents that interact with virtual desktops using the OpenAI Computer Use API and E2B Desktop sandboxes with real-time visual feedback."
icon: "desktop"
---

This guide walks you through building an AI agent that can see and control a virtual Linux desktop — clicking, typing, scrolling, and navigating applications autonomously. It's based on [E2B Surf](https://github.com/e2b-dev/surf), an open-source computer use agent. Try the [live demo](https://surf.e2b.dev).

## Related Guides

- Build desktop sandboxes with Ubuntu, XFCE, and VNC streaming
- Integrate AI models with sandboxes using tool calling
- Create, manage, and control sandbox lifecycle
- Stream stdout, stderr, and results in real-time
- Manage files within the sandbox filesystem
- Set up authentication for E2B sandboxes

Full source code is available in the [E2B Surf repository](https://github.com/e2b-dev/surf).
+ +## Project Structure + +This starter project follows a standard [Next.js App Router](https://nextjs.org/docs/app) structure with additional directories for AI services, utilities, and type definitions. The separation keeps concerns modular and makes the codebase easy to navigate. + +```text Project Structure +surf-starter/ +├── app/ +│ ├── api/chat/ +│ │ └── route.ts // SSE endpoint - handles AI loop + sandbox +│ ├── actions.ts // Server actions for sandbox management +│ ├── layout.tsx // Root layout with metadata +│ └── page.tsx // Main UI - chat interface + VNC viewer +├── lib/ +│ ├── ai/ +│ │ └── instructions.ts // System prompt for AI agent +│ ├── services/ +│ │ └── openai.ts // Computer use loop with OpenAI +│ ├── utils/ +│ │ ├── actions.ts // Execute computer actions on sandbox +│ │ ├── screenshot.ts // Process and resize screenshots +│ │ └── stream.ts // SSE streaming utilities +│ ├── constants.ts // Configuration constants +│ └── env.ts // Environment validation +├── styles/ +│ └── globals.css // Application styling +├── types/ +│ └── index.ts // TypeScript types and interfaces +├── .env // API keys (E2B, OpenAI) +├── package.json // Dependencies +└── tsconfig.json // TypeScript configuration +``` + +--- + +## How It Works + +This application creates an autonomous AI loop that enables natural language control of a virtual Linux desktop: + +1. **User Input** - You send a natural language command like "Open Firefox and search for AI news" +2. **Sandbox Creation** - E2B spins up an Ubuntu 22.04 desktop with [XFCE](https://xfce.org/) and [VNC](https://en.wikipedia.org/wiki/Virtual_Network_Computing) streaming for remote viewing (if not already running) +3. **Visual Analysis** - The AI receives a screenshot of the current desktop state +4. **Action Planning** - [OpenAI Computer Use API](https://platform.openai.com/docs/guides/computer-use) analyzes the screenshot and decides what action to take +5. **Action Execution** - The action (click, type, scroll, etc.) is executed on the desktop via E2B SDK +6. **Feedback Loop** - A new screenshot is taken and sent back to the AI +7. **Iteration** - The loop continues until the task is complete (maximum 15 iterations) + +All updates stream to your browser in real-time via [Server-Sent Events (SSE)](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events), giving you live visibility into what the AI is thinking and doing. + +--- + +## Implementation + +### Step 1: Project Setup + +Initialize a new Next.js project and install the required dependencies. + + + + ```bash + npx create-next-app@latest surf-starter --typescript --app --no-tailwind + cd surf-starter + ``` + + + + ```bash + npm install @e2b/desktop openai sharp + ``` + + **Dependencies explained:** + - `@e2b/desktop` - E2B Desktop SDK for controlling virtual Linux desktops + - `openai` - OpenAI SDK for Computer Use API integration + - [`sharp`](https://sharp.pixelplumbing.com/) - Fast image processing library for screenshot optimization + + + +### Step 2: Environment Configuration + +Set up your API keys and create environment validation utilities. 
+ + + + Create a `.env` file in your project root: + + ```env + E2B_API_KEY=your_e2b_api_key_here + OPENAI_API_KEY=your_openai_api_key_here + ``` + + Get your API keys: + - E2B API Key: [https://e2b.dev/docs/api-key](https://e2b.dev/docs/api-key) + - OpenAI API Key: [https://platform.openai.com/api-keys](https://platform.openai.com/api-keys) + + + + Create `lib/env.ts` to validate environment variables: + + ```typescript JavaScript & TypeScript + // lib/env.ts + export function getEnv() { + const E2B_API_KEY = process.env.E2B_API_KEY; + const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + + if (!E2B_API_KEY || !OPENAI_API_KEY) { + throw new Error('Missing required environment variables'); + } + + return { E2B_API_KEY, OPENAI_API_KEY }; + } + + export function isEnvironmentConfigured(): boolean { + return !!(process.env.E2B_API_KEY && process.env.OPENAI_API_KEY); + } + ``` + + + +### Step 3: Type Definitions + +Define TypeScript interfaces for type safety throughout the application. + +Create `types/index.ts` with core application types: + +```typescript JavaScript & TypeScript +// types/index.ts + +// Message structure for chat interface +export interface ChatMessage { + role: 'user' | 'assistant' | 'system' | 'action'; + content: string; +} + +// Computer actions the AI can execute +export type ComputerAction = + | { type: 'click'; x: number; y: number; button: 'left' | 'right' | 'wheel' } + | { type: 'double_click'; x: number; y: number } + | { type: 'type'; text: string } + | { type: 'key' | 'keypress'; keys?: string[]; key?: string } + | { type: 'move'; x: number; y: number } + | { type: 'drag'; start_x: number; start_y: number; x: number; y: number } + | { type: 'scroll'; amount: number } + | { type: 'wait'; duration?: number } + | { type: 'screenshot' }; + +// SSE events for real-time updates +export interface SSEEvent { + type: 'sandbox_created' | 'reasoning' | 'action' | 'action_completed' | 'done' | 'error'; + content?: string; + action?: string; + sandboxId?: string; + url?: string; + message?: string; +} + +// Conversation tracking for context +export interface ConversationTurn { + userMessage: string; + aiResponse: string; + timestamp: number; +} +``` + +The `ComputerAction` discriminated union ensures type-safe action handling throughout the application. + +### Step 4: Configuration Constants + +Centralize all configuration values for easy management. + +Create `lib/constants.ts` with application-wide constants: + +```typescript JavaScript & TypeScript +// lib/constants.ts + +// Sandbox configuration +export const SANDBOX_CONFIG = { + TIMEOUT_MS: 300_000, // 5 minutes initial timeout + TIMEOUT_SECONDS: 300, + AUTO_EXTEND_THRESHOLD: 10, + ACTIVE_WORK_TIMEOUT_MS: 600_000, // 10 minutes during active work + MIN_EXTEND_INTERVAL_MS: 30_000, // Minimum 30s between extensions +} as const; + +// Screenshot processing +export const SCREENSHOT_CONFIG = { + MAX_WIDTH: 1024, + MAX_HEIGHT: 768, + MIN_WIDTH: 640, + MIN_HEIGHT: 480, +} as const; + +// AI model configuration +export const AI_CONFIG = { + MODEL: 'computer-use-preview', // OpenAI computer use model + MAX_ITERATIONS: 15, // Maximum loop iterations + MAX_WAIT_DURATION: 1500, // Maximum wait time (ms) + REASONING_EFFORT: 'medium', // AI reasoning level +} as const; + +// API configuration +export const API_CONFIG = { + MAX_DURATION: 300, // 5 minutes per request + RUNTIME: 'nodejs', +} as const; +``` + +These constants make it easy to adjust timeouts, screenshot sizes, and AI behavior without hunting through code. 
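One coupling worth keeping in mind: `SCREENSHOT_CONFIG` must stay in sync with the display size declared to the model, or the model's click coordinates will drift off-target. Here's a minimal sketch of how these constants are consumed — illustrative only, with `createConfiguredSandbox` being a hypothetical helper; the real call sites are in Steps 7 and 8:

```typescript JavaScript & TypeScript
// Illustrative sketch - the real wiring lives in lib/services/openai.ts
// and app/api/chat/route.ts (Steps 7 and 8).
import { Sandbox } from '@e2b/desktop';
import { SANDBOX_CONFIG, SCREENSHOT_CONFIG } from '@/lib/constants';

export async function createConfiguredSandbox() {
  // Sandbox lifetime comes straight from SANDBOX_CONFIG
  const sandbox = await Sandbox.create({ timeoutMs: SANDBOX_CONFIG.TIMEOUT_MS });

  // The computer tool must advertise the same dimensions the processed
  // screenshots use (see Step 5), so the model's click coordinates map
  // 1:1 onto what it actually sees.
  const computerTool = {
    type: 'computer_use_preview' as const,
    display_width: SCREENSHOT_CONFIG.MAX_WIDTH,
    display_height: SCREENSHOT_CONFIG.MAX_HEIGHT,
    environment: 'linux' as const,
  };

  return { sandbox, computerTool };
}
```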
### Step 5: Utility Functions

Build helper functions for screenshot processing, streaming, and action execution.

Create `lib/utils/screenshot.ts` to optimize screenshots:

```typescript JavaScript & TypeScript
// lib/utils/screenshot.ts
import sharp from 'sharp';
import { SCREENSHOT_CONFIG } from '@/lib/constants';

export async function processScreenshot(
  screenshotBuffer: Uint8Array | Buffer
): Promise<string> {
  const processedBuffer = await sharp(screenshotBuffer)
    .resize(SCREENSHOT_CONFIG.MAX_WIDTH, SCREENSHOT_CONFIG.MAX_HEIGHT, {
      fit: 'contain',
      background: { r: 0, g: 0, b: 0, alpha: 1 },
    })
    .png()
    .toBuffer();

  return processedBuffer.toString('base64');
}
```

This function resizes screenshots to optimal dimensions and converts them to base64 for API transmission.

Create `lib/utils/stream.ts` for SSE streaming:

```typescript JavaScript & TypeScript
// lib/utils/stream.ts
export function createSafeStreamController(
  controller: ReadableStreamDefaultController
) {
  let isControllerClosed = false;

  const safeEnqueue = (data: Uint8Array): void => {
    if (!isControllerClosed) {
      try {
        controller.enqueue(data);
      } catch (error) {
        isControllerClosed = true;
      }
    }
  };

  const safeClose = (): void => {
    if (!isControllerClosed) {
      try {
        controller.close();
        isControllerClosed = true;
      } catch (error) {
        isControllerClosed = true;
      }
    }
  };

  return { enqueue: safeEnqueue, close: safeClose };
}

export function createSSEEvent(event: object): string {
  return `data: ${JSON.stringify(event)}\n\n`;
}
```

The safe stream controller prevents "already closed" errors during SSE streaming.

Create `lib/utils/actions.ts` to map AI actions to E2B SDK calls:

```typescript JavaScript & TypeScript
// lib/utils/actions.ts
import type { Sandbox } from '@e2b/desktop';
import type { ComputerAction } from '@/types';

export async function executeComputerAction(
  sandbox: Sandbox,
  action: ComputerAction
): Promise<void> {
  switch (action.type) {
    case 'click':
      if (action.button === 'left') {
        await sandbox.leftClick(action.x, action.y);
      } else if (action.button === 'right') {
        await sandbox.rightClick(action.x, action.y);
      }
      break;

    case 'double_click':
      await sandbox.doubleClick(action.x, action.y);
      break;

    case 'type':
      await sandbox.write(action.text);
      break;

    case 'key':
    case 'keypress': {
      const key = action.keys?.[0] || action.key;
      if (key) await sandbox.press(key);
      break;
    }

    case 'move':
      await sandbox.moveMouse(action.x, action.y);
      break;

    case 'scroll':
      await sandbox.scroll(action.amount < 0 ? 'up' : 'down');
      break;

    case 'wait':
      await new Promise(resolve =>
        setTimeout(resolve, Math.min(action.duration || 1000, 3000))
      );
      break;

    // 'drag' and 'screenshot' fall through unhandled here; the loop takes
    // a fresh screenshot after every action regardless.
  }
}

export function formatActionForDisplay(action: ComputerAction): string {
  switch (action.type) {
    case 'click':
      return `Click ${action.button} at (${action.x}, ${action.y})`;
    case 'type':
      return `Type: "${action.text}"`;
    case 'key':
    case 'keypress':
      return `Press key: ${action.keys?.[0] || action.key}`;
    default:
      return `Action: ${action.type}`;
  }
}
```

This utility translates OpenAI Computer Use actions into E2B Desktop SDK method calls.
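Here's a quick usage sketch of the two helpers — a hypothetical standalone script, not part of the app; the real caller is the loop in Step 7:

```typescript JavaScript & TypeScript
// usage-sketch.ts - hypothetical standalone example.
// Assumes E2B_API_KEY is set in the environment.
import { Sandbox } from '@e2b/desktop';
import { executeComputerAction, formatActionForDisplay } from '@/lib/utils/actions';
import type { ComputerAction } from '@/types';

async function demo() {
  const sandbox = await Sandbox.create();

  const action: ComputerAction = { type: 'click', x: 100, y: 200, button: 'left' };
  console.log(formatActionForDisplay(action)); // "Click left at (100, 200)"
  await executeComputerAction(sandbox, action);

  await sandbox.kill();
}

demo().catch(console.error);
```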
### Step 6: AI System Prompt

Define the system instructions that guide the AI agent's behavior.

Create `lib/ai/instructions.ts` with the AI agent prompt:

```typescript JavaScript & TypeScript
// lib/ai/instructions.ts
export const SYSTEM_INSTRUCTIONS = `You are Surf, an AI assistant that controls a Linux desktop to help users with tasks.

ENVIRONMENT:
- Ubuntu 22.04 desktop with Firefox, VS Code, LibreOffice, Terminal, File Manager, Text Editor
- Desktop has bottom taskbar with application launchers
- Desktop is ready - you can start immediately

AVAILABLE ACTIONS:
- screenshot: View current desktop state
- click/double_click: Click at coordinates (left/right/middle button)
- type: Type text into focused field
- key: Press keyboard keys (ENTER, ESCAPE, TAB, BACKSPACE, etc.)
- move: Move mouse cursor
- drag: Drag between two positions
- scroll: Scroll up or down
- wait: Pause briefly (use only after opening apps or loading pages)

EXECUTION GUIDELINES:
1. Take screenshots to see the current state
2. Identify UI elements using coordinates from screenshots
3. Execute actions precisely
4. After opening applications or loading pages, wait 1-2 seconds for them to load
5. After terminal commands, press ENTER to execute
6. Complete tasks efficiently with minimal delays

AUTONOMY:
- Execute tasks directly when intent is clear
- Ask clarifying questions only when there's genuine ambiguity
- When user confirms ("yes", "proceed", "do it"), take the next action immediately

COMPLETION:
- When done, explain what you accomplished
- Stop taking actions once the goal is achieved

Be helpful, precise, and efficient.`;
```

This prompt is crucial for effective agent behavior. It teaches the AI about the environment, available actions, and expected execution patterns.

### Step 7: Computer Use Loop

Implement the core AI execution loop that powers desktop control.
Create `lib/services/openai.ts` with the computer use loop:

```typescript JavaScript & TypeScript
// lib/services/openai.ts
import OpenAI from 'openai';
import type { Sandbox } from '@e2b/desktop';
import { AI_CONFIG, SCREENSHOT_CONFIG } from '@/lib/constants';
import { SYSTEM_INSTRUCTIONS } from '@/lib/ai/instructions';
import { processScreenshot } from '@/lib/utils/screenshot';
import { executeComputerAction, formatActionForDisplay } from '@/lib/utils/actions';
import { getEnv } from '@/lib/env';

export async function runComputerUseLoop(
  sandbox: Sandbox,
  userMessage: string,
  sendEvent: (data: Uint8Array) => void
): Promise<void> {
  const { OPENAI_API_KEY } = getEnv();
  const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
  const encoder = new TextEncoder();

  // Take initial screenshot
  const screenshotBuffer = await sandbox.screenshot();
  const screenshotBase64 = await processScreenshot(screenshotBuffer);

  // Define computer tool
  const computerTool = {
    type: 'computer_use_preview' as const,
    display_width: SCREENSHOT_CONFIG.MAX_WIDTH,
    display_height: SCREENSHOT_CONFIG.MAX_HEIGHT,
    environment: 'linux' as const,
  };

  // Create initial request with screenshot
  let response = await openai.responses.create({
    model: AI_CONFIG.MODEL,
    tools: [computerTool],
    input: [{
      type: 'message',
      role: 'user',
      content: [
        { type: 'input_text', text: userMessage },
        { type: 'input_image', image_url: `data:image/png;base64,${screenshotBase64}`, detail: 'high' },
      ],
    }],
    instructions: SYSTEM_INSTRUCTIONS,
    truncation: 'auto',
    reasoning: { effort: AI_CONFIG.REASONING_EFFORT, generate_summary: 'concise' },
  });

  let iterations = 0;

  // Main execution loop
  while (iterations < AI_CONFIG.MAX_ITERATIONS) {
    iterations++;

    // Extract computer actions from AI response
    const computerCalls = response.output.filter(
      (item: any) => item.type === 'computer_call'
    );

    // If no actions, task is complete
    if (computerCalls.length === 0) {
      sendEvent(encoder.encode(`data: ${JSON.stringify({
        type: 'reasoning',
        content: response.output_text || 'Task complete!'
      })}\n\n`));
      break;
    }

    const computerCall = computerCalls[0] as any;
    const action = computerCall.action;

    // Send action to client
    sendEvent(encoder.encode(`data: ${JSON.stringify({
      type: 'action',
      action: formatActionForDisplay(action)
    })}\n\n`));

    // Execute action on sandbox
    await executeComputerAction(sandbox, action);

    sendEvent(encoder.encode(`data: ${JSON.stringify({
      type: 'action_completed'
    })}\n\n`));

    // Take new screenshot
    const newScreenshotBuffer = await sandbox.screenshot();
    const newScreenshotBase64 = await processScreenshot(newScreenshotBuffer);

    // Continue conversation with new screenshot
    response = await openai.responses.create({
      model: AI_CONFIG.MODEL,
      previous_response_id: response.id,
      instructions: SYSTEM_INSTRUCTIONS,
      tools: [computerTool],
      input: [{
        call_id: computerCall.call_id,
        type: 'computer_call_output',
        output: {
          type: 'computer_screenshot',
          image_url: `data:image/png;base64,${newScreenshotBase64}`,
        },
      }],
      truncation: 'auto',
      reasoning: { effort: AI_CONFIG.REASONING_EFFORT, generate_summary: 'concise' },
    });
  }
}
```

This is the heart of the application. The loop continuously:
1. Takes screenshots of the desktop
2. Sends them to OpenAI with context
3. Receives structured computer actions
4. Executes actions via E2B
5. Repeats until the task is complete
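For orientation, a `computer_call` item in `response.output` has roughly this shape — field names follow the OpenAI computer use documentation, but the values here are invented for illustration:

```typescript JavaScript & TypeScript
// Rough shape of one computer_call item the loop consumes (values invented):
const exampleComputerCall = {
  type: 'computer_call',
  call_id: 'call_abc123',    // echoed back as computer_call_output.call_id
  action: { type: 'click', x: 412, y: 310, button: 'left' },
  pending_safety_checks: [], // production code should surface these to the user
};
```

The loop acknowledges each call by sending a `computer_call_output` with the matching `call_id` and a fresh screenshot, which is what keeps the model grounded in the actual desktop state.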
### Step 8: API Endpoint

Create the backend API endpoint that orchestrates sandbox creation and AI execution.

Create `app/api/chat/route.ts` for the SSE streaming endpoint:

```typescript JavaScript & TypeScript
// app/api/chat/route.ts
import { Sandbox } from '@e2b/desktop';
import { NextRequest } from 'next/server';
import { getEnv, isEnvironmentConfigured } from '@/lib/env';
import { SANDBOX_CONFIG } from '@/lib/constants';
import { createSafeStreamController, createSSEEvent } from '@/lib/utils/stream';
import { runComputerUseLoop } from '@/lib/services/openai';

// In-memory store for active sandboxes
const sandboxes = new Map<string, Sandbox>();

export async function POST(req: NextRequest) {
  try {
    const { message, sandboxId } = await req.json();

    if (!message) {
      return new Response(JSON.stringify({ error: 'Message required' }), { status: 400 });
    }

    if (!isEnvironmentConfigured()) {
      return new Response(createSSEEvent({ type: 'error', message: 'Missing API keys' }), {
        headers: { 'Content-Type': 'text/event-stream' },
      });
    }

    const { E2B_API_KEY } = getEnv();
    const encoder = new TextEncoder();

    // Create SSE stream
    const stream = new ReadableStream({
      async start(controller) {
        const safeController = createSafeStreamController(controller);

        try {
          // Reuse existing sandbox or create new one
          let sandbox = sandboxId ? sandboxes.get(sandboxId) : null;

          if (!sandbox) {
            safeController.enqueue(encoder.encode(createSSEEvent({
              type: 'reasoning',
              content: 'Creating sandbox...',
            })));

            sandbox = await Sandbox.create({
              apiKey: E2B_API_KEY,
              timeoutMs: SANDBOX_CONFIG.TIMEOUT_MS,
            });

            await sandbox.stream.start();
            sandboxes.set(sandbox.sandboxId, sandbox);

            safeController.enqueue(encoder.encode(createSSEEvent({
              type: 'sandbox_created',
              sandboxId: sandbox.sandboxId,
              url: sandbox.stream.getUrl(),
            })));
          } else {
            await sandbox.setTimeout(SANDBOX_CONFIG.TIMEOUT_MS);
          }

          // Run the AI loop
          await runComputerUseLoop(sandbox, message, safeController.enqueue);

          safeController.enqueue(encoder.encode(createSSEEvent({ type: 'done' })));
          safeController.close();
        } catch (error) {
          safeController.enqueue(encoder.encode(createSSEEvent({
            type: 'error',
            message: error instanceof Error ? error.message : 'Unknown error',
          })));
          safeController.close();
        }
      },
    });

    return new Response(stream, {
      headers: {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      },
    });
  } catch (error) {
    return new Response(
      JSON.stringify({ error: error instanceof Error ? error.message : 'Internal error' }),
      { status: 500 }
    );
  }
}

export const runtime = 'nodejs';
export const maxDuration = 300;
```

The endpoint:
- Validates environment and request
- Creates or reuses E2B sandboxes
- Starts VNC streaming
- Runs the computer use loop
- Streams events back to the client in real-time
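Before building the UI, you can sanity-check the endpoint directly and watch the raw SSE frames. This is a hypothetical test script, assuming the dev server is running on `localhost:3000`:

```typescript JavaScript & TypeScript
// test-chat.ts - hypothetical smoke test; run with: npx tsx test-chat.ts
const res = await fetch('http://localhost:3000/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ message: 'Take a screenshot of the desktop' }),
});

const reader = res.body!.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // Each chunk contains one or more "data: {...}\n\n" SSE frames
  process.stdout.write(decoder.decode(value, { stream: true }));
}
```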
### Step 9: Server Actions

Add Next.js server actions for sandbox management from the client.

Create `app/actions.ts` for server-side operations:

```typescript JavaScript & TypeScript
// app/actions.ts
'use server';

import { Sandbox } from '@e2b/desktop';
import { getEnv } from '@/lib/env';
import { SANDBOX_CONFIG } from '@/lib/constants';

export async function extendSandboxTimeout(sandboxId: string) {
  try {
    if (!sandboxId) {
      return { success: false, error: 'Sandbox ID required' };
    }

    const { E2B_API_KEY } = getEnv();
    const sandbox = await Sandbox.connect(sandboxId, { apiKey: E2B_API_KEY });
    await sandbox.setTimeout(SANDBOX_CONFIG.TIMEOUT_MS);

    return { success: true };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error',
    };
  }
}

export async function stopSandbox(sandboxId: string) {
  try {
    if (!sandboxId) {
      return { success: false, error: 'Sandbox ID required' };
    }

    const { E2B_API_KEY } = getEnv();
    const sandbox = await Sandbox.connect(sandboxId, { apiKey: E2B_API_KEY });
    await sandbox.kill();

    return { success: true };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error',
    };
  }
}
```

These server actions allow the client to:
- **Extend timeout**: Add 5 more minutes to prevent the sandbox from expiring
- **Stop sandbox**: Immediately terminate and clean up resources

### Step 10: Chat Interface

Create `app/page.tsx` with a chat interface, real-time status tracking, a VNC viewer, and a countdown timer with timeout management.

```typescript JavaScript & TypeScript
'use client';

import { useState, useRef, useEffect, useCallback } from 'react';
import { extendSandboxTimeout, stopSandbox } from './actions';
import { SANDBOX_CONFIG } from '@/lib/constants';
import type { ChatMessage, ConversationTurn } from '@/types';

export default function Home() {
  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [input, setInput] = useState('');
  const [loading, setLoading] = useState(false);
  const [sandboxUrl, setSandboxUrl] = useState('');
  const [sandboxId, setSandboxId] = useState('');
  const [currentStatus, setCurrentStatus] = useState('');
  const [currentAction, setCurrentAction] = useState('');
  const [timeRemaining, setTimeRemaining] = useState(
    SANDBOX_CONFIG.TIMEOUT_SECONDS
  );
  const [isExtending, setIsExtending] = useState(false);

  // Conversation history tracking
  const [conversationHistory, setConversationHistory] = useState<
    ConversationTurn[]
  >([]);
  const [currentUserMessage, setCurrentUserMessage] = useState('');
  const [currentAiResponse, setCurrentAiResponse] = useState('');

  const messagesEndRef = useRef<HTMLDivElement>(null);
  const timerRef = useRef<NodeJS.Timeout | null>(null);

  const scrollToBottom = () => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  };

  useEffect(() => {
    scrollToBottom();
  }, [messages]);

  const handleExtendTimeout = useCallback(async (isAutoExtend = false) => {
    // Manual timeout extension by user
    // Note: Server automatically extends timeout during active AI work
    if (!sandboxId || isExtending) return;

    setIsExtending(true);
    try {
      console.log('Extending timeout for sandbox:', sandboxId);
      const result = await extendSandboxTimeout(sandboxId);
      console.log('Extend timeout result:', result);

      if (result.success) {
        setTimeRemaining(SANDBOX_CONFIG.TIMEOUT_SECONDS);
        if (!isAutoExtend) {
          setMessages(prev => [
            ...prev,
            { role: 'system', content: '⏰ Sandbox timeout extended by 5 minutes' },
          ]);
        }
      } else {
        const errorMsg = result.error ? `: ${result.error}` : '';
        setMessages(prev => [
          ...prev,
          { role: 'system', content: `❌ Failed to extend timeout${errorMsg}` },
        ]);
      }
    } catch (error) {
      console.error('Error extending timeout:', error);
      setMessages(prev => [
        ...prev,
        { role: 'system', content: `❌ Error extending timeout: ${error}` },
      ]);
    } finally {
      setIsExtending(false);
    }
  }, [sandboxId, isExtending]);

  // Countdown timer
  useEffect(() => {
    if (!sandboxId) {
      if (timerRef.current) {
        clearInterval(timerRef.current);
        timerRef.current = null;
      }
      return;
    }

    // Start countdown timer
    timerRef.current = setInterval(() => {
      setTimeRemaining((prev) => {
        const newTime = Math.max(0, prev - 1);
        // Server now handles timeout extension during active work
        return newTime;
      });
    }, 1000);

    return () => {
      if (timerRef.current) {
        clearInterval(timerRef.current);
      }
    };
  }, [sandboxId, isExtending, handleExtendTimeout]);

  const handleStopSandbox = async () => {
    if (!sandboxId) return;

    try {
      const result = await stopSandbox(sandboxId);
      if (result.success) {
        setSandboxId('');
        setSandboxUrl('');
        setTimeRemaining(SANDBOX_CONFIG.TIMEOUT_SECONDS);
        setMessages(prev => [
          ...prev,
          { role: 'system', content: '🛑 Sandbox stopped' },
        ]);
      }
    } catch (error) {
      console.error('Error stopping sandbox:', error);
    }
  };

  const sendMessage = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!input.trim() || loading) return;

    const userMessage: ChatMessage = { role: 'user', content: input };
    setMessages((prev) => [...prev, userMessage]);
    setCurrentUserMessage(input);
    setCurrentAiResponse('');
    setInput('');
    setLoading(true);
    setCurrentStatus('Sending request...');

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          message: input,
          sandboxId: sandboxId || undefined,
          conversationHistory: conversationHistory,
        }),
      });

      if (!response.ok) {
        throw new Error(`Error: ${response.statusText}`);
      }

      const reader = response.body?.getReader();
      const decoder = new TextDecoder();

      if (!reader) {
        throw new Error('No response body');
      }

      let buffer = '';
      let currentThinking = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || '';

        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6);
            if (data === '[DONE]') continue;

            try {
              const parsed = JSON.parse(data);

              if (parsed.type === 'sandbox_created') {
                setSandboxId(parsed.sandboxId);
                setSandboxUrl(parsed.url);
                setTimeRemaining(SANDBOX_CONFIG.TIMEOUT_SECONDS);
                setCurrentStatus('✅ Sandbox ready');
                setMessages(prev => [
                  ...prev,
                  { role: 'system', content: '✅ Sandbox created and connected!' },
                ]);
                setTimeout(() => setCurrentStatus(''), 2000);
              } else if (parsed.type === 'reasoning') {
                currentThinking = parsed.content;
                setCurrentStatus('🤔 Thinking...');
                setMessages(prev => {
                  const newMessages = [...prev];
                  const lastMessage = newMessages[newMessages.length - 1];
                  if (lastMessage?.role === 'assistant') {
                    lastMessage.content = currentThinking;
                  } else {
                    newMessages.push({ role: 'assistant', content: currentThinking });
                  }
                  return newMessages;
                });
              } else if (parsed.type === 'action') {
                setCurrentStatus('⚡ Executing action...');
                setCurrentAction(parsed.action);
                setMessages(prev => [
                  ...prev,
                  { role: 'action', content: parsed.action },
                ]);
              } else if (parsed.type === 'action_completed') {
                setCurrentStatus('✓ Action completed');
                setCurrentAction('');
                setTimeout(() => setCurrentStatus(''), 1000);
              } else if (parsed.type === 'response') {
                // Capture final AI response for history
                const aiResponse = parsed.content;
                setCurrentAiResponse(aiResponse);

                // Add completed turn to history
                setConversationHistory((prev) => [
                  ...prev,
                  {
                    userMessage: currentUserMessage,
                    aiResponse: aiResponse,
                    timestamp: Date.now(),
                  },
                ]);
              } else if (parsed.type === 'done') {
                setCurrentStatus('✅ Task complete!');
                setTimeout(() => setCurrentStatus(''), 3000);
              } else if (parsed.type === 'error') {
                setCurrentStatus('❌ Error occurred');
                setMessages(prev => [
                  ...prev,
                  { role: 'system', content: `❌ Error: ${parsed.message}` },
                ]);
              }
            } catch (e) {
              console.error('Failed to parse SSE data:', e);
            }
          }
        }
      }
    } catch (error) {
      console.error('Error sending message:', error);
      setCurrentStatus('❌ Error');
      setMessages(prev => [
        ...prev,
        { role: 'system', content: `❌ Error: ${error}` },
      ]);
    } finally {
      setLoading(false);
      setCurrentAction('');
    }
  };

  const formatTime = (seconds: number): string => {
    const mins = Math.floor(seconds / 60);
    const secs = seconds % 60;
    return `${mins}:${secs.toString().padStart(2, '0')}`;
  };

  const getTimeColor = (): string => {
    if (timeRemaining > 60) return 'var(--e2b-orange)';
    if (timeRemaining > 30) return 'hsl(45 100% 50%)'; // Yellow
    return 'hsl(0 75% 60%)'; // Red
  };

  const getTimePercentage = (): number => {
    return (timeRemaining / SANDBOX_CONFIG.TIMEOUT_SECONDS) * 100;
  };

  return (
    <div className="app">
      {/* NOTE: class names below are illustrative placeholders; wire them
          up to styles/globals.css as needed. */}
      <header>
        <h1>🏄 Surf Demo</h1>
        <p>AI agent with E2B desktop sandbox</p>
      </header>

      {/* Status Bar */}
      {(loading || currentStatus) && (
        <div className="status-bar">
          <div>
            {loading && <span className="spinner" />}
            <span>{currentStatus || 'Processing...'}</span>
          </div>
          {currentAction && (
            <div className="current-action">
              <span>Current action:</span>
              <span>{currentAction}</span>
            </div>
          )}
        </div>
      )}

      <main>
        <div className="desktop-panel">
          {sandboxUrl ? (
            <>