From ad968d16bfb9a8b6b9d514ae7fb45bdeadab6c51 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Tue, 17 Feb 2026 18:01:34 +0000 Subject: [PATCH] feat: add foundational support for programmatic tool calling - Add supportsProgrammaticToolCalling flag to ModelInfo schema - Add enableProgrammaticToolCalling setting to GlobalSettings - Set the flag on supported Anthropic models (Claude Sonnet 4.5, Opus 4.x) - Add ApiStreamCodeExecutionChunk type to the API stream - Create programmatic-tool-calling service with: - DockerSandboxExecutor for isolated Python code execution - ToolBridge for generating Python SDK with tool function stubs - IPC protocol for tool call communication between sandbox and host - Support for read_file, write_to_file, execute_command, search_files, list_files - Add comprehensive tests (27 passing) Addresses #11506 --- packages/types/src/global-settings.ts | 9 + packages/types/src/model.ts | 7 + packages/types/src/providers/anthropic.ts | 6 + src/api/transform/stream.ts | 14 + .../DockerSandboxExecutor.ts | 309 ++++++++++++++++++ .../programmatic-tool-calling/ToolBridge.ts | 215 ++++++++++++ .../__tests__/DockerSandboxExecutor.spec.ts | 103 ++++++ .../__tests__/ToolBridge.spec.ts | 109 ++++++ .../__tests__/types.spec.ts | 57 ++++ .../programmatic-tool-calling/index.ts | 25 ++ .../programmatic-tool-calling/types.ts | 120 +++++++ 11 files changed, 974 insertions(+) create mode 100644 src/services/programmatic-tool-calling/DockerSandboxExecutor.ts create mode 100644 src/services/programmatic-tool-calling/ToolBridge.ts create mode 100644 src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts create mode 100644 src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts create mode 100644 src/services/programmatic-tool-calling/__tests__/types.spec.ts create mode 100644 src/services/programmatic-tool-calling/index.ts create mode 100644 src/services/programmatic-tool-calling/types.ts diff --git 
a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index de3bd076616..bd03fb0ff86 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -231,6 +231,15 @@ export const globalSettingsSchema = z.object({ * Tools in this list will be excluded from prompt generation and rejected at execution time. */ disabledTools: z.array(toolNamesSchema).optional(), + + /** + * Whether to enable programmatic tool calling. + * When enabled, supported models can generate Python code that calls multiple tools + * within a single sandboxed code execution, reducing round-trips to the model. + * Tools still require individual approval before execution. + * @default false + */ + enableProgrammaticToolCalling: z.boolean().optional(), }) export type GlobalSettings = z.infer diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 95e9095a89e..04c1b413308 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -117,6 +117,13 @@ export const modelInfoSchema = z.object({ // These tools will be added if they belong to an allowed group in the current mode // Cannot force-add tools from groups the mode doesn't allow includedTools: z.array(z.string()).optional(), + /** + * Whether the model supports programmatic tool calling. + * When true, the model can generate Python code that calls tool functions, + * enabling multiple tool calls within a single code execution in a sandbox. + * Currently supported by Anthropic Claude models. + */ + supportsProgrammaticToolCalling: z.boolean().optional(), /** * Service tiers with pricing information. * Each tier can have a name (for OpenAI service tiers) and pricing overrides. 
diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 62e377c7e54..4807e709336 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -17,6 +17,7 @@ export const anthropicModels = { cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, + supportsProgrammaticToolCalling: true, // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') tiers: [ { @@ -38,6 +39,7 @@ export const anthropicModels = { cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, + supportsProgrammaticToolCalling: true, // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') tiers: [ { @@ -59,6 +61,7 @@ export const anthropicModels = { cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens supportsReasoningBudget: true, + supportsProgrammaticToolCalling: true, // Tiered pricing for extended context (requires beta flag) tiers: [ { @@ -80,6 +83,7 @@ export const anthropicModels = { cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens supportsReasoningBudget: true, + supportsProgrammaticToolCalling: true, }, "claude-opus-4-1-20250805": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. @@ -91,6 +95,7 @@ export const anthropicModels = { cacheWritesPrice: 18.75, // $18.75 per million tokens cacheReadsPrice: 1.5, // $1.50 per million tokens supportsReasoningBudget: true, + supportsProgrammaticToolCalling: true, }, "claude-opus-4-20250514": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. 
@@ -102,6 +107,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 18.75, // $18.75 per million tokens
 		cacheReadsPrice: 1.5, // $1.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 	},
 	"claude-3-7-sonnet-20250219:thinking": {
 		maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k.
diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts
index 960ebbe770d..8fa39cb770a 100644
--- a/src/api/transform/stream.ts
+++ b/src/api/transform/stream.ts
@@ -11,6 +11,7 @@ export type ApiStreamChunk =
 	| ApiStreamToolCallDeltaChunk
 	| ApiStreamToolCallEndChunk
 	| ApiStreamToolCallPartialChunk
+	| ApiStreamCodeExecutionChunk
 	| ApiStreamError
 
 export interface ApiStreamError {
@@ -107,6 +108,19 @@ export interface ApiStreamToolCallPartialChunk {
 	arguments?: string
 }
 
+/**
+ * Code execution chunk from programmatic tool calling.
+ * The model generates Python code that calls tool functions in a sandboxed environment.
+ * This chunk carries the code to execute; it has no field for execution results.
+ */
+export interface ApiStreamCodeExecutionChunk {
+	type: "code_execution"
+	/** Unique identifier for this code execution block */
+	id: string
+	/** The Python code generated by the model */
+	code: string
+}
+
 export interface GroundingSource {
 	title: string
 	url: string
diff --git a/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts b/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts
new file mode 100644
index 00000000000..8504223e05a
--- /dev/null
+++ b/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts
@@ -0,0 +1,361 @@
+/**
+ * DockerSandboxExecutor - Executes Python code in an isolated Docker container.
+ *
+ * This executor manages the lifecycle of Docker containers used for programmatic
+ * tool calling. It handles:
+ * - Container creation with resource limits
+ * - Mounting the Python tool SDK
+ * - IPC communication between the container and Roo Code
+ * - Tool call routing and approval
+ * - Execution timeout and cleanup
+ */
+
+import { spawn } from "child_process"
+import { EventEmitter } from "events"
+import * as fs from "fs/promises"
+import * as path from "path"
+import * as os from "os"
+
+import type { CodeExecutionResult, SandboxConfig, SandboxToolCall, SandboxToolResult } from "./types"
+import { DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "./types"
+import { generatePythonSDK, generateExecutionScript } from "./ToolBridge"
+
+/**
+ * Callback type for requesting tool approval from the user.
+ * Returns true if the tool call is approved, false otherwise.
+ */
+export type ToolApprovalCallback = (toolCall: SandboxToolCall) => Promise<boolean>
+
+/**
+ * Callback type for executing an approved tool call.
+ * Returns the tool result.
+ */
+export type ToolExecutorCallback = (toolCall: SandboxToolCall) => Promise<SandboxToolResult>
+
+interface ExecutorOptions {
+	config?: Partial<SandboxConfig>
+	onToolApproval: ToolApprovalCallback
+	onToolExecute: ToolExecutorCallback
+}
+
+/** A JSON object received as one line of sandbox stdout. */
+type SandboxMessage = Record<string, unknown>
+
+/** Handle of the spawned `docker run` process. */
+type SandboxProcess = ReturnType<typeof spawn>
+
+/** Narrow an unknown value to a non-null, non-array object. */
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null && !Array.isArray(value)
+}
+
+/**
+ * Parse one stdout line as a protocol message.
+ *
+ * @returns the parsed object, or undefined when the line is not a JSON object
+ *   (plain program output, or JSON scalars/arrays such as the output of `print(42)`)
+ */
+function parseSandboxMessage(line: string): SandboxMessage | undefined {
+	let parsed: unknown
+	try {
+		parsed = JSON.parse(line)
+	} catch {
+		return undefined
+	}
+	return isPlainObject(parsed) ? parsed : undefined
+}
+
+/** Turn a caught value into a printable message. */
+function describeError(err: unknown): string {
+	return err instanceof Error ? err.message : String(err)
+}
+
+/**
+ * DockerSandboxExecutor manages the execution of Python code in a Docker container
+ * with IPC-based tool calling support.
+ *
+ * Emits "stdout" with each line of container output that is not a protocol message.
+ */
+export class DockerSandboxExecutor extends EventEmitter {
+	private config: SandboxConfig
+	private onToolApproval: ToolApprovalCallback
+	private onToolExecute: ToolExecutorCallback
+
+	constructor(options: ExecutorOptions) {
+		super()
+		this.config = { ...DEFAULT_SANDBOX_CONFIG, ...options.config }
+		this.onToolApproval = options.onToolApproval
+		this.onToolExecute = options.onToolExecute
+	}
+
+	/**
+	 * Check if Docker is available on the system.
+	 *
+	 * @returns true when `docker info` exits with code 0; false on any other exit code
+	 *   or when the `docker` binary cannot be spawned
+	 */
+	async isDockerAvailable(): Promise<boolean> {
+		return new Promise((resolve) => {
+			const proc = spawn("docker", ["info"], {
+				stdio: ["ignore", "ignore", "ignore"],
+			})
+			proc.on("close", (code) => resolve(code === 0))
+			proc.on("error", () => resolve(false))
+		})
+	}
+
+	/**
+	 * Execute Python code in a Docker sandbox with tool calling support.
+	 *
+	 * Sandbox failures (timeout, spawn error, non-zero exit, Python exception) are
+	 * reported through the returned result rather than by rejecting. Errors creating
+	 * or populating the temporary directory still reject.
+	 *
+	 * @param code - The Python code to execute (generated by the model)
+	 * @returns The execution result including stdout, stderr, and tool call history
+	 */
+	async execute(code: string): Promise<CodeExecutionResult> {
+		const startTime = Date.now()
+		const toolCalls: Array<{ call: SandboxToolCall; result: SandboxToolResult }> = []
+
+		// Create a temporary directory for mounting the SDK and script
+		const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "roo-sandbox-"))
+		// mkdtemp makes the directory name unique, so it doubles as the container name
+		const containerName = path.basename(tmpDir)
+
+		try {
+			// Write the Python SDK module and the execution script
+			await fs.writeFile(path.join(tmpDir, "roo_tools.py"), generatePythonSDK(), "utf-8")
+			await fs.writeFile(path.join(tmpDir, "run.py"), generateExecutionScript(code), "utf-8")
+
+			const dockerArgs = this.buildDockerArgs(tmpDir, containerName)
+
+			return await new Promise<CodeExecutionResult>((resolve) => {
+				let stdout = ""
+				let stderr = ""
+				let stdoutBuffer = ""
+				let completion: SandboxMessage | undefined
+				let resolved = false
+				let timeoutId: ReturnType<typeof setTimeout> | undefined
+
+				const proc = spawn("docker", dockerArgs, {
+					stdio: ["pipe", "pipe", "pipe"],
+				})
+
+				// Resolve at most once, whichever of timeout / close / error happens first
+				const finish = (success: boolean, error?: string) => {
+					if (resolved) {
+						return
+					}
+					resolved = true
+					clearTimeout(timeoutId)
+					resolve({ success, stdout, stderr, toolCalls, error, durationMs: Date.now() - startTime })
+				}
+
+				// Route one complete stdout line: protocol message or plain program output
+				const dispatch = (line: string, terminator: string) => {
+					const message = parseSandboxMessage(line)
+					if (message?.type === "complete") {
+						// Remembered here and turned into the result when the process closes
+						completion = message
+					} else if (message?.type === "ready" || message?.type === "tool_request") {
+						this.handleSandboxMessage(message, proc, toolCalls).catch((err: unknown) => {
+							stderr += `IPC error: ${describeError(err)}\n`
+						})
+					} else {
+						stdout += line + terminator
+						this.emit("stdout", line)
+					}
+				}
+
+				timeoutId = setTimeout(() => {
+					if (resolved) {
+						return
+					}
+					// SIGKILL only ends the docker CLI; the container must be stopped by name
+					this.killContainer(containerName)
+					proc.kill("SIGKILL")
+					finish(false, `Execution timed out after ${this.config.timeoutMs}ms`)
+				}, this.config.timeoutMs)
+
+				// Decode as UTF-8 streams so multi-byte characters split across chunks survive
+				proc.stdout!.setEncoding("utf8")
+				proc.stderr!.setEncoding("utf8")
+
+				// Handle stdout (IPC messages and program output from the sandbox)
+				proc.stdout!.on("data", (chunk: string) => {
+					stdoutBuffer += chunk
+					const lines = stdoutBuffer.split("\n")
+					stdoutBuffer = lines.pop() ?? "" // Keep incomplete line in buffer
+					for (const line of lines) {
+						if (line.trim()) {
+							dispatch(line, "\n")
+						}
+					}
+				})
+
+				// Collect stderr
+				proc.stderr!.on("data", (chunk: string) => {
+					stderr += chunk
+				})
+
+				// A reply can race with the container exiting; report EPIPE instead of throwing
+				proc.stdin!.on("error", (err: Error) => {
+					stderr += `IPC error: ${err.message}\n`
+				})
+
+				// Handle process exit
+				proc.on("close", (exitCode) => {
+					// Flush a final line that arrived without a trailing newline
+					if (!resolved && stdoutBuffer.trim()) {
+						dispatch(stdoutBuffer, "")
+					}
+					stdoutBuffer = ""
+
+					if (completion) {
+						// The wrapper exits 0 even when user code raised, so trust its report
+						const error = typeof completion.error === "string" ? completion.error : undefined
+						finish(completion.success === true, error)
+					} else {
+						finish(exitCode === 0, exitCode !== 0 ? `Process exited with code ${exitCode}` : undefined)
+					}
+				})
+
+				proc.on("error", (err) => {
+					finish(false, `Failed to start Docker container: ${err.message}`)
+				})
+			})
+		} finally {
+			// Clean up temporary directory
+			await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {
+				// Ignore cleanup errors
+			})
+		}
+	}
+
+	/**
+	 * Build the Docker run command arguments.
+	 *
+	 * @param tmpDir - Host directory holding roo_tools.py and run.py, mounted read-only at /workspace
+	 * @param containerName - Name given to the container so it can be stopped on timeout
+	 */
+	private buildDockerArgs(tmpDir: string, containerName: string): string[] {
+		return [
+			"run",
+			"--rm",
+			// Named so a timed-out container can be stopped with `docker kill`
+			`--name=${containerName}`,
+			// Resource limits
+			`--memory=${this.config.memoryLimit}`,
+			`--cpus=${this.config.cpuLimit}`,
+			// Security: read-only root filesystem, no new privileges
+			"--read-only",
+			"--security-opt=no-new-privileges",
+			// Disable network unless explicitly enabled
+			...(this.config.networkEnabled ? [] : ["--network=none"]),
+			// Mount the SDK and script as read-only
+			`-v=${tmpDir}:/workspace:ro`,
+			// Working directory
+			"-w=/workspace",
+			// Interactive mode for stdin/stdout IPC
+			"-i",
+			// Image
+			this.config.image,
+			// Command
+			"python",
+			"/workspace/run.py",
+		]
+	}
+
+	/**
+	 * Best-effort stop of a sandbox container by name. Failures are ignored: the
+	 * container may already have exited and been removed by `--rm`.
+	 */
+	private killContainer(containerName: string): void {
+		const killer = spawn("docker", ["kill", containerName], {
+			stdio: ["ignore", "ignore", "ignore"],
+		})
+		killer.on("error", () => {
+			// Docker CLI could not be spawned; nothing more can be done here
+		})
+	}
+
+	/**
+	 * Write one JSON message line to the sandbox's stdin, if it is still writable.
+	 */
+	private sendToSandbox(proc: SandboxProcess, payload: Record<string, unknown>): void {
+		const stdin = proc.stdin
+		if (stdin?.writable) {
+			stdin.write(JSON.stringify(payload) + "\n")
+		}
+	}
+
+	/**
+	 * Validate, approve and execute one tool call.
+	 *
+	 * Never rejects: the sandbox blocks on stdin until it gets an answer, so a throwing
+	 * callback is converted into a failed result instead.
+	 */
+	private async runToolCall(toolCall: SandboxToolCall): Promise<SandboxToolResult> {
+		// Validate tool is supported
+		if (!isSupportedProgrammaticTool(toolCall.tool)) {
+			return {
+				success: false,
+				error: `Tool '${toolCall.tool}' is not supported in programmatic tool calling`,
+			}
+		}
+
+		try {
+			// Request approval from the user
+			const approved = await this.onToolApproval(toolCall)
+			if (!approved) {
+				return {
+					success: false,
+					error: `Tool '${toolCall.tool}' was not approved by the user`,
+				}
+			}
+			return await this.onToolExecute(toolCall)
+		} catch (err) {
+			return {
+				success: false,
+				error: `Tool '${toolCall.tool}' threw: ${describeError(err)}`,
+			}
+		}
+	}
+
+	/**
+	 * Handle a "ready" or "tool_request" protocol message from the sandbox process.
+	 *
+	 * @param message - Parsed JSON object read from the sandbox's stdout
+	 * @param proc - The `docker run` process whose stdin receives the reply
+	 * @param toolCalls - Call history; every answered tool request is appended
+	 */
+	private async handleSandboxMessage(
+		message: SandboxMessage,
+		proc: SandboxProcess,
+		toolCalls: Array<{ call: SandboxToolCall; result: SandboxToolResult }>,
+	): Promise<void> {
+		switch (message.type) {
+			case "ready":
+				// Container is ready, send start signal
+				this.sendToSandbox(proc, { type: "start" })
+				break
+
+			case "tool_request": {
+				const toolCall: SandboxToolCall = {
+					tool: String(message.tool),
+					args: isPlainObject(message.args) ? message.args : {},
+				}
+				const result = await this.runToolCall(toolCall)
+				toolCalls.push({ call: toolCall, result })
+
+				// Send response back to the sandbox
+				this.sendToSandbox(proc, { requestId: message.requestId, result })
+				break
+			}
+
+			default:
+				break
+		}
+	}
+}
diff --git
a/src/services/programmatic-tool-calling/ToolBridge.ts b/src/services/programmatic-tool-calling/ToolBridge.ts
new file mode 100644
index 00000000000..3aae9407d9f
--- /dev/null
+++ b/src/services/programmatic-tool-calling/ToolBridge.ts
@@ -0,0 +1,221 @@
+/**
+ * ToolBridge - Maps Roo Code tools to Python function definitions for the sandbox.
+ *
+ * This module generates the Python SDK code that gets mounted into the Docker container.
+ * It provides Python function stubs that communicate back to Roo Code via stdin/stdout
+ * IPC to execute actual tool operations.
+ */
+
+import { SUPPORTED_PROGRAMMATIC_TOOLS, type SupportedProgrammaticTool } from "./types"
+
+/**
+ * Python function signatures for each supported tool.
+ * These are used to generate the Python SDK that runs inside the Docker sandbox.
+ */
+const TOOL_PYTHON_DEFINITIONS: Record<SupportedProgrammaticTool, string> = {
+	read_file: `def read_file(path: str, mode: str = "slice", offset: int = 1, limit: int = 2000) -> str:
+    """Read a file and return its contents.
+
+    Args:
+        path: The file path relative to the workspace directory.
+        mode: Reading mode - 'slice' for sequential lines (default).
+        offset: 1-based line offset to start reading from (default: 1).
+        limit: Maximum number of lines to return (default: 2000).
+
+    Returns:
+        The file contents as a string.
+    """
+    return _call_tool("read_file", {"path": path, "mode": mode, "offset": offset, "limit": limit})`,
+
+	write_to_file: `def write_to_file(path: str, content: str) -> str:
+    """Write content to a file. Creates the file if it doesn't exist, overwrites if it does.
+
+    Args:
+        path: The file path relative to the workspace directory.
+        content: The complete file content to write.
+
+    Returns:
+        A confirmation message.
+    """
+    return _call_tool("write_to_file", {"path": path, "content": content})`,
+
+	execute_command: `def execute_command(command: str, cwd: str = None) -> str:
+    """Execute a CLI command on the host system.
+
+    Args:
+        command: The command to execute.
+        cwd: Optional working directory for the command.
+
+    Returns:
+        The command output (stdout and stderr).
+    """
+    args = {"command": command}
+    if cwd is not None:
+        args["cwd"] = cwd
+    return _call_tool("execute_command", args)`,
+
+	search_files: `def search_files(path: str, regex: str, file_pattern: str = None) -> str:
+    """Search for a regex pattern across files in a directory.
+
+    Args:
+        path: The directory path to search in (relative to workspace).
+        regex: The regular expression pattern to search for.
+        file_pattern: Optional glob pattern to filter files (e.g., '*.ts').
+
+    Returns:
+        Search results with context.
+    """
+    args = {"path": path, "regex": regex}
+    if file_pattern is not None:
+        args["file_pattern"] = file_pattern
+    return _call_tool("search_files", args)`,
+
+	list_files: `def list_files(path: str, recursive: bool = False) -> str:
+    """List files and directories in a specified directory.
+
+    Args:
+        path: The directory path to list contents for (relative to workspace).
+        recursive: Whether to list files recursively (default: False).
+
+    Returns:
+        A listing of files and directories.
+    """
+    return _call_tool("list_files", {"path": path, "recursive": recursive})`,
+}
+
+/**
+ * Generate the complete Python SDK code that will be mounted into the Docker sandbox.
+ *
+ * The SDK provides:
+ * - Tool function definitions (read_file, write_to_file, etc.)
+ * - IPC mechanism using stdin/stdout JSON messages
+ * - Error handling and result parsing
+ *
+ * @param enabledTools - Subset of tools to include in the SDK (defaults to all supported tools)
+ * @returns The Python SDK source code as a string
+ */
+export function generatePythonSDK(enabledTools?: SupportedProgrammaticTool[]): string {
+	const tools = enabledTools ?? [...SUPPORTED_PROGRAMMATIC_TOOLS]
+
+	const toolDefinitions = tools.map((tool) => TOOL_PYTHON_DEFINITIONS[tool]).join("\n\n")
+
+	return `"""
+Roo Code Programmatic Tool Calling SDK
+
+This module provides Python functions for calling Roo Code tools from within
+a sandboxed environment. Each function communicates with the host Roo Code
+instance via JSON IPC over stdin/stdout.
+
+Auto-generated - do not edit manually.
+"""
+
+import json
+import sys
+import uuid
+
+
+def _call_tool(tool_name: str, args: dict) -> str:
+    """Internal: Send a tool call request to Roo Code and wait for the result.
+
+    Communication protocol:
+    1. Write a JSON request to stdout (flushed)
+    2. Read a JSON response from stdin
+    3. Parse and return the result or raise an error
+    """
+    request_id = str(uuid.uuid4())
+
+    request = {
+        "type": "tool_request",
+        "requestId": request_id,
+        "tool": tool_name,
+        "args": args,
+    }
+
+    # Send request via stdout
+    sys.stdout.write(json.dumps(request) + "\\n")
+    sys.stdout.flush()
+
+    # Read response from stdin
+    response_line = sys.stdin.readline()
+    if not response_line:
+        raise RuntimeError(f"No response received for tool call: {tool_name}")
+
+    response = json.loads(response_line)
+
+    if response.get("requestId") != request_id:
+        raise RuntimeError(
+            f"Response ID mismatch: expected {request_id}, got {response.get('requestId')}"
+        )
+
+    result = response.get("result", {})
+
+    if not result.get("success", False):
+        error_msg = result.get("error", "Unknown error")
+        raise RuntimeError(f"Tool '{tool_name}' failed: {error_msg}")
+
+    return result.get("result", "")
+
+
+# Tool function definitions
+${toolDefinitions}
+
+
+# Export list of available tools
+AVAILABLE_TOOLS = [${tools.map((t) => `"${t}"`).join(", ")}]
+`
+}
+
+/**
+ * Generate a Python wrapper script that imports the SDK and executes user code.
+ *
+ * Every line of the user code is prefixed with four spaces so that it forms the
+ * body of the wrapper's try block.
+ * NOTE(review): that prefix is also applied to continuation lines of multi-line
+ * string literals in the user code, which changes their contents - confirm this
+ * is acceptable or switch to passing the code to exec().
+ *
+ * @param userCode - The Python code generated by the model
+ * @returns Complete Python script to execute in the sandbox
+ */
+export function generateExecutionScript(userCode: string): string {
+	return `"""
+Roo Code Programmatic Tool Calling - Execution Wrapper
+"""
+import json
+import sys
+import traceback
+
+# Import the tool SDK
+from roo_tools import *
+
+# Signal ready
+sys.stdout.write(json.dumps({"type": "ready"}) + "\\n")
+sys.stdout.flush()
+
+# Wait for start signal
+start_line = sys.stdin.readline()
+start = json.loads(start_line)
+if start.get("type") != "start":
+    sys.exit(1)
+
+try:
+    # Execute the model-generated code
+${userCode
+	.split("\n")
+	.map((line) => `    ${line}`)
+	.join("\n")}
+
+    # Signal completion
+    sys.stdout.write(json.dumps({"type": "complete", "success": True}) + "\\n")
+    sys.stdout.flush()
+except Exception as e:
+    # Signal error
+    sys.stdout.write(json.dumps({
+        "type": "complete",
+        "success": False,
+        "error": str(e),
+        "traceback": traceback.format_exc(),
+    }) + "\\n")
+    sys.stdout.flush()
+`
+}
diff --git a/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts b/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts
new file mode 100644
index 00000000000..67d39d2cf33
--- /dev/null
+++ b/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts
@@ -0,0 +1,103 @@
+import { DockerSandboxExecutor } from "../DockerSandboxExecutor"
+import type { SandboxToolCall, SandboxToolResult } from "../types"
+import { DEFAULT_SANDBOX_CONFIG } from "../types"
+
+// Mock child_process
+vi.mock("child_process", () => ({
+	spawn: vi.fn(),
+}))
+
+// Mock fs/promises
+vi.mock("fs/promises", () => ({
+	mkdtemp: vi.fn().mockResolvedValue("/tmp/roo-sandbox-test"),
+	writeFile: vi.fn().mockResolvedValue(undefined),
+	rm: vi.fn().mockResolvedValue(undefined),
+}))
+
+describe("DockerSandboxExecutor", () => {
+	let executor: DockerSandboxExecutor
+	let mockApproval:
ReturnType + let mockExecute: ReturnType + + beforeEach(() => { + mockApproval = vi.fn().mockResolvedValue(true) + mockExecute = vi.fn().mockResolvedValue({ success: true, result: "ok" } satisfies SandboxToolResult) + executor = new DockerSandboxExecutor({ + onToolApproval: mockApproval, + onToolExecute: mockExecute, + }) + }) + + describe("constructor", () => { + it("should use default config when no config is provided", () => { + const exec = new DockerSandboxExecutor({ + onToolApproval: mockApproval, + onToolExecute: mockExecute, + }) + // Executor is created without errors + expect(exec).toBeDefined() + }) + + it("should merge custom config with defaults", () => { + const exec = new DockerSandboxExecutor({ + config: { timeoutMs: 60_000, networkEnabled: true }, + onToolApproval: mockApproval, + onToolExecute: mockExecute, + }) + expect(exec).toBeDefined() + }) + }) + + describe("isDockerAvailable", () => { + it("should resolve to false when docker is not available", async () => { + const { spawn } = await import("child_process") + const mockSpawn = vi.mocked(spawn) + + // Mock spawn to simulate docker not found + mockSpawn.mockImplementation((() => { + const proc = { + on: vi.fn((event: string, cb: (...args: unknown[]) => void) => { + if (event === "error") { + setTimeout(() => cb(new Error("ENOENT")), 0) + } + return proc + }), + } + return proc + }) as unknown as typeof spawn) + + const result = await executor.isDockerAvailable() + expect(result).toBe(false) + }) + + it("should resolve to true when docker is available", async () => { + const { spawn } = await import("child_process") + const mockSpawn = vi.mocked(spawn) + + mockSpawn.mockImplementation((() => { + const proc = { + on: vi.fn((event: string, cb: (...args: unknown[]) => void) => { + if (event === "close") { + setTimeout(() => cb(0), 0) + } + return proc + }), + } + return proc + }) as unknown as typeof spawn) + + const result = await executor.isDockerAvailable() + expect(result).toBe(true) + }) + 
}) + + describe("buildDockerArgs (private, tested indirectly)", () => { + it("should construct proper Docker arguments", () => { + // We test this indirectly through the execute method + // but also verify the config structure + const config = { ...DEFAULT_SANDBOX_CONFIG } + expect(config.image).toBe("python:3.12-slim") + expect(config.networkEnabled).toBe(false) + }) + }) +}) diff --git a/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts b/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts new file mode 100644 index 00000000000..0b389ca7449 --- /dev/null +++ b/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts @@ -0,0 +1,109 @@ +import { generatePythonSDK, generateExecutionScript } from "../ToolBridge" +import { SUPPORTED_PROGRAMMATIC_TOOLS } from "../types" + +describe("ToolBridge", () => { + describe("generatePythonSDK", () => { + it("should generate valid Python code", () => { + const sdk = generatePythonSDK() + expect(sdk).toContain("import json") + expect(sdk).toContain("import sys") + expect(sdk).toContain("import uuid") + }) + + it("should include the _call_tool helper function", () => { + const sdk = generatePythonSDK() + expect(sdk).toContain("def _call_tool(tool_name: str, args: dict) -> str:") + }) + + it("should include all supported tool functions by default", () => { + const sdk = generatePythonSDK() + expect(sdk).toContain("def read_file(") + expect(sdk).toContain("def write_to_file(") + expect(sdk).toContain("def execute_command(") + expect(sdk).toContain("def search_files(") + expect(sdk).toContain("def list_files(") + }) + + it("should include AVAILABLE_TOOLS list with all tools", () => { + const sdk = generatePythonSDK() + expect(sdk).toContain("AVAILABLE_TOOLS") + for (const tool of SUPPORTED_PROGRAMMATIC_TOOLS) { + expect(sdk).toContain(`"${tool}"`) + } + }) + + it("should generate SDK with only specified tools when filtered", () => { + const sdk = generatePythonSDK(["read_file", 
"list_files"]) + expect(sdk).toContain("def read_file(") + expect(sdk).toContain("def list_files(") + expect(sdk).not.toContain("def write_to_file(") + expect(sdk).not.toContain("def execute_command(") + expect(sdk).not.toContain("def search_files(") + }) + + it("should include the IPC protocol implementation", () => { + const sdk = generatePythonSDK() + // Should write JSON to stdout + expect(sdk).toContain("sys.stdout.write(json.dumps(request)") + // Should read JSON from stdin + expect(sdk).toContain("sys.stdin.readline()") + // Should use request IDs + expect(sdk).toContain("uuid.uuid4()") + }) + + it("should include proper docstrings for tool functions", () => { + const sdk = generatePythonSDK() + expect(sdk).toContain('"""Read a file and return its contents.') + expect(sdk).toContain('"""Write content to a file.') + expect(sdk).toContain('"""Execute a CLI command') + expect(sdk).toContain('"""Search for a regex pattern') + expect(sdk).toContain('"""List files and directories') + }) + }) + + describe("generateExecutionScript", () => { + it("should wrap user code in the execution template", () => { + const userCode = 'result = read_file("test.txt")\nprint(result)' + const script = generateExecutionScript(userCode) + + // Should import the tool SDK + expect(script).toContain("from roo_tools import *") + + // Should include the user code (indented) + expect(script).toContain(' result = read_file("test.txt")') + expect(script).toContain(" print(result)") + }) + + it("should send ready signal on startup", () => { + const script = generateExecutionScript("pass") + expect(script).toContain('"type": "ready"') + }) + + it("should wait for start signal before executing", () => { + const script = generateExecutionScript("pass") + expect(script).toContain('!= "start"') + }) + + it("should send completion signal with success status", () => { + const script = generateExecutionScript("pass") + expect(script).toContain('"type": "complete"') + 
expect(script).toContain('"success": True') + }) + + it("should handle exceptions and report errors", () => { + const script = generateExecutionScript("pass") + expect(script).toContain("except Exception as e") + expect(script).toContain('"success": False') + expect(script).toContain("traceback.format_exc()") + }) + + it("should properly indent multi-line user code", () => { + const userCode = "for i in range(3):\n print(i)\n read_file(f'file_{i}.txt')" + const script = generateExecutionScript(userCode) + + expect(script).toContain(" for i in range(3):") + expect(script).toContain(" print(i)") + expect(script).toContain(" read_file(f'file_{i}.txt')") + }) + }) +}) diff --git a/src/services/programmatic-tool-calling/__tests__/types.spec.ts b/src/services/programmatic-tool-calling/__tests__/types.spec.ts new file mode 100644 index 00000000000..63c026f2cbf --- /dev/null +++ b/src/services/programmatic-tool-calling/__tests__/types.spec.ts @@ -0,0 +1,57 @@ +import { SUPPORTED_PROGRAMMATIC_TOOLS, DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "../types" + +describe("programmatic-tool-calling types", () => { + describe("SUPPORTED_PROGRAMMATIC_TOOLS", () => { + it("should include the initial subset of tools", () => { + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain("read_file") + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain("write_to_file") + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain("execute_command") + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain("search_files") + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain("list_files") + }) + + it("should have exactly 5 tools in the initial implementation", () => { + expect(SUPPORTED_PROGRAMMATIC_TOOLS).toHaveLength(5) + }) + }) + + describe("DEFAULT_SANDBOX_CONFIG", () => { + it("should use python:3.12-slim as the default image", () => { + expect(DEFAULT_SANDBOX_CONFIG.image).toBe("python:3.12-slim") + }) + + it("should have a 256MB memory limit", () => { + expect(DEFAULT_SANDBOX_CONFIG.memoryLimit).toBe(256 * 
1024 * 1024) + }) + + it("should have a 0.5 CPU limit", () => { + expect(DEFAULT_SANDBOX_CONFIG.cpuLimit).toBe(0.5) + }) + + it("should have a 30 second timeout", () => { + expect(DEFAULT_SANDBOX_CONFIG.timeoutMs).toBe(30_000) + }) + + it("should have network disabled by default", () => { + expect(DEFAULT_SANDBOX_CONFIG.networkEnabled).toBe(false) + }) + }) + + describe("isSupportedProgrammaticTool", () => { + it("should return true for supported tools", () => { + expect(isSupportedProgrammaticTool("read_file")).toBe(true) + expect(isSupportedProgrammaticTool("write_to_file")).toBe(true) + expect(isSupportedProgrammaticTool("execute_command")).toBe(true) + expect(isSupportedProgrammaticTool("search_files")).toBe(true) + expect(isSupportedProgrammaticTool("list_files")).toBe(true) + }) + + it("should return false for unsupported tools", () => { + expect(isSupportedProgrammaticTool("apply_diff")).toBe(false) + expect(isSupportedProgrammaticTool("attempt_completion")).toBe(false) + expect(isSupportedProgrammaticTool("switch_mode")).toBe(false) + expect(isSupportedProgrammaticTool("nonexistent_tool")).toBe(false) + expect(isSupportedProgrammaticTool("")).toBe(false) + }) + }) +}) diff --git a/src/services/programmatic-tool-calling/index.ts b/src/services/programmatic-tool-calling/index.ts new file mode 100644 index 00000000000..0c6329774e7 --- /dev/null +++ b/src/services/programmatic-tool-calling/index.ts @@ -0,0 +1,25 @@ +/** + * Programmatic Tool Calling Service + * + * Enables models to generate Python code that calls multiple tools within a single + * sandboxed Docker execution, reducing round-trips to the model. 
+ *
+ * Architecture:
+ * - DockerSandboxExecutor: Manages Docker container lifecycle and IPC
+ * - ToolBridge: Generates Python SDK code for tool functions
+ * - Types: Shared type definitions for the service
+ */
+
+export { DockerSandboxExecutor } from "./DockerSandboxExecutor"
+export type { ToolApprovalCallback, ToolExecutorCallback } from "./DockerSandboxExecutor"
+export { generatePythonSDK, generateExecutionScript } from "./ToolBridge"
+export { SUPPORTED_PROGRAMMATIC_TOOLS, DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "./types"
+export type {
+	CodeExecutionResult,
+	SandboxConfig,
+	SandboxToolCall,
+	SandboxToolResult,
+	ToolExecutionRequest,
+	ToolExecutionResponse,
+	SupportedProgrammaticTool,
+} from "./types"
diff --git a/src/services/programmatic-tool-calling/types.ts b/src/services/programmatic-tool-calling/types.ts
new file mode 100644
index 00000000000..43c968dafb3
--- /dev/null
+++ b/src/services/programmatic-tool-calling/types.ts
@@ -0,0 +1,120 @@
+/**
+ * Types for the programmatic tool calling service.
+ *
+ * Programmatic tool calling allows models to generate Python code that calls
+ * tool functions within a sandboxed Docker container, enabling multiple tool
+ * invocations in a single round-trip to the model.
+ */
+
+/**
+ * Represents a single tool call made from within the Python sandbox.
+ */
+export interface SandboxToolCall {
+	/** The tool name (e.g., "read_file", "write_to_file") */
+	tool: string
+	/** The tool arguments as a JSON-serializable object; values are `unknown` and must be narrowed before use */
+	args: Record<string, unknown>
+}
+
+/**
+ * Result of a single tool call executed from the sandbox.
+ */
+export interface SandboxToolResult {
+	/** Whether the tool call succeeded */
+	success: boolean
+	/** The tool result (string content) on success */
+	result?: string
+	/** Error message on failure */
+	error?: string
+}
+
+/**
+ * A request from the sandbox to execute a tool.
+ * Sent from the Docker container to Roo Code via the IPC bridge.
+ */
+export interface ToolExecutionRequest {
+	/** Unique request ID for correlating responses */
+	requestId: string
+	/** The tool to invoke */
+	tool: string
+	/** Arguments for the tool, keyed by argument name; values are `unknown` and must be narrowed before use */
+	args: Record<string, unknown>
+}
+
+/**
+ * A response sent back to the sandbox after a tool is executed.
+ */
+export interface ToolExecutionResponse {
+	/** The request ID this response corresponds to */
+	requestId: string
+	/** The result of the tool execution */
+	result: SandboxToolResult
+}
+
+/**
+ * Result of executing a Python code block in the sandbox.
+ */
+export interface CodeExecutionResult {
+	/** Whether the code execution completed successfully */
+	success: boolean
+	/** Standard output from the code execution */
+	stdout: string
+	/** Standard error from the code execution */
+	stderr: string
+	/** The tool calls that were made during execution, with their results */
+	toolCalls: Array<{
+		call: SandboxToolCall
+		result: SandboxToolResult
+	}>
+	/** Error message if execution failed */
+	error?: string
+	/** Execution duration in milliseconds */
+	durationMs: number
+}
+
+/**
+ * Configuration for the Docker sandbox.
+ */
+export interface SandboxConfig {
+	/** Docker image to use for the sandbox (default: "python:3.12-slim") */
+	image: string
+	/** Memory limit in bytes (default: 256MB) */
+	memoryLimit: number
+	/** CPU limit as a fraction of a CPU (default: 0.5) */
+	cpuLimit: number
+	/** Execution timeout in milliseconds (default: 30000) */
+	timeoutMs: number
+	/** Whether network access is allowed (default: false) */
+	networkEnabled: boolean
+}
+
+/**
+ * Default sandbox configuration values.
+ */
+export const DEFAULT_SANDBOX_CONFIG: SandboxConfig = {
+	image: "python:3.12-slim",
+	memoryLimit: 256 * 1024 * 1024, // 256MB
+	cpuLimit: 0.5,
+	timeoutMs: 30_000,
+	networkEnabled: false,
+}
+
+/**
+ * The subset of tools supported in the initial programmatic tool calling implementation.
+ */
+export const SUPPORTED_PROGRAMMATIC_TOOLS = [
+	"read_file",
+	"write_to_file",
+	"execute_command",
+	"search_files",
+	"list_files",
+] as const
+
+export type SupportedProgrammaticTool = (typeof SUPPORTED_PROGRAMMATIC_TOOLS)[number]
+
+/**
+ * Type guard: returns true when `toolName` is one of SUPPORTED_PROGRAMMATIC_TOOLS, narrowing it to SupportedProgrammaticTool.
+ */
+export function isSupportedProgrammaticTool(toolName: string): toolName is SupportedProgrammaticTool {
+	return SUPPORTED_PROGRAMMATIC_TOOLS.some((supported) => supported === toolName)
+}