From ad968d16bfb9a8b6b9d514ae7fb45bdeadab6c51 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Tue, 17 Feb 2026 18:01:34 +0000
Subject: [PATCH] feat: add foundational support for programmatic tool calling

- Add supportsProgrammaticToolCalling flag to ModelInfo schema
- Add enableProgrammaticToolCalling setting to GlobalSettings
- Set the flag on supported Anthropic models (Claude Sonnet 4.5, Opus 4.x)
- Add ApiStreamCodeExecutionChunk type to the API stream
- Create programmatic-tool-calling service with:
  - DockerSandboxExecutor for isolated Python code execution
  - ToolBridge for generating Python SDK with tool function stubs
  - IPC protocol for tool call communication between sandbox and host
  - Support for read_file, write_to_file, execute_command, search_files, list_files
- Add comprehensive tests (27 passing)

Addresses #11506
---
 packages/types/src/global-settings.ts         |   9 +
 packages/types/src/model.ts                   |   7 +
 packages/types/src/providers/anthropic.ts     |   6 +
 src/api/transform/stream.ts                   |  14 +
 .../DockerSandboxExecutor.ts                  | 309 ++++++++++++++++++
 .../programmatic-tool-calling/ToolBridge.ts   | 215 ++++++++++++
 .../__tests__/DockerSandboxExecutor.spec.ts   | 103 ++++++
 .../__tests__/ToolBridge.spec.ts              | 109 ++++++
 .../__tests__/types.spec.ts                   |  57 ++++
 .../programmatic-tool-calling/index.ts        |  25 ++
 .../programmatic-tool-calling/types.ts        | 120 +++++++
 11 files changed, 974 insertions(+)
 create mode 100644 src/services/programmatic-tool-calling/DockerSandboxExecutor.ts
 create mode 100644 src/services/programmatic-tool-calling/ToolBridge.ts
 create mode 100644 src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts
 create mode 100644 src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts
 create mode 100644 src/services/programmatic-tool-calling/__tests__/types.spec.ts
 create mode 100644 src/services/programmatic-tool-calling/index.ts
 create mode 100644 src/services/programmatic-tool-calling/types.ts
diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts
index de3bd076616..bd03fb0ff86 100644
--- a/packages/types/src/global-settings.ts
+++ b/packages/types/src/global-settings.ts
@@ -231,6 +231,15 @@ export const globalSettingsSchema = z.object({
 	 * Tools in this list will be excluded from prompt generation and rejected at execution time.
 	 */
 	disabledTools: z.array(toolNamesSchema).optional(),
+
+	/**
+	 * Whether to enable programmatic tool calling.
+	 * When enabled, supported models can generate Python code that calls multiple tools
+	 * within a single sandboxed code execution, reducing round-trips to the model.
+	 * Tools still require individual approval before execution.
+	 * @default false
+	 */
+	enableProgrammaticToolCalling: z.boolean().optional(),
 })
 
 export type GlobalSettings = z.infer<typeof globalSettingsSchema>
diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts
index 95e9095a89e..04c1b413308 100644
--- a/packages/types/src/model.ts
+++ b/packages/types/src/model.ts
@@ -117,6 +117,13 @@ export const modelInfoSchema = z.object({
 	// These tools will be added if they belong to an allowed group in the current mode
 	// Cannot force-add tools from groups the mode doesn't allow
 	includedTools: z.array(z.string()).optional(),
+	/**
+	 * Whether the model supports programmatic tool calling.
+	 * When true, the model can generate Python code that calls tool functions,
+	 * enabling multiple tool calls within a single code execution in a sandbox.
+	 * Currently supported by Anthropic Claude models.
+	 */
+	supportsProgrammaticToolCalling: z.boolean().optional(),
 	/**
 	 * Service tiers with pricing information.
 	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts
index 62e377c7e54..4807e709336 100644
--- a/packages/types/src/providers/anthropic.ts
+++ b/packages/types/src/providers/anthropic.ts
@@ -17,6 +17,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
 		tiers: [
 			{
@@ -38,6 +39,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
 		tiers: [
 			{
@@ -59,6 +61,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 6.25, // $6.25 per million tokens
 		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 		// Tiered pricing for extended context (requires beta flag)
 		tiers: [
 			{
@@ -80,6 +83,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 6.25, // $6.25 per million tokens
 		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 	},
 	"claude-opus-4-1-20250805": {
 		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
@@ -91,6 +95,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 18.75, // $18.75 per million tokens
 		cacheReadsPrice: 1.5, // $1.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 	},
 	"claude-opus-4-20250514": {
 		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
@@ -102,6 +107,7 @@ export const anthropicModels = {
 		cacheWritesPrice: 18.75, // $18.75 per million tokens
 		cacheReadsPrice: 1.5, // $1.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsProgrammaticToolCalling: true,
 	},
 	"claude-3-7-sonnet-20250219:thinking": {
 		maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k.
diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts
index 960ebbe770d..8fa39cb770a 100644
--- a/src/api/transform/stream.ts
+++ b/src/api/transform/stream.ts
@@ -11,6 +11,7 @@ export type ApiStreamChunk =
 	| ApiStreamToolCallDeltaChunk
 	| ApiStreamToolCallEndChunk
 	| ApiStreamToolCallPartialChunk
+	| ApiStreamCodeExecutionChunk
 	| ApiStreamError
 
 export interface ApiStreamError {
@@ -107,6 +108,19 @@ export interface ApiStreamToolCallPartialChunk {
 	arguments?: string
 }
 
+/**
+ * Code execution chunk from programmatic tool calling.
+ * The model generates Python code that calls tool functions in a sandboxed environment.
+ * This chunk carries the block's identifier and the code to execute; it has no field for execution results.
+ */
+export interface ApiStreamCodeExecutionChunk {
+	type: "code_execution"
+	/** Unique identifier for this code execution block */
+	id: string
+	/** The Python code generated by the model */
+	code: string
+}
+
 export interface GroundingSource {
 	title: string
 	url: string
diff --git a/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts b/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts
new file mode 100644
index 00000000000..8504223e05a
--- /dev/null
+++ b/src/services/programmatic-tool-calling/DockerSandboxExecutor.ts
@@ -0,0 +1,309 @@
+/**
+ * DockerSandboxExecutor - Executes Python code in an isolated Docker container.
+ *
+ * This executor manages the lifecycle of Docker containers used for programmatic
+ * tool calling. It handles:
+ * - Container creation with resource limits
+ * - Mounting the Python tool SDK
+ * - IPC communication between the container and Roo Code
+ * - Tool call routing and approval
+ * - Execution timeout and cleanup
+ */
+
+import { spawn } from "child_process"
+import { EventEmitter } from "events"
+import * as fs from "fs/promises"
+import * as path from "path"
+import * as os from "os"
+
+import type { CodeExecutionResult, SandboxConfig, SandboxToolCall, SandboxToolResult } from "./types"
+import { DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "./types"
+import { generatePythonSDK, generateExecutionScript } from "./ToolBridge"
+
+/**
+ * Callback type for requesting tool approval from the user.
+ * Returns true if the tool call is approved, false otherwise.
+ */
+export type ToolApprovalCallback = (toolCall: SandboxToolCall) => Promise<boolean>
+
+/**
+ * Callback type for executing an approved tool call.
+ * Returns the tool result.
+ */
+export type ToolExecutorCallback = (toolCall: SandboxToolCall) => Promise<SandboxToolResult>
+
+interface ExecutorOptions {
+	config?: Partial<SandboxConfig>
+	onToolApproval: ToolApprovalCallback
+	onToolExecute: ToolExecutorCallback
+}
+
+/**
+ * DockerSandboxExecutor manages the execution of Python code in a Docker container
+ * with IPC-based tool calling support.
+ * Emits a "stdout" event for each sandbox output line that is not a protocol message.
+ */
+export class DockerSandboxExecutor extends EventEmitter {
+	private readonly config: SandboxConfig
+	private readonly onToolApproval: ToolApprovalCallback
+	private readonly onToolExecute: ToolExecutorCallback
+
+	constructor(options: ExecutorOptions) {
+		super()
+		this.config = { ...DEFAULT_SANDBOX_CONFIG, ...options.config }
+		this.onToolApproval = options.onToolApproval
+		this.onToolExecute = options.onToolExecute
+	}
+
+	/**
+	 * Check if Docker is available on the system.
+	 * @returns true when `docker info` exits with code 0; false when it fails or cannot be spawned
+	 */
+	async isDockerAvailable(): Promise<boolean> {
+		return new Promise((resolve) => {
+			const proc = spawn("docker", ["info"], {
+				stdio: ["ignore", "ignore", "ignore"],
+			})
+			proc.on("close", (code) => resolve(code === 0))
+			proc.on("error", () => resolve(false))
+		})
+	}
+
+	/**
+	 * Execute Python code in a Docker sandbox with tool calling support.
+	 *
+	 * Sandbox messages are handled one at a time in arrival order, and the result is reported
+	 * only after every queued message (including in-flight tool calls) has settled. The sandbox's
+	 * "complete" message, when received, decides success/error and its traceback is appended to
+	 * stderr; otherwise the container exit code decides.
+	 *
+	 * @param code - The Python code to execute (generated by the model)
+	 * @returns The execution result including stdout, stderr, and tool call history
+	 */
+	async execute(code: string): Promise<CodeExecutionResult> {
+		const startTime = Date.now()
+		const toolCalls: Array<{ call: SandboxToolCall; result: SandboxToolResult }> = []
+
+		// Create a temporary directory for mounting the SDK and script
+		const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "roo-sandbox-"))
+		// The unique directory name doubles as the container name so a timeout can kill it
+		const containerName = path.basename(tmpDir)
+
+		try {
+			// Write the Python SDK module and the wrapper script that runs the model's code
+			await fs.writeFile(path.join(tmpDir, "roo_tools.py"), generatePythonSDK(), "utf-8")
+			await fs.writeFile(path.join(tmpDir, "run.py"), generateExecutionScript(code), "utf-8")
+
+			return await new Promise<CodeExecutionResult>((resolve) => {
+				let stdout = ""
+				let stderr = ""
+				let stdoutBuffer = ""
+				let resolved = false
+				let completion: Record<string, unknown> | undefined
+				// Tail of the message queue; every new line is chained onto it
+				let pending: Promise<void> = Promise.resolve()
+
+				const proc = spawn("docker", this.buildDockerArgs(tmpDir, containerName), {
+					stdio: ["pipe", "pipe", "pipe"],
+				})
+
+				// Resolve exactly once, whichever of timeout / close / error fires first
+				const finish = (success: boolean, error?: string) => {
+					if (resolved) {
+						return
+					}
+					resolved = true
+					clearTimeout(timeoutId)
+					resolve({ success, stdout, stderr, toolCalls, error, durationMs: Date.now() - startTime })
+				}
+
+				const timeoutId = setTimeout(() => {
+					// SIGKILL on the docker CLI is not forwarded to the container, so the
+					// container is also killed by name; --rm then removes it
+					spawn("docker", ["kill", containerName], { stdio: "ignore" }).on("error", () => undefined)
+					proc.kill("SIGKILL")
+					finish(false, `Execution timed out after ${this.config.timeoutMs}ms`)
+				}, this.config.timeoutMs)
+
+				// Queue a sandbox line; handler failures are recorded in stderr, never thrown
+				const enqueue = (line: string) => {
+					pending = pending
+						.then(() => this.handleSandboxMessage(line, proc, toolCalls))
+						.then((outcome) => {
+							if (outcome.output !== undefined) {
+								stdout += `${outcome.output}\n`
+							}
+							completion = outcome.completion ?? completion
+						})
+						.catch((err: unknown) => {
+							stderr += `IPC error: ${err instanceof Error ? err.message : String(err)}\n`
+						})
+				}
+
+				// A write to a sandbox that already exited must not raise an unhandled stream error
+				proc.stdin?.on("error", (err) => {
+					stderr += `IPC error: ${err.message}\n`
+				})
+
+				// Handle stdout (IPC messages from the sandbox): only complete lines are processed
+				proc.stdout?.on("data", (data: Buffer) => {
+					stdoutBuffer += data.toString()
+					const lines = stdoutBuffer.split("\n")
+					stdoutBuffer = lines.pop() ?? "" // Keep incomplete line in buffer
+					for (const line of lines) {
+						if (line.trim()) {
+							enqueue(line)
+						}
+					}
+				})
+
+				// Collect stderr
+				proc.stderr?.on("data", (data: Buffer) => {
+					stderr += data.toString()
+				})
+
+				// Handle process exit
+				proc.on("close", (exitCode) => {
+					// An unterminated final line is still a message (or plain output)
+					if (stdoutBuffer.trim()) {
+						enqueue(stdoutBuffer)
+					}
+					void pending.then(() => {
+						if (!completion) {
+							finish(exitCode === 0, exitCode !== 0 ? `Process exited with code ${exitCode}` : undefined)
+							return
+						}
+						if (typeof completion.traceback === "string") {
+							stderr += completion.traceback
+						}
+						finish(completion.success === true, typeof completion.error === "string" ? completion.error : undefined)
+					})
+				})
+
+				proc.on("error", (err) => {
+					finish(false, `Failed to start Docker container: ${err.message}`)
+				})
+			})
+		} finally {
+			// Clean up temporary directory
+			await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {
+				// Ignore cleanup errors
+			})
+		}
+	}
+
+	/**
+	 * Build the Docker run command arguments.
+	 *
+	 * @param tmpDir - Host directory holding roo_tools.py and run.py, mounted read-only at /workspace
+	 * @param containerName - Optional container name, so the container can be addressed later
+	 * @returns Argument list for the `docker` binary, starting with "run"
+	 */
+	private buildDockerArgs(tmpDir: string, containerName?: string): string[] {
+		return [
+			"run",
+			"--rm",
+			...(containerName ? [`--name=${containerName}`] : []),
+			// Resource limits
+			`--memory=${this.config.memoryLimit}`,
+			`--cpus=${this.config.cpuLimit}`,
+			// Security: read-only root filesystem, no new privileges
+			"--read-only",
+			"--security-opt=no-new-privileges",
+			// Disable network unless explicitly enabled
+			...(this.config.networkEnabled ? [] : ["--network=none"]),
+			// Mount the SDK and script as read-only
+			`-v=${tmpDir}:/workspace:ro`,
+			// Working directory
+			"-w=/workspace",
+			// Interactive mode for stdin/stdout IPC
+			"-i",
+			// Image
+			this.config.image,
+			// Command
+			"python",
+			"/workspace/run.py",
+		]
+	}
+
+	/** Write one JSON message line to the sandbox's stdin unless the stream is already destroyed. */
+	private send(proc: ReturnType<typeof spawn>, payload: unknown): void {
+		if (proc.stdin && !proc.stdin.destroyed) {
+			proc.stdin.write(`${JSON.stringify(payload)}\n`)
+		}
+	}
+
+	/**
+	 * Handle one line of sandbox stdout.
+	 *
+	 * @param line - A complete, non-blank stdout line
+	 * @param proc - The docker child process, used to reply over stdin
+	 * @param toolCalls - History array that every tool request/result pair is appended to
+	 * @returns `output` when the line was not a protocol message, `completion` when it was the
+	 *          sandbox's "complete" message, and an empty object otherwise
+	 */
+	private async handleSandboxMessage(
+		line: string,
+		proc: ReturnType<typeof spawn>,
+		toolCalls: Array<{ call: SandboxToolCall; result: SandboxToolResult }>,
+	): Promise<{ output?: string; completion?: Record<string, unknown> }> {
+		let parsed: unknown
+		try {
+			parsed = JSON.parse(line)
+		} catch {
+			parsed = undefined
+		}
+		// Only JSON objects can be protocol messages; anything else is regular program output
+		if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+			this.emit("stdout", line)
+			return { output: line }
+		}
+		const message = parsed as Record<string, unknown>
+
+		switch (message.type) {
+			case "ready":
+				// Container is ready, send start signal
+				this.send(proc, { type: "start" })
+				return {}
+
+			case "tool_request": {
+				// The sandbox is untrusted, so malformed fields are normalized instead of cast
+				const rawArgs = message.args
+				const toolCall: SandboxToolCall = {
+					tool: typeof message.tool === "string" ? message.tool : "",
+					args: typeof rawArgs === "object" && rawArgs !== null ? (rawArgs as Record<string, unknown>) : {},
+				}
+
+				let result: SandboxToolResult
+				if (!isSupportedProgrammaticTool(toolCall.tool)) {
+					result = {
+						success: false,
+						error: `Tool '${toolCall.tool}' is not supported in programmatic tool calling`,
+					}
+				} else {
+					try {
+						// Request approval from the user, then execute the tool
+						result = (await this.onToolApproval(toolCall))
+							? await this.onToolExecute(toolCall)
+							: { success: false, error: `Tool '${toolCall.tool}' was not approved by the user` }
+					} catch (err) {
+						// Always answer the request; otherwise the sandbox blocks until the timeout
+						result = { success: false, error: err instanceof Error ? err.message : String(err) }
+					}
+				}
+				toolCalls.push({ call: toolCall, result })
+				// Send response back to the sandbox
+				this.send(proc, { requestId: message.requestId, result })
+				return {}
+			}
+
+			case "complete":
+				return { completion: message }
+
+			default:
+				// Unknown message type, emit as stdout
+				this.emit("stdout", line)
+				return { output: line }
+		}
+	}
+}
diff --git a/src/services/programmatic-tool-calling/ToolBridge.ts b/src/services/programmatic-tool-calling/ToolBridge.ts
new file mode 100644
index 00000000000..3aae9407d9f
--- /dev/null
+++ b/src/services/programmatic-tool-calling/ToolBridge.ts
@@ -0,0 +1,215 @@
+/**
+ * ToolBridge - Maps Roo Code tools to Python function definitions for the sandbox.
+ *
+ * This module generates the Python SDK code that gets mounted into the Docker container.
+ * It provides Python function stubs that communicate back to Roo Code via stdin/stdout
+ * IPC to execute actual tool operations.
+ */
+
+import { SUPPORTED_PROGRAMMATIC_TOOLS, type SupportedProgrammaticTool } from "./types"
+
+/**
+ * Python function signatures for each supported tool.
+ * These are used to generate the Python SDK that runs inside the Docker sandbox.
+ */
+const TOOL_PYTHON_DEFINITIONS: Record<SupportedProgrammaticTool, string> = {
+	read_file: `def read_file(path: str, mode: str = "slice", offset: int = 1, limit: int = 2000) -> str:
+    """Read a file and return its contents.
+    
+    Args:
+        path: The file path relative to the workspace directory.
+        mode: Reading mode - 'slice' for sequential lines (default).
+        offset: 1-based line offset to start reading from (default: 1).
+        limit: Maximum number of lines to return (default: 2000).
+    
+    Returns:
+        The file contents as a string.
+    """
+    return _call_tool("read_file", {"path": path, "mode": mode, "offset": offset, "limit": limit})`,
+
+	write_to_file: `def write_to_file(path: str, content: str) -> str:
+    """Write content to a file. Creates the file if it doesn't exist, overwrites if it does.
+    
+    Args:
+        path: The file path relative to the workspace directory.
+        content: The complete file content to write.
+    
+    Returns:
+        A confirmation message.
+    """
+    return _call_tool("write_to_file", {"path": path, "content": content})`,
+
+	execute_command: `def execute_command(command: str, cwd: str = None) -> str:
+    """Execute a CLI command on the host system.
+    
+    Args:
+        command: The command to execute.
+        cwd: Optional working directory for the command.
+    
+    Returns:
+        The command output (stdout and stderr).
+    """
+    args = {"command": command}
+    if cwd is not None:
+        args["cwd"] = cwd
+    return _call_tool("execute_command", args)`,
+
+	search_files: `def search_files(path: str, regex: str, file_pattern: str = None) -> str:
+    """Search for a regex pattern across files in a directory.
+    
+    Args:
+        path: The directory path to search in (relative to workspace).
+        regex: The regular expression pattern to search for.
+        file_pattern: Optional glob pattern to filter files (e.g., '*.ts').
+    
+    Returns:
+        Search results with context.
+    """
+    args = {"path": path, "regex": regex}
+    if file_pattern is not None:
+        args["file_pattern"] = file_pattern
+    return _call_tool("search_files", args)`,
+
+	list_files: `def list_files(path: str, recursive: bool = False) -> str:
+    """List files and directories in a specified directory.
+    
+    Args:
+        path: The directory path to list contents for (relative to workspace).
+        recursive: Whether to list files recursively (default: False).
+    
+    Returns:
+        A listing of files and directories.
+    """
+    return _call_tool("list_files", {"path": path, "recursive": recursive})`,
+}
+
+/**
+ * Generate the complete Python SDK code that will be mounted into the Docker sandbox.
+ *
+ * The SDK provides:
+ * - Tool function definitions (read_file, write_to_file, etc.)
+ * - IPC mechanism using stdin/stdout JSON messages
+ * - Error handling and result parsing
+ *
+ * @param enabledTools - Subset of tools to include in the SDK (defaults to all supported tools)
+ * @returns The Python SDK source code as a string
+ */
+export function generatePythonSDK(enabledTools?: SupportedProgrammaticTool[]): string {
+	const tools = enabledTools ?? [...SUPPORTED_PROGRAMMATIC_TOOLS]
+
+	const toolDefinitions = tools.map((tool) => TOOL_PYTHON_DEFINITIONS[tool]).join("\n\n")
+
+	return `"""
+Roo Code Programmatic Tool Calling SDK
+
+This module provides Python functions for calling Roo Code tools from within
+a sandboxed environment. Each function communicates with the host Roo Code
+instance via JSON IPC over stdin/stdout.
+
+Auto-generated - do not edit manually.
+"""
+
+import json
+import sys
+import uuid
+
+
+def _call_tool(tool_name: str, args: dict) -> str:
+    """Internal: Send a tool call request to Roo Code and wait for the result.
+    
+    Communication protocol:
+    1. Write a JSON request to stdout (flushed)
+    2. Read a JSON response from stdin
+    3. Parse and return the result or raise an error
+    """
+    request_id = str(uuid.uuid4())
+    
+    request = {
+        "type": "tool_request",
+        "requestId": request_id,
+        "tool": tool_name,
+        "args": args,
+    }
+    
+    # Send request via stdout
+    sys.stdout.write(json.dumps(request) + "\\n")
+    sys.stdout.flush()
+    
+    # Read response from stdin
+    response_line = sys.stdin.readline()
+    if not response_line:
+        raise RuntimeError(f"No response received for tool call: {tool_name}")
+    
+    response = json.loads(response_line)
+    
+    if response.get("requestId") != request_id:
+        raise RuntimeError(
+            f"Response ID mismatch: expected {request_id}, got {response.get('requestId')}"
+        )
+    
+    result = response.get("result", {})
+    
+    if not result.get("success", False):
+        error_msg = result.get("error", "Unknown error")
+        raise RuntimeError(f"Tool '{tool_name}' failed: {error_msg}")
+    
+    return result.get("result", "")
+
+
+# Tool function definitions
+${toolDefinitions}
+
+
+# Export list of available tools
+AVAILABLE_TOOLS = [${tools.map((t) => `"${t}"`).join(", ")}]
+`
+}
+
+/**
+ * Generate a Python wrapper script that imports the SDK and executes user code.
+ * Every user-code line is prefixed with four spaces to sit inside the wrapper's try block; lines inside multi-line string literals are prefixed too.
+ * @param userCode - The Python code generated by the model
+ * @returns Complete Python script to execute in the sandbox
+ */
+export function generateExecutionScript(userCode: string): string {
+	return `"""
+Roo Code Programmatic Tool Calling - Execution Wrapper
+"""
+import json
+import sys
+import traceback
+
+# Import the tool SDK
+from roo_tools import *
+
+# Signal ready
+sys.stdout.write(json.dumps({"type": "ready"}) + "\\n")
+sys.stdout.flush()
+
+# Wait for start signal
+start_line = sys.stdin.readline()
+start = json.loads(start_line)
+if start.get("type") != "start":
+    sys.exit(1)
+
+try:
+    # Execute the model-generated code
+${userCode
+	.split("\n")
+	.map((line) => `    ${line}`)
+	.join("\n")}
+    
+    # Signal completion
+    sys.stdout.write(json.dumps({"type": "complete", "success": True}) + "\\n")
+    sys.stdout.flush()
+except Exception as e:
+    # Signal error
+    sys.stdout.write(json.dumps({
+        "type": "complete",
+        "success": False,
+        "error": str(e),
+        "traceback": traceback.format_exc(),
+    }) + "\\n")
+    sys.stdout.flush()
+`
+}
diff --git a/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts b/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts
new file mode 100644
index 00000000000..67d39d2cf33
--- /dev/null
+++ b/src/services/programmatic-tool-calling/__tests__/DockerSandboxExecutor.spec.ts
@@ -0,0 +1,103 @@
+import { DockerSandboxExecutor } from "../DockerSandboxExecutor"
+import type { SandboxToolCall, SandboxToolResult } from "../types"
+import { DEFAULT_SANDBOX_CONFIG } from "../types"
+
+// Mock child_process
+vi.mock("child_process", () => ({
+	spawn: vi.fn(),
+}))
+
+// Mock fs/promises
+vi.mock("fs/promises", () => ({
+	mkdtemp: vi.fn().mockResolvedValue("/tmp/roo-sandbox-test"),
+	writeFile: vi.fn().mockResolvedValue(undefined),
+	rm: vi.fn().mockResolvedValue(undefined),
+}))
+
+describe("DockerSandboxExecutor", () => {
+	let executor: DockerSandboxExecutor
+	let mockApproval: ReturnType<typeof vi.fn>
+	let mockExecute: ReturnType<typeof vi.fn>
+
+	beforeEach(() => {
+		mockApproval = vi.fn().mockResolvedValue(true)
+		mockExecute = vi.fn().mockResolvedValue({ success: true, result: "ok" } satisfies SandboxToolResult)
+		executor = new DockerSandboxExecutor({
+			onToolApproval: mockApproval,
+			onToolExecute: mockExecute,
+		})
+	})
+
+	describe("constructor", () => {
+		it("should use default config when no config is provided", () => {
+			const exec = new DockerSandboxExecutor({
+				onToolApproval: mockApproval,
+				onToolExecute: mockExecute,
+			})
+			// Executor is created without errors
+			expect(exec).toBeDefined()
+		})
+
+		it("should merge custom config with defaults", () => {
+			const exec = new DockerSandboxExecutor({
+				config: { timeoutMs: 60_000, networkEnabled: true },
+				onToolApproval: mockApproval,
+				onToolExecute: mockExecute,
+			})
+			expect(exec).toBeDefined()
+		})
+	})
+
+	describe("isDockerAvailable", () => {
+		it("should resolve to false when docker is not available", async () => {
+			const { spawn } = await import("child_process")
+			const mockSpawn = vi.mocked(spawn)
+
+			// Mock spawn to simulate docker not found
+			mockSpawn.mockImplementation((() => {
+				const proc = {
+					on: vi.fn((event: string, cb: (...args: unknown[]) => void) => {
+						if (event === "error") {
+							setTimeout(() => cb(new Error("ENOENT")), 0)
+						}
+						return proc
+					}),
+				}
+				return proc
+			}) as unknown as typeof spawn)
+
+			const result = await executor.isDockerAvailable()
+			expect(result).toBe(false)
+		})
+
+		it("should resolve to true when docker is available", async () => {
+			const { spawn } = await import("child_process")
+			const mockSpawn = vi.mocked(spawn)
+
+			mockSpawn.mockImplementation((() => {
+				const proc = {
+					on: vi.fn((event: string, cb: (...args: unknown[]) => void) => {
+						if (event === "close") {
+							setTimeout(() => cb(0), 0)
+						}
+						return proc
+					}),
+				}
+				return proc
+			}) as unknown as typeof spawn)
+
+			const result = await executor.isDockerAvailable()
+			expect(result).toBe(true)
+		})
+	})
+
+	describe("buildDockerArgs (private)", () => {
+		it("should construct proper Docker arguments", () => {
+			// Bracket access reaches the private method so the real argument list is asserted
+			const args: string[] = executor["buildDockerArgs"]("/tmp/roo-sandbox-test")
+			expect(args).toContain(DEFAULT_SANDBOX_CONFIG.image)
+			expect(args).toContain("--network=none")
+			expect(args).toContain("-v=/tmp/roo-sandbox-test:/workspace:ro")
+		})
+	})
+})
diff --git a/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts b/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts
new file mode 100644
index 00000000000..0b389ca7449
--- /dev/null
+++ b/src/services/programmatic-tool-calling/__tests__/ToolBridge.spec.ts
@@ -0,0 +1,109 @@
+import { generatePythonSDK, generateExecutionScript } from "../ToolBridge"
+import { SUPPORTED_PROGRAMMATIC_TOOLS } from "../types"
+
+describe("ToolBridge", () => {
+	describe("generatePythonSDK", () => {
+		it("should generate valid Python code", () => {
+			const sdk = generatePythonSDK()
+			expect(sdk).toContain("import json")
+			expect(sdk).toContain("import sys")
+			expect(sdk).toContain("import uuid")
+		})
+
+		it("should include the _call_tool helper function", () => {
+			const sdk = generatePythonSDK()
+			expect(sdk).toContain("def _call_tool(tool_name: str, args: dict) -> str:")
+		})
+
+		it("should include all supported tool functions by default", () => {
+			const sdk = generatePythonSDK()
+			expect(sdk).toContain("def read_file(")
+			expect(sdk).toContain("def write_to_file(")
+			expect(sdk).toContain("def execute_command(")
+			expect(sdk).toContain("def search_files(")
+			expect(sdk).toContain("def list_files(")
+		})
+
+		it("should include AVAILABLE_TOOLS list with all tools", () => {
+			const sdk = generatePythonSDK()
+			expect(sdk).toContain("AVAILABLE_TOOLS")
+			for (const tool of SUPPORTED_PROGRAMMATIC_TOOLS) {
+				expect(sdk).toContain(`"${tool}"`)
+			}
+		})
+
+		it("should generate SDK with only specified tools when filtered", () => {
+			const sdk = generatePythonSDK(["read_file", "list_files"])
+			expect(sdk).toContain("def read_file(")
+			expect(sdk).toContain("def list_files(")
+			expect(sdk).not.toContain("def write_to_file(")
+			expect(sdk).not.toContain("def execute_command(")
+			expect(sdk).not.toContain("def search_files(")
+		})
+
+		it("should include the IPC protocol implementation", () => {
+			const sdk = generatePythonSDK()
+			// Should write JSON to stdout
+			expect(sdk).toContain("sys.stdout.write(json.dumps(request)")
+			// Should read JSON from stdin
+			expect(sdk).toContain("sys.stdin.readline()")
+			// Should use request IDs
+			expect(sdk).toContain("uuid.uuid4()")
+		})
+
+		it("should include proper docstrings for tool functions", () => {
+			const sdk = generatePythonSDK()
+			expect(sdk).toContain('"""Read a file and return its contents.')
+			expect(sdk).toContain('"""Write content to a file.')
+			expect(sdk).toContain('"""Execute a CLI command')
+			expect(sdk).toContain('"""Search for a regex pattern')
+			expect(sdk).toContain('"""List files and directories')
+		})
+	})
+
+	describe("generateExecutionScript", () => {
+		it("should wrap user code in the execution template", () => {
+			const userCode = 'result = read_file("test.txt")\nprint(result)'
+			const script = generateExecutionScript(userCode)
+
+			// Should import the tool SDK
+			expect(script).toContain("from roo_tools import *")
+
+			// Should include the user code (indented)
+			expect(script).toContain('    result = read_file("test.txt")')
+			expect(script).toContain("    print(result)")
+		})
+
+		it("should send ready signal on startup", () => {
+			const script = generateExecutionScript("pass")
+			expect(script).toContain('"type": "ready"')
+		})
+
+		it("should wait for start signal before executing", () => {
+			const script = generateExecutionScript("pass")
+			expect(script).toContain('!= "start"')
+		})
+
+		it("should send completion signal with success status", () => {
+			const script = generateExecutionScript("pass")
+			expect(script).toContain('"type": "complete"')
+			expect(script).toContain('"success": True')
+		})
+
+		it("should handle exceptions and report errors", () => {
+			const script = generateExecutionScript("pass")
+			expect(script).toContain("except Exception as e")
+			expect(script).toContain('"success": False')
+			expect(script).toContain("traceback.format_exc()")
+		})
+
+		it("should properly indent multi-line user code", () => {
+			const userCode = "for i in range(3):\n    print(i)\n    read_file(f'file_{i}.txt')"
+			const script = generateExecutionScript(userCode)
+
+			expect(script).toContain("    for i in range(3):")
+			expect(script).toContain("        print(i)")
+			expect(script).toContain("        read_file(f'file_{i}.txt')")
+		})
+	})
+})
diff --git a/src/services/programmatic-tool-calling/__tests__/types.spec.ts b/src/services/programmatic-tool-calling/__tests__/types.spec.ts
new file mode 100644
index 00000000000..63c026f2cbf
--- /dev/null
+++ b/src/services/programmatic-tool-calling/__tests__/types.spec.ts
@@ -0,0 +1,57 @@
+import { SUPPORTED_PROGRAMMATIC_TOOLS, DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "../types"
+
+describe("programmatic-tool-calling types", () => {
+	describe("SUPPORTED_PROGRAMMATIC_TOOLS", () => {
+		it("should include the initial subset of tools", () => {
+			// The five tool names the first implementation is expected to expose
+			const initialTools = ["read_file", "write_to_file", "execute_command", "search_files", "list_files"]
+			initialTools.forEach((toolName) => {
+				expect(SUPPORTED_PROGRAMMATIC_TOOLS).toContain(toolName)
+			})
+		})
+
+		it("should have exactly 5 tools in the initial implementation", () => {
+			expect(SUPPORTED_PROGRAMMATIC_TOOLS.length).toBe(5)
+		})
+	})
+
+	describe("DEFAULT_SANDBOX_CONFIG", () => {
+		it("should use python:3.12-slim as the default image", () => {
+			expect(DEFAULT_SANDBOX_CONFIG).toHaveProperty("image", "python:3.12-slim")
+		})
+
+		it("should have a 256MB memory limit", () => {
+			expect(DEFAULT_SANDBOX_CONFIG).toHaveProperty("memoryLimit", 256 * 1024 * 1024)
+		})
+
+		it("should have a 0.5 CPU limit", () => {
+			expect(DEFAULT_SANDBOX_CONFIG).toHaveProperty("cpuLimit", 0.5)
+		})
+
+		it("should have a 30 second timeout", () => {
+			expect(DEFAULT_SANDBOX_CONFIG).toHaveProperty("timeoutMs", 30_000)
+		})
+
+		it("should have network disabled by default", () => {
+			expect(DEFAULT_SANDBOX_CONFIG).toHaveProperty("networkEnabled", false)
+		})
+	})
+
+	describe("isSupportedProgrammaticTool", () => {
+		it("should return true for supported tools", () => {
+			// Same five names that the SUPPORTED_PROGRAMMATIC_TOOLS suite checks for
+			const supported = ["read_file", "write_to_file", "execute_command", "search_files", "list_files"]
+			supported.forEach((toolName) => {
+				expect(isSupportedProgrammaticTool(toolName)).toBe(true)
+			})
+		})
+
+		it("should return false for unsupported tools", () => {
+			// Names that are not in the supported list, including the empty string
+			const unsupported = ["apply_diff", "attempt_completion", "switch_mode", "nonexistent_tool", ""]
+			unsupported.forEach((toolName) => {
+				expect(isSupportedProgrammaticTool(toolName)).toBe(false)
+			})
+		})
+	})
+})
diff --git a/src/services/programmatic-tool-calling/index.ts b/src/services/programmatic-tool-calling/index.ts
new file mode 100644
index 00000000000..0c6329774e7
--- /dev/null
+++ b/src/services/programmatic-tool-calling/index.ts
@@ -0,0 +1,25 @@
+/**
+ * Programmatic Tool Calling Service
+ *
+ * Enables models to generate Python code that calls multiple tools within a single
+ * sandboxed Docker execution, reducing round-trips to the model.
+ *
+ * Architecture:
+ * - DockerSandboxExecutor: Manages Docker container lifecycle and IPC
+ * - ToolBridge: Generates the Python SDK for tool functions and the script that wraps user code
+ * - Types: Shared type definitions, the default sandbox config, and the supported-tool list and guard
+ */
+
+export { DockerSandboxExecutor } from "./DockerSandboxExecutor"
+export type { ToolApprovalCallback, ToolExecutorCallback } from "./DockerSandboxExecutor"
+export { generatePythonSDK, generateExecutionScript } from "./ToolBridge"
+export { SUPPORTED_PROGRAMMATIC_TOOLS, DEFAULT_SANDBOX_CONFIG, isSupportedProgrammaticTool } from "./types"
+export type {
+	CodeExecutionResult,
+	SandboxConfig,
+	SandboxToolCall,
+	SandboxToolResult,
+	ToolExecutionRequest,
+	ToolExecutionResponse,
+	SupportedProgrammaticTool,
+} from "./types"
diff --git a/src/services/programmatic-tool-calling/types.ts b/src/services/programmatic-tool-calling/types.ts
new file mode 100644
index 00000000000..43c968dafb3
--- /dev/null
+++ b/src/services/programmatic-tool-calling/types.ts
@@ -0,0 +1,120 @@
+/**
+ * Types for the programmatic tool calling service.
+ *
+ * Programmatic tool calling allows models to generate Python code that calls
+ * tool functions within a sandboxed Docker container, enabling multiple tool
+ * invocations in a single round-trip to the model.
+ */
+
+/**
+ * A single tool invocation issued from within the Python sandbox.
+ */
+export interface SandboxToolCall {
+	/** Name of the tool being invoked (e.g., "read_file", "write_to_file") */
+	tool: string
+	/** Arguments passed to the tool, as a JSON-serializable object with string keys */
+	args: Record<string, unknown>
+}
+
+/**
+ * Result of a single tool call executed from the sandbox; check `success` before reading `result` or `error`.
+ */
+export interface SandboxToolResult {
+	/** Whether the tool call succeeded */
+	success: boolean
+	/** The tool result (string content) on success; optional in the type, so it may be absent */
+	result?: string
+	/** Error message on failure; optional in the type, so it may be absent */
+	error?: string
+}
+
+/**
+ * A request from the sandbox to execute a tool: the `tool`/`args` pair of a SandboxToolCall plus an ID.
+ * Sent from the Docker container to Roo Code via the IPC bridge.
+ */
+export interface ToolExecutionRequest {
+	/** Unique request ID; the matching ToolExecutionResponse carries the same value */
+	requestId: string
+	/** The tool to invoke */
+	tool: string
+	/** Arguments for the tool */
+	args: Record<string, unknown>
+}
+
+/**
+ * The reply to a ToolExecutionRequest, sent back to the sandbox after the tool is executed.
+ */
+export interface ToolExecutionResponse {
+	/** The `requestId` of the ToolExecutionRequest this response corresponds to */
+	requestId: string
+	/** The result of the tool execution */
+	result: SandboxToolResult
+}
+
+/**
+ * Result of executing one Python code block in the sandbox, including the tool calls it made.
+ */
+export interface CodeExecutionResult {
+	/** Whether the code execution completed successfully */
+	success: boolean
+	/** Standard output from the code execution */
+	stdout: string
+	/** Standard error from the code execution */
+	stderr: string
+	/** Each tool call made during execution, paired with the result recorded for it */
+	toolCalls: Array<{
+		call: SandboxToolCall
+		result: SandboxToolResult
+	}>
+	/** Error message if execution failed; optional in the type, so it may be absent */
+	error?: string
+	/** Execution duration in milliseconds */
+	durationMs: number
+}
+
+/**
+ * Configuration for the Docker sandbox; the default noted on each field is its value in DEFAULT_SANDBOX_CONFIG.
+ */
+export interface SandboxConfig {
+	/** Docker image to use for the sandbox (default: "python:3.12-slim") */
+	image: string
+	/** Memory limit in bytes (default: 256MB, i.e. 256 * 1024 * 1024) */
+	memoryLimit: number
+	/** CPU limit as a fraction of a CPU (default: 0.5) */
+	cpuLimit: number
+	/** Execution timeout in milliseconds (default: 30000, i.e. 30 seconds) */
+	timeoutMs: number
+	/** Whether network access is allowed (default: false) */
+	networkEnabled: boolean
+}
+
+/**
+ * Default sandbox configuration values; these match the defaults documented on SandboxConfig.
+ */
+export const DEFAULT_SANDBOX_CONFIG: SandboxConfig = {
+	image: "python:3.12-slim",
+	memoryLimit: 256 * 1024 * 1024, // 256MB, expressed in bytes
+	cpuLimit: 0.5, // fraction of one CPU
+	timeoutMs: 30_000, // 30 seconds
+	networkEnabled: false, // network access not allowed
+}
+
+/**
+ * The subset of tools supported in the initial programmatic tool calling implementation (`as const` keeps literal types).
+ */
+export const SUPPORTED_PROGRAMMATIC_TOOLS = [
+	"read_file",
+	"write_to_file",
+	"execute_command",
+	"search_files",
+	"list_files",
+] as const
+
+export type SupportedProgrammaticTool = (typeof SUPPORTED_PROGRAMMATIC_TOOLS)[number] // union of the names above
+
+/**
+ * Type guard: true when `toolName` exactly matches an entry of SUPPORTED_PROGRAMMATIC_TOOLS.
+ */
+export function isSupportedProgrammaticTool(toolName: string): toolName is SupportedProgrammaticTool {
+	return SUPPORTED_PROGRAMMATIC_TOOLS.some((supportedName) => supportedName === toolName)
+}