From 7f05089103bdff1f0f739bf78014a20f2eeb2e82 Mon Sep 17 00:00:00 2001
From: Hannes Rudolph
Date: Thu, 12 Feb 2026 18:01:44 -0700
Subject: [PATCH] feat: wire Vercel AI Gateway reasoning via AI SDK v6 native path

- Parse model tags from /v1/models for reasoning capability detection
- Use top-level 'reasoning' param in streamText/generateText (AI SDK v6)
- Add providerOptions.anthropic.thinking for Anthropic models through Gateway
- Use computed reasoningBudget (80% cap) for thinking budget tokens
- Add test coverage for reasoning + anthropic thinking payload shape
---
 .../__tests__/vercel-ai-gateway.spec.ts       | 42 +++++++++
 .../__tests__/vercel-ai-gateway.spec.ts       | 15 ++++
 .../providers/fetchers/vercel-ai-gateway.ts   | 10 +++
 src/api/providers/vercel-ai-gateway.ts        | 86 ++++++++++++++++---
 4 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts
index f482c7cf2d4..9b47573a7a6 100644
--- a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts
+++ b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts
@@ -58,6 +58,18 @@ vi.mock("../fetchers/modelCache", () => ({
 			cacheReadsPrice: 0.25,
 			description: "GPT-4o",
 		},
+		"anthropic/claude-opus-4.6": {
+			maxTokens: 128000,
+			contextWindow: 1000000,
+			supportsImages: true,
+			supportsPromptCache: true,
+			supportsReasoningEffort: true,
+			inputPrice: 5,
+			outputPrice: 25,
+			cacheWritesPrice: 6.25,
+			cacheReadsPrice: 0.5,
+			description: "Claude Opus 4.6",
+		},
 	})
 }),
 getModelsFromCache: vi.fn().mockReturnValue(undefined),
}))
@@ -232,6 +244,36 @@ describe("VercelAiGatewayHandler", () => {
 			)
 		})
 
+		it("passes gateway reasoning effort when enabled", async () => {
+			mockStreamText.mockReturnValue(createMockStreamResult())
+
+			const handler = new VercelAiGatewayHandler({
+				...mockOptions,
+				vercelAiGatewayModelId: "anthropic/claude-opus-4.6",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			await handler.createMessage("test", []).next()
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning: {
+						enabled: true,
+						effort: "high",
+					},
+					providerOptions: {
+						anthropic: {
+							thinking: {
+								type: "enabled",
+								budgetTokens: 102400,
+							},
+						},
+					},
+				}),
+			)
+		})
+
 		it("sets correct maxOutputTokens", async () => {
 			mockStreamText.mockReturnValue(createMockStreamResult())
 
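Note: the budgetTokens value asserted above is not arbitrary; it follows from the
"80% cap" named in the commit message, applied to the mocked model's maxTokens.
Illustrative arithmetic only, not part of the patch:

	// anthropic/claude-opus-4.6 is mocked with maxTokens: 128000
	const budgetTokens = Math.floor(128000 * 0.8) // 102400
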
diff --git a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts
index 3a4a234de96..ab352024cb0 100644
--- a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts
+++ b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts
@@ -237,6 +237,21 @@ describe("Vercel AI Gateway Fetchers", () => {
 		)
 	})
 
+	it("sets supportsReasoningEffort when model tags include reasoning", () => {
+		const reasoningModel = {
+			...baseModel,
+			id: "anthropic/claude-opus-4.6",
+			tags: ["tool-use", "reasoning", "vision"],
+		}
+
+		const result = parseVercelAiGatewayModel({
+			id: reasoningModel.id,
+			model: reasoningModel,
+		})
+
+		expect(result.supportsReasoningEffort).toBe(true)
+	})
+
 	it("handles missing cache pricing", () => {
 		const modelNoCachePricing = {
 			...baseModel,
diff --git a/src/api/providers/fetchers/vercel-ai-gateway.ts b/src/api/providers/fetchers/vercel-ai-gateway.ts
index a708d106f08..d3ca5c493aa 100644
--- a/src/api/providers/fetchers/vercel-ai-gateway.ts
+++ b/src/api/providers/fetchers/vercel-ai-gateway.ts
@@ -33,6 +33,7 @@ const vercelAiGatewayModelSchema =
 	z.object({
 		context_window: z.number(),
 		max_tokens: z.number(),
 		type: z.string(),
+		tags: z.array(z.string()).optional(),
 		pricing: vercelAiGatewayPricingSchema,
 	})
@@ -91,6 +92,14 @@ export async function getVercelAiGatewayModels(options?: ApiHandlerOptions): Pro
  */
 export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: VercelAiGatewayModel }): ModelInfo => {
+	const tags = model.tags ?? []
+	const supportsReasoningEffort =
+		tags.includes("reasoning") ||
+		id.startsWith("openai/o") ||
+		id.startsWith("openai/gpt-5") ||
+		id.startsWith("openai/gpt-oss") ||
+		id.startsWith("xai/grok-3-mini")
+
 	const cacheWritesPrice = model.pricing?.input_cache_write
 		? parseApiPrice(model.pricing?.input_cache_write)
 		: undefined
@@ -105,6 +114,7 @@ export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: Ve
 		maxTokens: model.max_tokens,
 		contextWindow: model.context_window,
 		supportsImages,
+		...(supportsReasoningEffort ? { supportsReasoningEffort: true } : {}),
 		supportsPromptCache,
 		inputPrice: parseApiPrice(model.pricing?.input),
 		outputPrice: parseApiPrice(model.pricing?.output),
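Note: with the optional tags field parsed, a /v1/models entry that carries a
"reasoning" tag now yields supportsReasoningEffort. A minimal sketch of the
mapping; the entry's field values here are illustrative, not taken from the
live API:

	// Assumes parseVercelAiGatewayModel is imported from the fetcher above.
	const model = {
		context_window: 200000, // illustrative
		max_tokens: 64000, // illustrative
		type: "language", // illustrative
		tags: ["tool-use", "reasoning"],
		pricing: { input: "0.000003", output: "0.000015" }, // illustrative
	}

	const info = parseVercelAiGatewayModel({ id: "anthropic/claude-opus-4.6", model })
	// info.supportsReasoningEffort === true via the tag; the openai/o*, openai/gpt-5*,
	// openai/gpt-oss* and xai/grok-3-mini* id prefixes are covered even without it.
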
diff --git a/src/api/providers/vercel-ai-gateway.ts b/src/api/providers/vercel-ai-gateway.ts
index 817a543e88b..ab6ca16e3df 100644
--- a/src/api/providers/vercel-ai-gateway.ts
+++ b/src/api/providers/vercel-ai-gateway.ts
@@ -21,6 +21,8 @@ import {
 } from "../transform/ai-sdk"
 import { applyToolCacheOptions } from "../transform/cache-breakpoints"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
+import type { OpenAiReasoningParams } from "../transform/reasoning"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
@@ -29,6 +31,15 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 import type { RooMessage } from "../../core/task-persistence/rooMessage"
 import { sanitizeMessagesForProvider } from "../transform/sanitize-messages"
 
+type ModelSelection = {
+	id: string
+	info: ModelInfo
+	maxTokens?: number
+	temperature?: number
+	reasoning?: OpenAiReasoningParams
+	reasoningBudget?: number
+}
+
 /**
  * Vercel AI Gateway provider using the built-in AI SDK gateway support.
  * Uses `createGateway` from the `ai` package to communicate with the
@@ -50,20 +61,41 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
 		})
 	}
 
-	override getModel(): { id: string; info: ModelInfo } {
+	override getModel(): ModelSelection {
 		const id = this.options.vercelAiGatewayModelId ?? vercelAiGatewayDefaultModelId
+		const resolveModel = (modelInfo: ModelInfo) => ({
+			id,
+			info: modelInfo,
+			...getModelParams({
+				format: "openai",
+				modelId: id,
+				model: modelInfo,
+				settings: this.options,
+				defaultTemperature: VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE,
+			}),
+		})
 
 		if (this.models[id]) {
-			return { id, info: this.models[id] }
+			return resolveModel(this.models[id])
 		}
 
 		const cachedModels = getModelsFromCache(this.name)
 		if (cachedModels?.[id]) {
 			this.models = cachedModels
-			return { id, info: cachedModels[id] }
+			return resolveModel(cachedModels[id])
 		}
 
-		return { id: vercelAiGatewayDefaultModelId, info: vercelAiGatewayDefaultModelInfo }
+		return {
+			id: vercelAiGatewayDefaultModelId,
+			info: vercelAiGatewayDefaultModelInfo,
+			...getModelParams({
+				format: "openai",
+				modelId: vercelAiGatewayDefaultModelId,
+				model: vercelAiGatewayDefaultModelInfo,
+				settings: this.options,
+				defaultTemperature: VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE,
+			}),
+		}
 	}
 
 	public async fetchModel() {
@@ -115,7 +147,7 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
 		messages: RooMessage[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const { id: modelId, info } = await this.fetchModel()
+		const { id: modelId, info, temperature, reasoning, reasoningBudget } = await this.fetchModel()
 		const languageModel = this.getLanguageModel(modelId)
 
 		const aiSdkMessages = sanitizeMessagesForProvider(messages)
@@ -124,18 +156,33 @@
 		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
 		applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)
 
-		const temperature = this.supportsTemperature(modelId)
-			? (this.options.modelTemperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
-			: undefined
+		const resolvedTemperature = this.supportsTemperature(modelId)
+			? (this.options.modelTemperature ?? temperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
+			: undefined
 
+		const reasoningConfig = reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined
+		const anthropicProviderOptions =
+			modelId.startsWith("anthropic/") && reasoningConfig
+				? {
+						anthropic: {
+							thinking: {
+								type: "enabled" as const,
+								budgetTokens: reasoningBudget ?? Math.floor((info.maxTokens ?? 0) * 0.8),
+							},
+						},
+					}
+				: undefined
+
 		const result = streamText({
 			model: languageModel,
 			system: systemPrompt || undefined,
 			messages: aiSdkMessages,
-			temperature,
+			temperature: resolvedTemperature,
 			maxOutputTokens: info.maxTokens ?? undefined,
 			tools: aiSdkTools,
 			toolChoice: mapToolChoice(metadata?.tool_choice),
+			...(reasoningConfig ? { reasoning: reasoningConfig } : {}),
+			...(anthropicProviderOptions ? { providerOptions: anthropicProviderOptions } : {}),
 		})
 
 		try {
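Note: reasoningConfig above translates the OpenAI-style params returned by
getModelParams (reasoning_effort) into the top-level AI SDK v6 shape
({ enabled, effort }) that this patch relies on. A standalone sketch of that
translation, with the imported type narrowed for illustration:

	// Stands in for OpenAiReasoningParams; narrowed for this sketch.
	type Effort = { reasoning_effort?: "low" | "medium" | "high" }
	const toGatewayReasoning = (reasoning?: Effort) =>
		reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined

For Anthropic ids this is paired with providerOptions.anthropic.thinking, using
the computed reasoningBudget when getModelParams supplied one and falling back
to 80% of the model's maxTokens otherwise.
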
@@ -170,19 +217,34 @@
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
-		const { id: modelId, info } = await this.fetchModel()
+		const { id: modelId, info, temperature, reasoning, reasoningBudget } = await this.fetchModel()
 		const languageModel = this.getLanguageModel(modelId)
 
-		const temperature = this.supportsTemperature(modelId)
-			? (this.options.modelTemperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
-			: undefined
+		const resolvedTemperature = this.supportsTemperature(modelId)
+			? (this.options.modelTemperature ?? temperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
+			: undefined
 
+		const reasoningConfig = reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined
+		const anthropicProviderOptions =
+			modelId.startsWith("anthropic/") && reasoningConfig
+				? {
+						anthropic: {
+							thinking: {
+								type: "enabled" as const,
+								budgetTokens: reasoningBudget ?? Math.floor((info.maxTokens ?? 0) * 0.8),
+							},
+						},
+					}
+				: undefined
+
 		try {
 			const { text } = await generateText({
 				model: languageModel,
 				prompt,
 				maxOutputTokens: info.maxTokens ?? undefined,
-				temperature,
+				temperature: resolvedTemperature,
+				...(reasoningConfig ? { reasoning: reasoningConfig } : {}),
+				...(anthropicProviderOptions ? { providerOptions: anthropicProviderOptions } : {}),
 			})
 
 			return text
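Note: taken together, the handler now emits two payload shapes: Anthropic
models get both the top-level reasoning param and the provider-scoped thinking
block, while other reasoning-capable models get the top-level param alone. A
sketch, assuming the AI SDK v6 surface this patch targets; gateway stands in
for the createGateway instance the handler builds, messages for the converted
conversation, and the numbers match the opus-4.6 mock in the tests:

	import { streamText } from "ai"

	// anthropic/* model with reasoningEffort "high" and maxTokens 128000:
	streamText({
		model: gateway("anthropic/claude-opus-4.6"),
		messages, // ModelMessage[] built by the handler; elided here
		reasoning: { enabled: true, effort: "high" },
		providerOptions: {
			anthropic: { thinking: { type: "enabled", budgetTokens: 102400 } },
		},
	})

	// Any other reasoning-capable model: top-level reasoning only.
	streamText({
		model: gateway("openai/gpt-5"),
		messages,
		reasoning: { enabled: true, effort: "high" },
	})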