diff --git a/packages/types/src/providers/openai-codex.ts b/packages/types/src/providers/openai-codex.ts
index 72b909591a6..f8d9bd25df3 100644
--- a/packages/types/src/providers/openai-codex.ts
+++ b/packages/types/src/providers/openai-codex.ts
@@ -68,6 +68,20 @@ export const openAiCodexModels = {
 		supportsTemperature: false,
 		description: "GPT-5.3 Codex: OpenAI's flagship coding model via ChatGPT subscription",
 	},
+	"gpt-5.3-codex-spark": {
+		maxTokens: 8192,
+		contextWindow: 128000,
+		includedTools: ["apply_patch"],
+		excludedTools: ["apply_diff", "write_to_file"],
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
+		reasoningEffort: "medium",
+		inputPrice: 0,
+		outputPrice: 0,
+		supportsTemperature: false,
+		description: "GPT-5.3 Codex Spark: Fast, text-only coding model via ChatGPT subscription",
+	},
 	"gpt-5.2-codex": {
 		maxTokens: 128000,
 		contextWindow: 400000,
diff --git a/src/api/providers/__tests__/openai-codex-native-tool-calls.spec.ts b/src/api/providers/__tests__/openai-codex-native-tool-calls.spec.ts
index 608f639ed44..360ca1117c5 100644
--- a/src/api/providers/__tests__/openai-codex-native-tool-calls.spec.ts
+++ b/src/api/providers/__tests__/openai-codex-native-tool-calls.spec.ts
@@ -97,4 +97,232 @@ describe("OpenAiCodexHandler native tool calls", () => {
 			name: "attempt_completion",
 		})
 	})
+
+	it("yields text when Codex emits assistant message only in response.output_item.done", async () => {
+		vi.spyOn(openAiCodexOAuthManager, "getAccessToken").mockResolvedValue("test-token")
+		vi.spyOn(openAiCodexOAuthManager, "getAccountId").mockResolvedValue("acct_test")
+		;(handler as any).client = {
+			responses: {
+				create: vi.fn().mockResolvedValue({
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "response.output_item.done",
+							item: {
+								type: "message",
+								role: "assistant",
+								content: [{ type: "output_text", text: "hello from spark" }],
+							},
+							output_index: 0,
+						}
+						yield {
+							type: "response.completed",
+							response: {
+								id: "resp_done_only",
+								status: "completed",
+								output: [
+									{
+										type: "message",
+										role: "assistant",
+										content: [{ type: "output_text", text: "hello from spark" }],
+									},
+								],
+								usage: { input_tokens: 1, output_tokens: 2 },
+							},
+						}
+					},
+				}),
+			},
+		}
+
+		const stream = handler.createMessage("system", [{ role: "user", content: "test" } as any], {
+			taskId: "t",
+			tools: [],
+		})
+
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks.length).toBeGreaterThan(0)
+		expect(textChunks.map((c) => c.text).join("")).toContain("hello from spark")
+	})
+
+	it("yields text when Codex emits assistant message only in response.completed output", async () => {
+		vi.spyOn(openAiCodexOAuthManager, "getAccessToken").mockResolvedValue("test-token")
+		vi.spyOn(openAiCodexOAuthManager, "getAccountId").mockResolvedValue("acct_test")
+		;(handler as any).client = {
+			responses: {
+				create: vi.fn().mockResolvedValue({
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "response.completed",
+							response: {
+								id: "resp_completed_only",
+								status: "completed",
+								output: [
+									{
+										type: "message",
+										role: "assistant",
+										content: [{ type: "output_text", text: "final payload only" }],
+									},
+								],
+								usage: { input_tokens: 1, output_tokens: 2 },
+							},
+						}
+					},
+				}),
+			},
+		}
+
+		const stream = handler.createMessage("system", [{ role: "user", content: "test" } as any], {
+			taskId: "t",
+			tools: [],
+		})
+
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks.length).toBeGreaterThan(0)
+		expect(textChunks.map((c) => c.text).join("")).toContain("final payload only")
+	})
+
+	it("yields text when Codex emits response.output_text.done without deltas", async () => {
+		vi.spyOn(openAiCodexOAuthManager, "getAccessToken").mockResolvedValue("test-token")
+		vi.spyOn(openAiCodexOAuthManager, "getAccountId").mockResolvedValue("acct_test")
+		;(handler as any).client = {
+			responses: {
+				create: vi.fn().mockResolvedValue({
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "response.output_text.done",
+							text: "done-event text only",
+						}
+						yield {
+							type: "response.completed",
+							response: {
+								id: "resp_done_text_only",
+								status: "completed",
+								output: [],
+								usage: { input_tokens: 1, output_tokens: 2 },
+							},
+						}
+					},
+				}),
+			},
+		}
+
+		const stream = handler.createMessage("system", [{ role: "user", content: "test" } as any], {
+			taskId: "t",
+			tools: [],
+		})
+
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks.length).toBeGreaterThan(0)
+		expect(textChunks.map((c) => c.text).join("")).toContain("done-event text only")
+	})
+
+	it("yields tool_call when Codex emits function_call only in response.output_item.done", async () => {
+		vi.spyOn(openAiCodexOAuthManager, "getAccessToken").mockResolvedValue("test-token")
+		vi.spyOn(openAiCodexOAuthManager, "getAccountId").mockResolvedValue("acct_test")
+		;(handler as any).client = {
+			responses: {
+				create: vi.fn().mockResolvedValue({
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "response.output_item.done",
+							item: {
+								type: "function_call",
+								call_id: "call_done_only",
+								name: "attempt_completion",
+								arguments: '{"result":"ok"}',
+							},
+							output_index: 0,
+						}
+						yield {
+							type: "response.completed",
+							response: {
+								id: "resp_done_tool_only",
+								status: "completed",
+								output: [],
+								usage: { input_tokens: 1, output_tokens: 2 },
+							},
+						}
+					},
+				}),
+			},
+		}
+
+		const stream = handler.createMessage("system", [{ role: "user", content: "test" } as any], {
+			taskId: "t",
+			tools: [],
+		})
+
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const toolCalls = chunks.filter((c) => c.type === "tool_call")
+		expect(toolCalls.length).toBeGreaterThan(0)
+		expect(toolCalls[0]).toMatchObject({
+			type: "tool_call",
+			id: "call_done_only",
+			name: "attempt_completion",
+		})
+	})
+
+	it("yields text when Codex emits response.content_part.added", async () => {
+		vi.spyOn(openAiCodexOAuthManager, "getAccessToken").mockResolvedValue("test-token")
+		vi.spyOn(openAiCodexOAuthManager, "getAccountId").mockResolvedValue("acct_test")
+		;(handler as any).client = {
+			responses: {
+				create: vi.fn().mockResolvedValue({
+					async *[Symbol.asyncIterator]() {
+						yield {
+							type: "response.content_part.added",
+							part: {
+								type: "output_text",
+								text: "content part text",
+							},
+							output_index: 0,
+							content_index: 0,
+						}
+						yield {
+							type: "response.completed",
+							response: {
+								id: "resp_content_part",
+								status: "completed",
+								output: [],
+								usage: { input_tokens: 1, output_tokens: 2 },
+							},
+						}
+					},
+				}),
+			},
+		}
+
+		const stream = handler.createMessage("system", [{ role: "user", content: "test" } as any], {
+			taskId: "t",
+			tools: [],
+		})
+
+		const chunks: any[] = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks.length).toBeGreaterThan(0)
+		expect(textChunks.map((c) => c.text).join("")).toContain("content part text")
+	})
 })
diff --git a/src/api/providers/__tests__/openai-codex.spec.ts b/src/api/providers/__tests__/openai-codex.spec.ts
index 26a0e83c45c..2e164fe469f 100644
--- a/src/api/providers/__tests__/openai-codex.spec.ts
+++ b/src/api/providers/__tests__/openai-codex.spec.ts
@@ -3,7 +3,7 @@ import { OpenAiCodexHandler } from "../openai-codex"
 
 describe("OpenAiCodexHandler.getModel", () => {
-	it.each(["gpt-5.1", "gpt-5", "gpt-5.1-codex", "gpt-5-codex", "gpt-5-codex-mini"])(
+	it.each(["gpt-5.1", "gpt-5", "gpt-5.1-codex", "gpt-5-codex", "gpt-5-codex-mini", "gpt-5.3-codex-spark"])(
 		"should return specified model when a valid model id is provided: %s",
 		(apiModelId) => {
 			const handler = new OpenAiCodexHandler({ apiModelId })
@@ -23,4 +23,14 @@ describe("OpenAiCodexHandler.getModel", () => {
 		expect(model.id).toBe("gpt-5.3-codex")
 		expect(model.info).toBeDefined()
 	})
+
+	it("should use Spark-specific limits and capabilities", () => {
+		const handler = new OpenAiCodexHandler({ apiModelId: "gpt-5.3-codex-spark" })
+		const model = handler.getModel()
+
+		expect(model.id).toBe("gpt-5.3-codex-spark")
+		expect(model.info.contextWindow).toBe(128000)
+		expect(model.info.maxTokens).toBe(8192)
+		expect(model.info.supportsImages).toBe(false)
+	})
 })
diff --git a/src/api/providers/openai-codex.ts b/src/api/providers/openai-codex.ts
index d64780c5557..ed43042ec0f 100644
--- a/src/api/providers/openai-codex.ts
+++ b/src/api/providers/openai-codex.ts
@@ -64,11 +64,19 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 	 */
 	private pendingToolCallId: string | undefined
 	private pendingToolCallName: string | undefined
+	// Tracks whether this response already emitted text to avoid duplicate done-event rendering.
+	private sawTextOutputInCurrentResponse = false
+	// Tracks tool call IDs emitted via streaming partial events to prevent done-event duplicates.
+	private streamedToolCallIds = new Set<string>()
 
 	// Event types handled by the shared event processor
 	private readonly coreHandledEventTypes = new Set([
 		"response.text.delta",
 		"response.output_text.delta",
+		"response.text.done",
+		"response.output_text.done",
+		"response.content_part.added",
+		"response.content_part.done",
 		"response.reasoning.delta",
 		"response.reasoning_text.delta",
 		"response.reasoning_summary.delta",
@@ -149,6 +157,8 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 		this.lastResponseId = undefined
 		this.pendingToolCallId = undefined
 		this.pendingToolCallName = undefined
+		this.sawTextOutputInCurrentResponse = false
+		this.streamedToolCallIds.clear()
 
 		// Get access token from OAuth manager
 		let accessToken = await openAiCodexOAuthManager.getAccessToken()
@@ -378,6 +388,9 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 			}
 
 			for await (const outChunk of this.processEvent(event, model)) {
+				if (outChunk.type === "text") {
+					this.sawTextOutputInCurrentResponse = true
+				}
 				yield outChunk
 			}
 		}
@@ -647,6 +660,9 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 					for await (const outChunk of this.processEvent(parsed, model)) {
 						if (outChunk.type === "text" || outChunk.type === "reasoning") {
 							hasContent = true
+							if (outChunk.type === "text") {
+								this.sawTextOutputInCurrentResponse = true
+							}
 						}
 						yield outChunk
 					}
@@ -660,6 +676,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 						for (const content of outputItem.content) {
 							if (content.type === "text" && content.text) {
 								hasContent = true
+								this.sawTextOutputInCurrentResponse = true
 								yield { type: "text", text: content.text }
 							}
 						}
@@ -685,8 +702,26 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 				) {
 					if (parsed.delta) {
 						hasContent = true
+						this.sawTextOutputInCurrentResponse = true
 						yield { type: "text", text: parsed.delta }
 					}
+				} else if (
+					(parsed.type === "response.text.done" || parsed.type === "response.output_text.done") &&
+					!hasContent
+				) {
+					const doneText =
+						typeof parsed.text === "string"
+							? parsed.text
+							: typeof parsed.output_text === "string"
+								? parsed.output_text
+								: typeof parsed.delta === "string"
+									? parsed.delta
+									: undefined
+					if (doneText) {
+						hasContent = true
+						this.sawTextOutputInCurrentResponse = true
+						yield { type: "text", text: doneText }
+					}
 				} else if (
 					parsed.type === "response.reasoning.delta" ||
 					parsed.type === "response.reasoning_text.delta"
@@ -706,12 +741,14 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 				} else if (parsed.type === "response.refusal.delta") {
 					if (parsed.delta) {
 						hasContent = true
+						this.sawTextOutputInCurrentResponse = true
 						yield { type: "text", text: `[Refusal] ${parsed.delta}` }
 					}
 				} else if (parsed.type === "response.output_item.added") {
 					if (parsed.item) {
 						if (parsed.item.type === "text" && parsed.item.text) {
 							hasContent = true
+							this.sawTextOutputInCurrentResponse = true
 							yield { type: "text", text: parsed.item.text }
 						} else if (parsed.item.type === "reasoning" && parsed.item.text) {
 							hasContent = true
@@ -720,6 +757,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 							for (const content of parsed.item.content) {
 								if (content.type === "text" && content.text) {
 									hasContent = true
+									this.sawTextOutputInCurrentResponse = true
 									yield { type: "text", text: content.text }
 								}
 							}
@@ -760,6 +798,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 						for (const content of outputItem.content) {
 							if (content.type === "output_text" && content.text) {
 								hasContent = true
+								this.sawTextOutputInCurrentResponse = true
 								yield { type: "text", text: content.text }
 							}
 						}
@@ -779,6 +818,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 					}
 				} else if (parsed.choices?.[0]?.delta?.content) {
 					hasContent = true
+					this.sawTextOutputInCurrentResponse = true
 					yield { type: "text", text: parsed.choices[0].delta.content }
 				} else if (
 					parsed.item &&
@@ -786,6 +826,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 					parsed.item.text.length > 0
 				) {
 					hasContent = true
+					this.sawTextOutputInCurrentResponse = true
 					yield { type: "text", text: parsed.item.text }
 				} else if (parsed.usage) {
 					const usageData = this.normalizeUsage(parsed.usage, model)
@@ -803,6 +844,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 							const parsed = JSON.parse(line)
 							if (parsed.content || parsed.text || parsed.message) {
 								hasContent = true
+								this.sawTextOutputInCurrentResponse = true
 								yield { type: "text", text: parsed.content || parsed.text || parsed.message }
 							}
 						} catch {
@@ -836,11 +878,43 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 		// Handle text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
 			if (event?.delta) {
+				this.sawTextOutputInCurrentResponse = true
 				yield { type: "text", text: event.delta }
 			}
 			return
 		}
 
+		if (event?.type === "response.text.done" || event?.type === "response.output_text.done") {
+			const doneText =
+				typeof event?.text === "string"
+					? event.text
+					: typeof event?.output_text === "string"
+						? event.output_text
+						: typeof event?.delta === "string"
+							? event.delta
+							: undefined
+			if (!this.sawTextOutputInCurrentResponse && doneText) {
+				this.sawTextOutputInCurrentResponse = true
+				yield { type: "text", text: doneText }
+			}
+			return
+		}
+
+		if (event?.type === "response.content_part.added" || event?.type === "response.content_part.done") {
+			const part = event?.part
+			if (
+				(part?.type === "text" || part?.type === "output_text") &&
+				(typeof part?.text === "string" || typeof part?.text?.value === "string")
+			) {
+				const partText = typeof part.text === "string" ? part.text : part.text.value
+				if (partText && (event.type !== "response.content_part.done" || !this.sawTextOutputInCurrentResponse)) {
+					this.sawTextOutputInCurrentResponse = true
+					yield { type: "text", text: partText }
+				}
+			}
+			return
+		}
+
 		// Handle reasoning deltas
 		if (
 			event?.type === "response.reasoning.delta" ||
@@ -857,6 +931,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 		// Handle refusal deltas
 		if (event?.type === "response.refusal.delta") {
 			if (event?.delta) {
+				this.sawTextOutputInCurrentResponse = true
 				yield { type: "text", text: `[Refusal] ${event.delta}` }
 			}
 			return
@@ -875,6 +950,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 			// to include a stable id/name. Avoid emitting incomplete tool_call_partial chunks because
 			// NativeToolCallParser requires a name to start a call.
 			if (typeof callId === "string" && callId.length > 0 && typeof name === "string" && name.length > 0) {
+				this.streamedToolCallIds.add(callId)
 				yield {
 					type: "tool_call_partial",
 					index: event.index ?? 0,
@@ -908,17 +984,64 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 			}
 		}
 
-		// For "added" events, yield text/reasoning content (streaming path)
-		// For "done" events, do NOT yield text/reasoning - it's already been streamed via deltas
-		// and would cause double-emission (A, B, C, ABC).
+		// For "added" events, yield text/reasoning content (streaming path).
+		// For "done" events, text has normally already been streamed via deltas, but some models
+		// only provide assistant text on done events. Emit fallback text only if none was emitted yet.
 		if (event.type === "response.output_item.added") {
 			if (item.type === "text" && item.text) {
+				this.sawTextOutputInCurrentResponse = true
+				yield { type: "text", text: item.text }
+			} else if (item.type === "output_text" && item.text) {
+				this.sawTextOutputInCurrentResponse = true
 				yield { type: "text", text: item.text }
 			} else if (item.type === "reasoning" && item.text) {
 				yield { type: "reasoning", text: item.text }
 			} else if (item.type === "message" && Array.isArray(item.content)) {
 				for (const content of item.content) {
 					if ((content?.type === "text" || content?.type === "output_text") && content?.text) {
+						this.sawTextOutputInCurrentResponse = true
+						yield { type: "text", text: content.text }
+					}
+				}
+			}
+		} else if (
+			event.type === "response.output_item.done" &&
+			(item.type === "function_call" || item.type === "tool_call")
+		) {
+			const callId = item.call_id || item.tool_call_id || item.id
+			const name = item.name || item.function?.name || item.function_name
+			const argsRaw = item.arguments || item.function?.arguments || item.input
+			const args =
+				typeof argsRaw === "string"
+					? argsRaw
+					: argsRaw && typeof argsRaw === "object"
+						? JSON.stringify(argsRaw)
+						: ""
+
+			// Fallback for models that only emit a complete function_call in output_item.done.
+			// If we already streamed partials for this ID, skip to avoid duplicate tool execution.
+			if (
+				typeof callId === "string" &&
+				callId.length > 0 &&
+				typeof name === "string" &&
+				name.length > 0 &&
+				!this.streamedToolCallIds.has(callId)
+			) {
+				yield {
+					type: "tool_call",
+					id: callId,
+					name,
+					arguments: args,
+				}
+			}
+		} else if (!this.sawTextOutputInCurrentResponse) {
+			if ((item.type === "text" || item.type === "output_text") && item.text) {
+				this.sawTextOutputInCurrentResponse = true
+				yield { type: "text", text: item.text }
+			} else if (item.type === "message" && Array.isArray(item.content)) {
+				for (const content of item.content) {
+					if ((content?.type === "text" || content?.type === "output_text") && content?.text) {
+						this.sawTextOutputInCurrentResponse = true
 						yield { type: "text", text: content.text }
 					}
 				}
@@ -937,6 +1060,26 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 
 		// Handle completion events
 		if (event?.type === "response.done" || event?.type === "response.completed") {
+			// Some Codex variants only provide assistant text in the final completed payload.
+			if (!this.sawTextOutputInCurrentResponse && Array.isArray(event?.response?.output)) {
+				for (const outputItem of event.response.output) {
+					if ((outputItem?.type === "text" || outputItem?.type === "output_text") && outputItem?.text) {
+						this.sawTextOutputInCurrentResponse = true
+						yield { type: "text", text: outputItem.text }
+						continue
+					}
+
+					if (outputItem?.type === "message" && Array.isArray(outputItem.content)) {
+						for (const content of outputItem.content) {
+							if ((content?.type === "text" || content?.type === "output_text") && content?.text) {
+								this.sawTextOutputInCurrentResponse = true
+								yield { type: "text", text: content.text }
+							}
+						}
+					}
+				}
+			}
+
 			const usage = event?.response?.usage || event?.usage || undefined
 			const usageData = this.normalizeUsage(usage, model)
 			if (usageData) {
@@ -947,6 +1090,7 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 
 		// Fallbacks
 		if (event?.choices?.[0]?.delta?.content) {
+			this.sawTextOutputInCurrentResponse = true
 			yield { type: "text", text: event.choices[0].delta.content }
 			return
 		}
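Note on the dedup contract this diff introduces: `sawTextOutputInCurrentResponse` turns the done/completed handlers into a fallback path rather than a second emission path, and `streamedToolCallIds` does the same for tool calls. A minimal, self-contained sketch of that contract, under simplified assumptions (the event and chunk shapes are reduced, and `processEvents`/`sawText` are illustrative names, not the handler's actual API):

```ts
// Sketch only: in the real handler, the two pieces of dedup state are instance
// fields that are reset at the start of each request.
type Chunk = { type: "text"; text: string } | { type: "tool_call"; id: string; name: string }

function* processEvents(events: any[]): Generator<Chunk> {
	let sawText = false // stands in for sawTextOutputInCurrentResponse
	const streamedToolCallIds = new Set<string>() // stands in for streamedToolCallIds

	for (const event of events) {
		switch (event.type) {
			case "response.output_text.delta":
				// Streaming path: deltas are always emitted and mark text as seen.
				sawText = true
				yield { type: "text", text: event.delta }
				break
			case "response.output_text.done":
				// Fallback path: the done event repeats the full text, so emit it
				// only when no deltas arrived earlier in this response.
				if (!sawText && typeof event.text === "string") {
					sawText = true
					yield { type: "text", text: event.text }
				}
				break
			case "response.output_item.done":
				// Fallback for models that only emit a complete function_call here;
				// skipped when partials were already streamed for this call id.
				if (event.item?.type === "function_call" && !streamedToolCallIds.has(event.item.call_id)) {
					yield { type: "tool_call", id: event.item.call_id, name: event.item.name }
				}
				break
		}
	}
}
```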
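Run against the sketch above, both event shapes produce the same output exactly once, which is the invariant the new tests pin down:

```ts
// Delta-streaming model: the trailing done event is suppressed.
const streamed = [
	...processEvents([
		{ type: "response.output_text.delta", delta: "hel" },
		{ type: "response.output_text.delta", delta: "lo" },
		{ type: "response.output_text.done", text: "hello" },
	]),
]
// streamed: two text chunks, "hel" and "lo"; "hello" is not re-emitted.

// Done-only model (the Spark-style behavior the tests cover): the fallback fires once.
const doneOnly = [...processEvents([{ type: "response.output_text.done", text: "hello" }])]
// doneOnly: one text chunk, "hello".
```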