From f1e3865d1e0ea2213e57e717aa770756b19e7e36 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 27 Feb 2025 17:17:31 +0000 Subject: [PATCH 1/6] Fix memory leak in AsyncCompletions.parse() with dynamically created models This commit fixes a memory leak issue in AsyncCompletions.parse() when repeatedly called with Pydantic models created via create_model(). The issue was occurring because schema representations of models were being retained indefinitely. The fix implements a WeakKeyDictionary cache that allows the schema objects to be garbage-collected when the model types are no longer referenced elsewhere in code. Added test cases to verify the fix prevents memory leaks with dynamically created models. --- src/openai/lib/_parsing/_completions.py | 15 +++- tests/lib/_parsing/test_memory_leak.py | 101 ++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tests/lib/_parsing/test_memory_leak.py diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index c160070b66..fc7e6e5ab8 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import weakref from typing import TYPE_CHECKING, Any, Iterable, cast from typing_extensions import TypeVar, TypeGuard, assert_never @@ -28,6 +29,9 @@ from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam from ...types.chat.chat_completion_message_tool_call import Function +# Cache to store weak references to schema objects +_schema_cache = weakref.WeakKeyDictionary() + ResponseFormatT = TypeVar( "ResponseFormatT", # if it isn't given then we don't do any parsing @@ -243,6 +247,10 @@ def type_to_response_format_param( # can only be a `type` response_format = cast(type, response_format) + # Check if we already have a schema for this type in the cache + if response_format in _schema_cache: + return _schema_cache[response_format] + json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None if is_basemodel_type(response_format): @@ -254,7 +262,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - return { + schema_param = { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), @@ -262,3 +270,8 @@ def type_to_response_format_param( "strict": True, }, } + + # Store a weak reference to the schema parameter + _schema_cache[response_format] = schema_param + + return schema_param diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py new file mode 100644 index 0000000000..3e7dd46a22 --- /dev/null +++ b/tests/lib/_parsing/test_memory_leak.py @@ -0,0 +1,101 @@ +import unittest +import gc +import sys +from unittest.mock import AsyncMock, patch, MagicMock +from typing import List + +import pytest +from pydantic import Field, create_model + +from openai.resources.beta.chat.completions import AsyncCompletions +from openai.lib._parsing import type_to_response_format_param +from openai.lib._parsing._completions import _schema_cache + +class TestMemoryLeak(unittest.TestCase): + def setUp(self): + # Clear the schema cache before each test + _schema_cache.clear() + + def test_schema_cache_with_models(self): + """Test if schema cache properly handles dynamic models and prevents memory leak""" + + StepModel = create_model( + "Step", + explanation=(str, Field()), + output=(str, Field()), + ) + + # Create 
several models and ensure they're cached properly + models = [] + for i in range(5): + model = create_model( + f"MathResponse{i}", + steps=(List[StepModel], Field()), + final_answer=(str, Field()), + ) + models.append(model) + + # Convert model to response format param + param = type_to_response_format_param(model) + + # Check if the model is in the cache + self.assertIn(model, _schema_cache) + + # Test that all models are in the cache + self.assertEqual(len(_schema_cache), 5) + + # Let the models go out of scope and trigger garbage collection + models = None + gc.collect() + + # After garbage collection, the cache should be empty or reduced + # since we're using weakref.WeakKeyDictionary + self.assertLess(len(_schema_cache), 5) + +@pytest.mark.asyncio +async def test_async_completions_parse_memory(): + """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" + StepModel = create_model( + "Step", + explanation=(str, Field()), + output=(str, Field()), + ) + + # Clear the cache and record initial state + _schema_cache.clear() + initial_cache_size = len(_schema_cache) + + # Create a mock client + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock() + + # Create the AsyncCompletions instance with our mock client + completions = AsyncCompletions(mock_client) + + # Simulate the issue by creating multiple models and making calls + models = [] + for i in range(10): + # Create a new dynamic model each time + new_model = create_model( + f"MathResponse{i}", + steps=(List[StepModel], Field()), + final_answer=(str, Field()), + ) + models.append(new_model) + + # Convert to response format and check if it's in the cache + type_to_response_format_param(new_model) + assert new_model in _schema_cache + + # Record cache size with all models referenced + cache_size_with_references = len(_schema_cache) + + # Let the models go out of scope and trigger garbage collection + models = None + gc.collect() + + # After garbage collection, the cache should be significantly reduced + cache_size_after_gc = len(_schema_cache) + assert cache_size_after_gc < cache_size_with_references + # The cache size should be close to the initial size (with some tolerance) + assert cache_size_after_gc < cache_size_with_references / 2 \ No newline at end of file From d5957256ef48060e44ed8b61b425d89726fee6ff Mon Sep 17 00:00:00 2001 From: mousberg Date: Fri, 28 Mar 2025 22:53:39 +0000 Subject: [PATCH 2/6] Fix lint issues, remove unittest class, add proper type annotations --- src/openai/lib/_parsing/_completions.py | 2 +- tests/lib/_parsing/test_memory_leak.py | 51 ------------------------- 2 files changed, 1 insertion(+), 52 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index fc7e6e5ab8..2f4dd47ba4 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -262,7 +262,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - schema_param = { + schema_param: ResponseFormatParam = { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py index 3e7dd46a22..4a83f4ec0b 100644 --- a/tests/lib/_parsing/test_memory_leak.py +++ b/tests/lib/_parsing/test_memory_leak.py @@ -1,56 +1,12 @@ -import unittest import gc -import sys -from unittest.mock import AsyncMock, patch, MagicMock from typing import List import 
pytest from pydantic import Field, create_model -from openai.resources.beta.chat.completions import AsyncCompletions from openai.lib._parsing import type_to_response_format_param from openai.lib._parsing._completions import _schema_cache -class TestMemoryLeak(unittest.TestCase): - def setUp(self): - # Clear the schema cache before each test - _schema_cache.clear() - - def test_schema_cache_with_models(self): - """Test if schema cache properly handles dynamic models and prevents memory leak""" - - StepModel = create_model( - "Step", - explanation=(str, Field()), - output=(str, Field()), - ) - - # Create several models and ensure they're cached properly - models = [] - for i in range(5): - model = create_model( - f"MathResponse{i}", - steps=(List[StepModel], Field()), - final_answer=(str, Field()), - ) - models.append(model) - - # Convert model to response format param - param = type_to_response_format_param(model) - - # Check if the model is in the cache - self.assertIn(model, _schema_cache) - - # Test that all models are in the cache - self.assertEqual(len(_schema_cache), 5) - - # Let the models go out of scope and trigger garbage collection - models = None - gc.collect() - - # After garbage collection, the cache should be empty or reduced - # since we're using weakref.WeakKeyDictionary - self.assertLess(len(_schema_cache), 5) @pytest.mark.asyncio async def test_async_completions_parse_memory(): @@ -65,13 +21,6 @@ async def test_async_completions_parse_memory(): _schema_cache.clear() initial_cache_size = len(_schema_cache) - # Create a mock client - mock_client = MagicMock() - mock_client.chat.completions.create = AsyncMock() - - # Create the AsyncCompletions instance with our mock client - completions = AsyncCompletions(mock_client) - # Simulate the issue by creating multiple models and making calls models = [] for i in range(10): From bcb75a8ed336cd9f8744c448308c4006fd45be49 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 23 Oct 2025 20:37:09 +0100 Subject: [PATCH 3/6] Fix lint errors and add type annotations - Remove unused initial_cache_size variable - Add type annotations for _schema_cache, test function return type - Use del instead of assignment to None for better garbage collection - Add type ignore comment for Pydantic dynamic model typing - All ruff checks pass --- src/openai/lib/_parsing/_completions.py | 2 +- tests/lib/_parsing/test_memory_leak.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index 6bc8c0b2e7..e946c7edf8 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -32,7 +32,7 @@ from ...types.chat.chat_completion_message_function_tool_call import Function # Cache to store weak references to schema objects -_schema_cache = weakref.WeakKeyDictionary() +_schema_cache: weakref.WeakKeyDictionary[type, ResponseFormatParam] = weakref.WeakKeyDictionary() ResponseFormatT = TypeVar( "ResponseFormatT", diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py index 4a83f4ec0b..e60dcf8953 100644 --- a/tests/lib/_parsing/test_memory_leak.py +++ b/tests/lib/_parsing/test_memory_leak.py @@ -9,25 +9,25 @@ @pytest.mark.asyncio -async def test_async_completions_parse_memory(): +async def test_async_completions_parse_memory() -> None: """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" + # Create a base step model StepModel = create_model( "Step", 
explanation=(str, Field()), output=(str, Field()), ) - # Clear the cache and record initial state + # Clear the cache before testing _schema_cache.clear() - initial_cache_size = len(_schema_cache) - + # Simulate the issue by creating multiple models and making calls - models = [] + models: list[type] = [] for i in range(10): # Create a new dynamic model each time new_model = create_model( f"MathResponse{i}", - steps=(List[StepModel], Field()), + steps=(List[StepModel], Field()), # type: ignore[valid-type] final_answer=(str, Field()), ) models.append(new_model) @@ -40,7 +40,7 @@ async def test_async_completions_parse_memory(): cache_size_with_references = len(_schema_cache) # Let the models go out of scope and trigger garbage collection - models = None + del models gc.collect() # After garbage collection, the cache should be significantly reduced From 35376a5e5ac11603abd5aecaf0b0b85293906e44 Mon Sep 17 00:00:00 2001 From: mousberg Date: Wed, 11 Feb 2026 23:51:36 +0000 Subject: [PATCH 4/6] Fix memory leak in parse methods Avoid creating new type objects at runtime in construct_type_unchecked calls. Python's type cache holds references indefinitely, causing memory growth when parsing responses in long-running processes. Fixed across completions, responses, and streaming paths. --- src/openai/lib/_parsing/_completions.py | 17 ++++++------- src/openai/lib/_parsing/_responses.py | 24 ++++++++----------- src/openai/lib/streaming/chat/_completions.py | 9 +------ 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index e946c7edf8..bea6e35893 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -142,7 +142,7 @@ def parse_chat_completion( choices.append( construct_type_unchecked( - type_=cast(Any, ParsedChoice)[solve_response_format_t(response_format)], + type_=ParsedChoice[ResponseFormatT], value={ **choice.to_dict(), "message": { @@ -157,15 +157,12 @@ def parse_chat_completion( ) ) - return cast( - ParsedChatCompletion[ResponseFormatT], - construct_type_unchecked( - type_=cast(Any, ParsedChatCompletion)[solve_response_format_t(response_format)], - value={ - **chat_completion.to_dict(), - "choices": choices, - }, - ), + return construct_type_unchecked( + type_=ParsedChatCompletion[ResponseFormatT], + value={ + **chat_completion.to_dict(), + "choices": choices, + }, ) diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py index 8a1bf3cf2c..dc7dcc5c27 100644 --- a/src/openai/lib/_parsing/_responses.py +++ b/src/openai/lib/_parsing/_responses.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any, List, Iterable, cast +from typing import TYPE_CHECKING, List, Iterable, cast from typing_extensions import TypeVar, assert_never import pydantic @@ -12,7 +12,7 @@ from ..._compat import PYDANTIC_V1, model_parse_json from ..._models import construct_type_unchecked from .._pydantic import is_basemodel_type, is_dataclass_like_type -from ._completions import solve_response_format_t, type_to_response_format_param +from ._completions import type_to_response_format_param from ...types.responses import ( Response, ToolParam, @@ -56,7 +56,6 @@ def parse_response( input_tools: Iterable[ToolParam] | Omit | None, response: Response | ParsedResponse[object], ) -> ParsedResponse[TextFormatT]: - solved_t = solve_response_format_t(text_format) output_list: List[ParsedResponseOutputItem[TextFormatT]] 
= [] for output in response.output: @@ -69,7 +68,7 @@ def parse_response( content_list.append( construct_type_unchecked( - type_=cast(Any, ParsedResponseOutputText)[solved_t], + type_=ParsedResponseOutputText[TextFormatT], value={ **item.to_dict(), "parsed": parse_text(item.text, text_format=text_format), @@ -79,7 +78,7 @@ def parse_response( output_list.append( construct_type_unchecked( - type_=cast(Any, ParsedResponseOutputMessage)[solved_t], + type_=ParsedResponseOutputMessage[TextFormatT], value={ **output.to_dict(), "content": content_list, @@ -118,15 +117,12 @@ def parse_response( else: output_list.append(output) - return cast( - ParsedResponse[TextFormatT], - construct_type_unchecked( - type_=cast(Any, ParsedResponse)[solved_t], - value={ - **response.to_dict(), - "output": output_list, - }, - ), + return construct_type_unchecked( + type_=ParsedResponse[TextFormatT], + value={ + **response.to_dict(), + "output": output_list, + }, ) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index c4610e2120..04086ebfbd 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -33,7 +33,6 @@ maybe_parse_content, parse_chat_completion, get_input_tool_by_name, - solve_response_format_t, parse_function_tool_arguments, ) from ...._streaming import Stream, AsyncStream @@ -658,13 +657,7 @@ def _content_done_events( events_to_fire.append( build( - # we do this dance so that when the `ContentDoneEvent` instance - # is printed at runtime the class name will include the solved - # type variable, e.g. `ContentDoneEvent[MyModelType]` - cast( # pyright: ignore[reportUnnecessaryCast] - "type[ContentDoneEvent[ResponseFormatT]]", - cast(Any, ContentDoneEvent)[solve_response_format_t(response_format)], - ), + ContentDoneEvent[ResponseFormatT], type="content.done", content=choice_snapshot.message.content, parsed=parsed, From abcb863cbce0f79bf34503e37721d151cda60d79 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 12 Feb 2026 00:20:15 +0000 Subject: [PATCH 5/6] Fix lint and update test snapshots --- src/openai/lib/streaming/chat/_completions.py | 2 +- tests/lib/chat/test_completions.py | 72 +++++++++---------- tests/lib/chat/test_completions_streaming.py | 66 ++++++++--------- tests/lib/utils.py | 27 +------ 4 files changed, 72 insertions(+), 95 deletions(-) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index 04086ebfbd..e29450177b 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -2,7 +2,7 @@ import inspect from types import TracebackType -from typing import TYPE_CHECKING, Any, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast +from typing import TYPE_CHECKING, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast from typing_extensions import Self, Iterator, assert_never from jiter import from_json diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py index afad5a1391..85bab4f095 100644 --- a/tests/lib/chat/test_completions.py +++ b/tests/lib/chat/test_completions.py @@ -50,13 +50,13 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[NoneType]( +ParsedChatCompletion( choices=[ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + 
message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I @@ -120,13 +120,13 @@ class Location(BaseModel): assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -191,13 +191,13 @@ class Location(BaseModel): assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -266,11 +266,11 @@ class ColorDetection(BaseModel): assert print_obj(completion.choices[0], monkeypatch) == snapshot( """\ -ParsedChoice[ColorDetection]( +ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[ColorDetection]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"color":"red","hex_color_code":"#FF0000"}', @@ -317,11 +317,11 @@ class Location(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":64,"units":"f"}', @@ -332,11 +332,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=1, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -347,11 +347,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=2, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":63.0,"units":"f"}', @@ -397,13 +397,13 @@ class CalendarEvent: assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[CalendarEvent]( +ParsedChatCompletion( choices=[ - ParsedChoice[CalendarEvent]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[CalendarEvent]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}', @@ -462,11 +462,11 @@ def test_pydantic_tool_model_all_types(client: OpenAI, respx_mock: MockRouter, m assert print_obj(completion.choices[0], monkeypatch) == snapshot( """\ -ParsedChoice[Query]( +ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Query]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -576,11 +576,11 @@ class Location(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + 
ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -627,11 +627,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -701,11 +701,11 @@ class GetStockPrice(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -784,11 +784,11 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -866,13 +866,13 @@ class Location(BaseModel): assert isinstance(message.parsed.city, str) assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":58,"units":"f"}', @@ -943,13 +943,13 @@ class Location(BaseModel): assert isinstance(message.parsed.city, str) assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py index 548416dfe2..eb3a0973ac 100644 --- a/tests/lib/chat/test_completions_streaming.py +++ b/tests/lib/chat/test_completions_streaming.py @@ -63,11 +63,11 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I @@ -84,7 +84,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte ) assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( """\ -ContentDoneEvent[NoneType]( +ContentDoneEvent( content="I'm unable to provide real-time weather updates. 
To get the current weather in San Francisco, I recommend checking a reliable weather website or a weather app.", parsed=None, @@ -140,13 +140,13 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream assert print_obj(listener.stream.get_final_completion(), monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":61,"units":"f"}', @@ -181,7 +181,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream ) assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( """\ -ContentDoneEvent[Location]( +ContentDoneEvent( content='{"city":"San Francisco","temperature":61,"units":"f"}', parsed=Location(city='San Francisco', temperature=61.0, units='f'), type='content.done' @@ -320,11 +320,11 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -335,11 +335,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=1, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":61,"units":"f"}', @@ -350,11 +350,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=2, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":59,"units":"f"}', @@ -426,11 +426,11 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -495,7 +495,7 @@ def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=ChoiceLogprobs( @@ -505,7 +505,7 @@ def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp ], refusal=None ), - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='Foo!', @@ -563,7 +563,7 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=ChoiceLogprobs( @@ -617,7 +617,7 @@ class Location(BaseModel): ChatCompletionTokenLogprob(bytes=[46], logprob=-0.57687104, token='.', top_logprobs=[]) ] ), - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, 
audio=None, content=None, @@ -660,11 +660,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -693,11 +693,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -765,11 +765,11 @@ class GetStockPrice(BaseModel): assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -874,11 +874,11 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -926,11 +926,11 @@ def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, mo assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='\\n {\\n "location": "San Francisco, CA",\\n "weather": {\\n "temperature": "18°C",\\n @@ -987,11 +987,11 @@ def test_allows_non_strict_tools_but_no_parsing( assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -1047,11 +1047,11 @@ def streamer(client: OpenAI) -> Iterator[ChatCompletionChunk]: assert print_obj(state.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I diff --git a/tests/lib/utils.py b/tests/lib/utils.py index e6b6a29434..f2ae6469f3 100644 --- a/tests/lib/utils.py +++ b/tests/lib/utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -import inspect +import re from typing import Any, Iterable from typing_extensions import TypeAlias @@ -28,27 +28,4 @@ def __repr_args__(self: pydantic.BaseModel) -> ReprArgs: string = rich_print_str(obj) - # we remove all `fn_name..` occurrences - # so that we can share the same snapshots between - # pydantic v1 and pydantic v2 as their output for - # generic models differs, e.g. 
- # - # v2: `ParsedChatCompletion[test_parse_pydantic_model..Location]` - # v1: `ParsedChatCompletion[Location]` - return clear_locals(string, stacklevel=2) - - -def get_caller_name(*, stacklevel: int = 1) -> str: - frame = inspect.currentframe() - assert frame is not None - - for i in range(stacklevel): - frame = frame.f_back - assert frame is not None, f"no {i}th frame" - - return frame.f_code.co_name - - -def clear_locals(string: str, *, stacklevel: int) -> str: - caller = get_caller_name(stacklevel=stacklevel + 1) - return string.replace(f"{caller}..", "") + return re.sub(r"([A-Za-z_]\w*)\[[^\[\]]+\](?=\()", r"\1", string) From 36ed57985a94484b972d08a38e71fe0d31487046 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 12 Feb 2026 17:41:40 +0000 Subject: [PATCH 6/6] Remove schema cache and memory leak test --- src/openai/lib/_parsing/_completions.py | 15 +------- tests/lib/_parsing/test_memory_leak.py | 50 ------------------------- 2 files changed, 1 insertion(+), 64 deletions(-) delete mode 100644 tests/lib/_parsing/test_memory_leak.py diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index bea6e35893..bb16368f4a 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -2,7 +2,6 @@ import json import logging -import weakref from typing import TYPE_CHECKING, Any, Iterable, cast from typing_extensions import TypeVar, TypeGuard, assert_never @@ -31,9 +30,6 @@ from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam from ...types.chat.chat_completion_message_function_tool_call import Function -# Cache to store weak references to schema objects -_schema_cache: weakref.WeakKeyDictionary[type, ResponseFormatParam] = weakref.WeakKeyDictionary() - ResponseFormatT = TypeVar( "ResponseFormatT", # if it isn't given then we don't do any parsing @@ -285,10 +281,6 @@ def type_to_response_format_param( # can only be a `type` response_format = cast(type, response_format) - # Check if we already have a schema for this type in the cache - if response_format in _schema_cache: - return _schema_cache[response_format] - json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None if is_basemodel_type(response_format): @@ -300,7 +292,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - schema_param: ResponseFormatParam = { + return { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), @@ -308,8 +300,3 @@ def type_to_response_format_param( "strict": True, }, } - - # Store a weak reference to the schema parameter - _schema_cache[response_format] = schema_param - - return schema_param diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py deleted file mode 100644 index e60dcf8953..0000000000 --- a/tests/lib/_parsing/test_memory_leak.py +++ /dev/null @@ -1,50 +0,0 @@ -import gc -from typing import List - -import pytest -from pydantic import Field, create_model - -from openai.lib._parsing import type_to_response_format_param -from openai.lib._parsing._completions import _schema_cache - - -@pytest.mark.asyncio -async def test_async_completions_parse_memory() -> None: - """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" - # Create a base step model - StepModel = create_model( - "Step", - explanation=(str, Field()), - output=(str, Field()), - ) - - # Clear the cache before testing - _schema_cache.clear() 
- - # Simulate the issue by creating multiple models and making calls - models: list[type] = [] - for i in range(10): - # Create a new dynamic model each time - new_model = create_model( - f"MathResponse{i}", - steps=(List[StepModel], Field()), # type: ignore[valid-type] - final_answer=(str, Field()), - ) - models.append(new_model) - - # Convert to response format and check if it's in the cache - type_to_response_format_param(new_model) - assert new_model in _schema_cache - - # Record cache size with all models referenced - cache_size_with_references = len(_schema_cache) - - # Let the models go out of scope and trigger garbage collection - del models - gc.collect() - - # After garbage collection, the cache should be significantly reduced - cache_size_after_gc = len(_schema_cache) - assert cache_size_after_gc < cache_size_with_references - # The cache size should be close to the initial size (with some tolerance) - assert cache_size_after_gc < cache_size_with_references / 2 \ No newline at end of file
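
Notes on the mechanisms the series relies on (standalone illustrative sketches, not SDK code).

Patches 1-3 rest on `weakref.WeakKeyDictionary` semantics: a cache entry lives only as long as
something else still references the key class. A minimal sketch of that behaviour, with an
illustrative cache name and schema shape rather than the SDK's actual objects:

    import gc
    import weakref

    # Schema cache keyed weakly by the model class: the entry disappears once the
    # class itself is garbage-collected, so dynamically created models are not
    # pinned in memory by the cache.
    schema_cache: "weakref.WeakKeyDictionary[type, dict]" = weakref.WeakKeyDictionary()

    def cached_schema(model_type: type) -> dict:
        # Return the cached schema for this model class, building it on first use.
        if model_type in schema_cache:
            return schema_cache[model_type]
        schema = {"type": "json_schema", "json_schema": {"name": model_type.__name__}}
        schema_cache[model_type] = schema
        return schema

    DynamicModel = type("DynamicModel", (), {})  # stand-in for pydantic.create_model(...)
    cached_schema(DynamicModel)
    print(len(schema_cache))  # 1

    del DynamicModel
    gc.collect()
    print(len(schema_cache))  # 0: the weak key no longer keeps the entry alive

Patch 6 later removes this cache again, once patch 4 stops creating new type objects at runtime
in the parse paths.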
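
The retention problem patch 4 describes ("Python's type cache holds references indefinitely")
can be reproduced outside the SDK. The sketch below uses a plain `typing.Generic` and a `type()`
call as stand-ins for the SDK's parsed-response generics and `pydantic.create_model`; the point
it illustrates is that CPython's generic-parametrization cache keeps a strong reference to the
argument type:

    import gc
    import weakref
    from typing import Generic, TypeVar

    T = TypeVar("T")

    class Parsed(Generic[T]):  # stand-in for ParsedChatCompletion / ParsedChoice
        pass

    def parse_once() -> "weakref.ref[type]":
        dynamic = type("DynamicModel", (), {})  # stand-in for create_model(...)
        Parsed[dynamic]  # runtime parametrization goes through typing's internal cache
        return weakref.ref(dynamic)

    ref = parse_once()
    gc.collect()
    print(ref() is None)  # False: the parametrization cache still references the dynamic class

Parametrizing with the module-level TypeVar instead, as the patch does with
`ParsedChoice[ResponseFormatT]`, keys that cache on a constant, so per-call model types never
end up in it.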
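
Patch 5 also rewrites the snapshot normalization in tests/lib/utils.py: rather than stripping a
caller-specific qualifier from generated class names, it now drops any [...] parameter list that
sits between a class name and its opening parenthesis, which is why the updated snapshots expect
`ParsedChoice(` rather than `ParsedChoice[Location](`. A small illustration of that substitution
on a made-up repr string:

    import re

    repr_string = "ParsedChoice[Location](message=ParsedChatCompletionMessage[Location](parsed=Location(city='SF')))"
    print(re.sub(r"([A-Za-z_]\w*)\[[^\[\]]+\](?=\()", r"\1", repr_string))
    # ParsedChoice(message=ParsedChatCompletionMessage(parsed=Location(city='SF')))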