From f1e3865d1e0ea2213e57e717aa770756b19e7e36 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 27 Feb 2025 17:17:31 +0000 Subject: [PATCH 1/6] Fix memory leak in AsyncCompletions.parse() with dynamically created models This commit fixes a memory leak issue in AsyncCompletions.parse() when repeatedly called with Pydantic models created via create_model(). The issue was occurring because schema representations of models were being retained indefinitely. The fix implements a WeakKeyDictionary cache that allows the schema objects to be garbage-collected when the model types are no longer referenced elsewhere in code. Added test cases to verify the fix prevents memory leaks with dynamically created models. --- src/openai/lib/_parsing/_completions.py | 15 +++- tests/lib/_parsing/test_memory_leak.py | 101 ++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tests/lib/_parsing/test_memory_leak.py diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index c160070b66..fc7e6e5ab8 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import weakref from typing import TYPE_CHECKING, Any, Iterable, cast from typing_extensions import TypeVar, TypeGuard, assert_never @@ -28,6 +29,9 @@ from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam from ...types.chat.chat_completion_message_tool_call import Function +# Cache to store weak references to schema objects +_schema_cache = weakref.WeakKeyDictionary() + ResponseFormatT = TypeVar( "ResponseFormatT", # if it isn't given then we don't do any parsing @@ -243,6 +247,10 @@ def type_to_response_format_param( # can only be a `type` response_format = cast(type, response_format) + # Check if we already have a schema for this type in the cache + if response_format in _schema_cache: + return _schema_cache[response_format] + json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None if is_basemodel_type(response_format): @@ -254,7 +262,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - return { + schema_param = { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), @@ -262,3 +270,8 @@ def type_to_response_format_param( "strict": True, }, } + + # Store a weak reference to the schema parameter + _schema_cache[response_format] = schema_param + + return schema_param diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py new file mode 100644 index 0000000000..3e7dd46a22 --- /dev/null +++ b/tests/lib/_parsing/test_memory_leak.py @@ -0,0 +1,101 @@ +import unittest +import gc +import sys +from unittest.mock import AsyncMock, patch, MagicMock +from typing import List + +import pytest +from pydantic import Field, create_model + +from openai.resources.beta.chat.completions import AsyncCompletions +from openai.lib._parsing import type_to_response_format_param +from openai.lib._parsing._completions import _schema_cache + +class TestMemoryLeak(unittest.TestCase): + def setUp(self): + # Clear the schema cache before each test + _schema_cache.clear() + + def test_schema_cache_with_models(self): + """Test if schema cache properly handles dynamic models and prevents memory leak""" + + StepModel = create_model( + "Step", + explanation=(str, Field()), + output=(str, Field()), + ) + + # Create 
several models and ensure they're cached properly + models = [] + for i in range(5): + model = create_model( + f"MathResponse{i}", + steps=(List[StepModel], Field()), + final_answer=(str, Field()), + ) + models.append(model) + + # Convert model to response format param + param = type_to_response_format_param(model) + + # Check if the model is in the cache + self.assertIn(model, _schema_cache) + + # Test that all models are in the cache + self.assertEqual(len(_schema_cache), 5) + + # Let the models go out of scope and trigger garbage collection + models = None + gc.collect() + + # After garbage collection, the cache should be empty or reduced + # since we're using weakref.WeakKeyDictionary + self.assertLess(len(_schema_cache), 5) + +@pytest.mark.asyncio +async def test_async_completions_parse_memory(): + """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" + StepModel = create_model( + "Step", + explanation=(str, Field()), + output=(str, Field()), + ) + + # Clear the cache and record initial state + _schema_cache.clear() + initial_cache_size = len(_schema_cache) + + # Create a mock client + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock() + + # Create the AsyncCompletions instance with our mock client + completions = AsyncCompletions(mock_client) + + # Simulate the issue by creating multiple models and making calls + models = [] + for i in range(10): + # Create a new dynamic model each time + new_model = create_model( + f"MathResponse{i}", + steps=(List[StepModel], Field()), + final_answer=(str, Field()), + ) + models.append(new_model) + + # Convert to response format and check if it's in the cache + type_to_response_format_param(new_model) + assert new_model in _schema_cache + + # Record cache size with all models referenced + cache_size_with_references = len(_schema_cache) + + # Let the models go out of scope and trigger garbage collection + models = None + gc.collect() + + # After garbage collection, the cache should be significantly reduced + cache_size_after_gc = len(_schema_cache) + assert cache_size_after_gc < cache_size_with_references + # The cache size should be close to the initial size (with some tolerance) + assert cache_size_after_gc < cache_size_with_references / 2 \ No newline at end of file From d5957256ef48060e44ed8b61b425d89726fee6ff Mon Sep 17 00:00:00 2001 From: mousberg Date: Fri, 28 Mar 2025 22:53:39 +0000 Subject: [PATCH 2/6] Fix lint issues, remove unittest class, add proper type annotations --- src/openai/lib/_parsing/_completions.py | 2 +- tests/lib/_parsing/test_memory_leak.py | 51 ------------------------- 2 files changed, 1 insertion(+), 52 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index fc7e6e5ab8..2f4dd47ba4 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -262,7 +262,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - schema_param = { + schema_param: ResponseFormatParam = { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py index 3e7dd46a22..4a83f4ec0b 100644 --- a/tests/lib/_parsing/test_memory_leak.py +++ b/tests/lib/_parsing/test_memory_leak.py @@ -1,56 +1,12 @@ -import unittest import gc -import sys -from unittest.mock import AsyncMock, patch, MagicMock from typing import List import 
pytest from pydantic import Field, create_model -from openai.resources.beta.chat.completions import AsyncCompletions from openai.lib._parsing import type_to_response_format_param from openai.lib._parsing._completions import _schema_cache -class TestMemoryLeak(unittest.TestCase): - def setUp(self): - # Clear the schema cache before each test - _schema_cache.clear() - - def test_schema_cache_with_models(self): - """Test if schema cache properly handles dynamic models and prevents memory leak""" - - StepModel = create_model( - "Step", - explanation=(str, Field()), - output=(str, Field()), - ) - - # Create several models and ensure they're cached properly - models = [] - for i in range(5): - model = create_model( - f"MathResponse{i}", - steps=(List[StepModel], Field()), - final_answer=(str, Field()), - ) - models.append(model) - - # Convert model to response format param - param = type_to_response_format_param(model) - - # Check if the model is in the cache - self.assertIn(model, _schema_cache) - - # Test that all models are in the cache - self.assertEqual(len(_schema_cache), 5) - - # Let the models go out of scope and trigger garbage collection - models = None - gc.collect() - - # After garbage collection, the cache should be empty or reduced - # since we're using weakref.WeakKeyDictionary - self.assertLess(len(_schema_cache), 5) @pytest.mark.asyncio async def test_async_completions_parse_memory(): @@ -65,13 +21,6 @@ async def test_async_completions_parse_memory(): _schema_cache.clear() initial_cache_size = len(_schema_cache) - # Create a mock client - mock_client = MagicMock() - mock_client.chat.completions.create = AsyncMock() - - # Create the AsyncCompletions instance with our mock client - completions = AsyncCompletions(mock_client) - # Simulate the issue by creating multiple models and making calls models = [] for i in range(10): From bcb75a8ed336cd9f8744c448308c4006fd45be49 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 23 Oct 2025 20:37:09 +0100 Subject: [PATCH 3/6] Fix lint errors and add type annotations - Remove unused initial_cache_size variable - Add type annotations for _schema_cache, test function return type - Use del instead of assignment to None for better garbage collection - Add type ignore comment for Pydantic dynamic model typing - All ruff checks pass --- src/openai/lib/_parsing/_completions.py | 2 +- tests/lib/_parsing/test_memory_leak.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index 6bc8c0b2e7..e946c7edf8 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -32,7 +32,7 @@ from ...types.chat.chat_completion_message_function_tool_call import Function # Cache to store weak references to schema objects -_schema_cache = weakref.WeakKeyDictionary() +_schema_cache: weakref.WeakKeyDictionary[type, ResponseFormatParam] = weakref.WeakKeyDictionary() ResponseFormatT = TypeVar( "ResponseFormatT", diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py index 4a83f4ec0b..e60dcf8953 100644 --- a/tests/lib/_parsing/test_memory_leak.py +++ b/tests/lib/_parsing/test_memory_leak.py @@ -9,25 +9,25 @@ @pytest.mark.asyncio -async def test_async_completions_parse_memory(): +async def test_async_completions_parse_memory() -> None: """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" + # Create a base step model StepModel = create_model( "Step", 
explanation=(str, Field()), output=(str, Field()), ) - # Clear the cache and record initial state + # Clear the cache before testing _schema_cache.clear() - initial_cache_size = len(_schema_cache) - + # Simulate the issue by creating multiple models and making calls - models = [] + models: list[type] = [] for i in range(10): # Create a new dynamic model each time new_model = create_model( f"MathResponse{i}", - steps=(List[StepModel], Field()), + steps=(List[StepModel], Field()), # type: ignore[valid-type] final_answer=(str, Field()), ) models.append(new_model) @@ -40,7 +40,7 @@ async def test_async_completions_parse_memory(): cache_size_with_references = len(_schema_cache) # Let the models go out of scope and trigger garbage collection - models = None + del models gc.collect() # After garbage collection, the cache should be significantly reduced From 35376a5e5ac11603abd5aecaf0b0b85293906e44 Mon Sep 17 00:00:00 2001 From: mousberg Date: Wed, 11 Feb 2026 23:51:36 +0000 Subject: [PATCH 4/6] Fix memory leak in parse methods Avoid creating new type objects at runtime in construct_type_unchecked calls. Python's type cache holds references indefinitely, causing memory growth when parsing responses in long-running processes. Fixed across completions, responses, and streaming paths. --- src/openai/lib/_parsing/_completions.py | 17 ++++++------- src/openai/lib/_parsing/_responses.py | 24 ++++++++----------- src/openai/lib/streaming/chat/_completions.py | 9 +------ 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index e946c7edf8..bea6e35893 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -142,7 +142,7 @@ def parse_chat_completion( choices.append( construct_type_unchecked( - type_=cast(Any, ParsedChoice)[solve_response_format_t(response_format)], + type_=ParsedChoice[ResponseFormatT], value={ **choice.to_dict(), "message": { @@ -157,15 +157,12 @@ def parse_chat_completion( ) ) - return cast( - ParsedChatCompletion[ResponseFormatT], - construct_type_unchecked( - type_=cast(Any, ParsedChatCompletion)[solve_response_format_t(response_format)], - value={ - **chat_completion.to_dict(), - "choices": choices, - }, - ), + return construct_type_unchecked( + type_=ParsedChatCompletion[ResponseFormatT], + value={ + **chat_completion.to_dict(), + "choices": choices, + }, ) diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py index 8a1bf3cf2c..dc7dcc5c27 100644 --- a/src/openai/lib/_parsing/_responses.py +++ b/src/openai/lib/_parsing/_responses.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any, List, Iterable, cast +from typing import TYPE_CHECKING, List, Iterable, cast from typing_extensions import TypeVar, assert_never import pydantic @@ -12,7 +12,7 @@ from ..._compat import PYDANTIC_V1, model_parse_json from ..._models import construct_type_unchecked from .._pydantic import is_basemodel_type, is_dataclass_like_type -from ._completions import solve_response_format_t, type_to_response_format_param +from ._completions import type_to_response_format_param from ...types.responses import ( Response, ToolParam, @@ -56,7 +56,6 @@ def parse_response( input_tools: Iterable[ToolParam] | Omit | None, response: Response | ParsedResponse[object], ) -> ParsedResponse[TextFormatT]: - solved_t = solve_response_format_t(text_format) output_list: List[ParsedResponseOutputItem[TextFormatT]] 
= [] for output in response.output: @@ -69,7 +68,7 @@ def parse_response( content_list.append( construct_type_unchecked( - type_=cast(Any, ParsedResponseOutputText)[solved_t], + type_=ParsedResponseOutputText[TextFormatT], value={ **item.to_dict(), "parsed": parse_text(item.text, text_format=text_format), @@ -79,7 +78,7 @@ def parse_response( output_list.append( construct_type_unchecked( - type_=cast(Any, ParsedResponseOutputMessage)[solved_t], + type_=ParsedResponseOutputMessage[TextFormatT], value={ **output.to_dict(), "content": content_list, @@ -118,15 +117,12 @@ def parse_response( else: output_list.append(output) - return cast( - ParsedResponse[TextFormatT], - construct_type_unchecked( - type_=cast(Any, ParsedResponse)[solved_t], - value={ - **response.to_dict(), - "output": output_list, - }, - ), + return construct_type_unchecked( + type_=ParsedResponse[TextFormatT], + value={ + **response.to_dict(), + "output": output_list, + }, ) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index c4610e2120..04086ebfbd 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -33,7 +33,6 @@ maybe_parse_content, parse_chat_completion, get_input_tool_by_name, - solve_response_format_t, parse_function_tool_arguments, ) from ...._streaming import Stream, AsyncStream @@ -658,13 +657,7 @@ def _content_done_events( events_to_fire.append( build( - # we do this dance so that when the `ContentDoneEvent` instance - # is printed at runtime the class name will include the solved - # type variable, e.g. `ContentDoneEvent[MyModelType]` - cast( # pyright: ignore[reportUnnecessaryCast] - "type[ContentDoneEvent[ResponseFormatT]]", - cast(Any, ContentDoneEvent)[solve_response_format_t(response_format)], - ), + ContentDoneEvent[ResponseFormatT], type="content.done", content=choice_snapshot.message.content, parsed=parsed, From abcb863cbce0f79bf34503e37721d151cda60d79 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 12 Feb 2026 00:20:15 +0000 Subject: [PATCH 5/6] Fix lint and update test snapshots --- src/openai/lib/streaming/chat/_completions.py | 2 +- tests/lib/chat/test_completions.py | 72 +++++++++---------- tests/lib/chat/test_completions_streaming.py | 66 ++++++++--------- tests/lib/utils.py | 27 +------ 4 files changed, 72 insertions(+), 95 deletions(-) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index 04086ebfbd..e29450177b 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -2,7 +2,7 @@ import inspect from types import TracebackType -from typing import TYPE_CHECKING, Any, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast +from typing import TYPE_CHECKING, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast from typing_extensions import Self, Iterator, assert_never from jiter import from_json diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py index afad5a1391..85bab4f095 100644 --- a/tests/lib/chat/test_completions.py +++ b/tests/lib/chat/test_completions.py @@ -50,13 +50,13 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[NoneType]( +ParsedChatCompletion( choices=[ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + 
message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I @@ -120,13 +120,13 @@ class Location(BaseModel): assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -191,13 +191,13 @@ class Location(BaseModel): assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -266,11 +266,11 @@ class ColorDetection(BaseModel): assert print_obj(completion.choices[0], monkeypatch) == snapshot( """\ -ParsedChoice[ColorDetection]( +ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[ColorDetection]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"color":"red","hex_color_code":"#FF0000"}', @@ -317,11 +317,11 @@ class Location(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":64,"units":"f"}', @@ -332,11 +332,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=1, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -347,11 +347,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=2, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":63.0,"units":"f"}', @@ -397,13 +397,13 @@ class CalendarEvent: assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[CalendarEvent]( +ParsedChatCompletion( choices=[ - ParsedChoice[CalendarEvent]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[CalendarEvent]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}', @@ -462,11 +462,11 @@ def test_pydantic_tool_model_all_types(client: OpenAI, respx_mock: MockRouter, m assert print_obj(completion.choices[0], monkeypatch) == snapshot( """\ -ParsedChoice[Query]( +ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Query]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -576,11 +576,11 @@ class Location(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + 
ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -627,11 +627,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -701,11 +701,11 @@ class GetStockPrice(BaseModel): assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -784,11 +784,11 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: assert print_obj(completion.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -866,13 +866,13 @@ class Location(BaseModel): assert isinstance(message.parsed.city, str) assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":58,"units":"f"}', @@ -943,13 +943,13 @@ class Location(BaseModel): assert isinstance(message.parsed.city, str) assert print_obj(completion, monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py index 548416dfe2..eb3a0973ac 100644 --- a/tests/lib/chat/test_completions_streaming.py +++ b/tests/lib/chat/test_completions_streaming.py @@ -63,11 +63,11 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I @@ -84,7 +84,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte ) assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( """\ -ContentDoneEvent[NoneType]( +ContentDoneEvent( content="I'm unable to provide real-time weather updates. 
To get the current weather in San Francisco, I recommend checking a reliable weather website or a weather app.", parsed=None, @@ -140,13 +140,13 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream assert print_obj(listener.stream.get_final_completion(), monkeypatch) == snapshot( """\ -ParsedChatCompletion[Location]( +ParsedChatCompletion( choices=[ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":61,"units":"f"}', @@ -181,7 +181,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream ) assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( """\ -ContentDoneEvent[Location]( +ContentDoneEvent( content='{"city":"San Francisco","temperature":61,"units":"f"}', parsed=Location(city='San Francisco', temperature=61.0, units='f'), type='content.done' @@ -320,11 +320,11 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":65,"units":"f"}', @@ -335,11 +335,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=1, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":61,"units":"f"}', @@ -350,11 +350,11 @@ class Location(BaseModel): tool_calls=None ) ), - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=2, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='{"city":"San Francisco","temperature":59,"units":"f"}', @@ -426,11 +426,11 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -495,7 +495,7 @@ def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=ChoiceLogprobs( @@ -505,7 +505,7 @@ def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeyp ], refusal=None ), - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='Foo!', @@ -563,7 +563,7 @@ class Location(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ [ - ParsedChoice[Location]( + ParsedChoice( finish_reason='stop', index=0, logprobs=ChoiceLogprobs( @@ -617,7 +617,7 @@ class Location(BaseModel): ChatCompletionTokenLogprob(bytes=[46], logprob=-0.57687104, token='.', top_logprobs=[]) ] ), - message=ParsedChatCompletionMessage[Location]( + message=ParsedChatCompletionMessage( annotations=None, 
audio=None, content=None, @@ -660,11 +660,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -693,11 +693,11 @@ class GetWeatherArgs(BaseModel): assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -765,11 +765,11 @@ class GetStockPrice(BaseModel): assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -874,11 +874,11 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[object]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[object]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -926,11 +926,11 @@ def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, mo assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content='\\n {\\n "location": "San Francisco, CA",\\n "weather": {\\n "temperature": "18°C",\\n @@ -987,11 +987,11 @@ def test_allows_non_strict_tools_but_no_parsing( assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='tool_calls', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content=None, @@ -1047,11 +1047,11 @@ def streamer(client: OpenAI) -> Iterator[ChatCompletionChunk]: assert print_obj(state.get_final_completion().choices, monkeypatch) == snapshot( """\ [ - ParsedChoice[NoneType]( + ParsedChoice( finish_reason='stop', index=0, logprobs=None, - message=ParsedChatCompletionMessage[NoneType]( + message=ParsedChatCompletionMessage( annotations=None, audio=None, content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I diff --git a/tests/lib/utils.py b/tests/lib/utils.py index e6b6a29434..f2ae6469f3 100644 --- a/tests/lib/utils.py +++ b/tests/lib/utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -import inspect +import re from typing import Any, Iterable from typing_extensions import TypeAlias @@ -28,27 +28,4 @@ def __repr_args__(self: pydantic.BaseModel) -> ReprArgs: string = rich_print_str(obj) - # we remove all `fn_name..` occurrences - # so that we can share the same snapshots between - # pydantic v1 and pydantic v2 as their output for - # generic models differs, e.g. 
- # - # v2: `ParsedChatCompletion[test_parse_pydantic_model..Location]` - # v1: `ParsedChatCompletion[Location]` - return clear_locals(string, stacklevel=2) - - -def get_caller_name(*, stacklevel: int = 1) -> str: - frame = inspect.currentframe() - assert frame is not None - - for i in range(stacklevel): - frame = frame.f_back - assert frame is not None, f"no {i}th frame" - - return frame.f_code.co_name - - -def clear_locals(string: str, *, stacklevel: int) -> str: - caller = get_caller_name(stacklevel=stacklevel + 1) - return string.replace(f"{caller}..", "") + return re.sub(r"([A-Za-z_]\w*)\[[^\[\]]+\](?=\()", r"\1", string) From 36ed57985a94484b972d08a38e71fe0d31487046 Mon Sep 17 00:00:00 2001 From: mousberg Date: Thu, 12 Feb 2026 17:41:40 +0000 Subject: [PATCH 6/6] Remove schema cache and memory leak test --- src/openai/lib/_parsing/_completions.py | 15 +------- tests/lib/_parsing/test_memory_leak.py | 50 ------------------------- 2 files changed, 1 insertion(+), 64 deletions(-) delete mode 100644 tests/lib/_parsing/test_memory_leak.py diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index bea6e35893..bb16368f4a 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -2,7 +2,6 @@ import json import logging -import weakref from typing import TYPE_CHECKING, Any, Iterable, cast from typing_extensions import TypeVar, TypeGuard, assert_never @@ -31,9 +30,6 @@ from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam from ...types.chat.chat_completion_message_function_tool_call import Function -# Cache to store weak references to schema objects -_schema_cache: weakref.WeakKeyDictionary[type, ResponseFormatParam] = weakref.WeakKeyDictionary() - ResponseFormatT = TypeVar( "ResponseFormatT", # if it isn't given then we don't do any parsing @@ -285,10 +281,6 @@ def type_to_response_format_param( # can only be a `type` response_format = cast(type, response_format) - # Check if we already have a schema for this type in the cache - if response_format in _schema_cache: - return _schema_cache[response_format] - json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None if is_basemodel_type(response_format): @@ -300,7 +292,7 @@ def type_to_response_format_param( else: raise TypeError(f"Unsupported response_format type - {response_format}") - schema_param: ResponseFormatParam = { + return { "type": "json_schema", "json_schema": { "schema": to_strict_json_schema(json_schema_type), @@ -308,8 +300,3 @@ def type_to_response_format_param( "strict": True, }, } - - # Store a weak reference to the schema parameter - _schema_cache[response_format] = schema_param - - return schema_param diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py deleted file mode 100644 index e60dcf8953..0000000000 --- a/tests/lib/_parsing/test_memory_leak.py +++ /dev/null @@ -1,50 +0,0 @@ -import gc -from typing import List - -import pytest -from pydantic import Field, create_model - -from openai.lib._parsing import type_to_response_format_param -from openai.lib._parsing._completions import _schema_cache - - -@pytest.mark.asyncio -async def test_async_completions_parse_memory() -> None: - """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models""" - # Create a base step model - StepModel = create_model( - "Step", - explanation=(str, Field()), - output=(str, Field()), - ) - - # Clear the cache before testing - _schema_cache.clear() 
- - # Simulate the issue by creating multiple models and making calls - models: list[type] = [] - for i in range(10): - # Create a new dynamic model each time - new_model = create_model( - f"MathResponse{i}", - steps=(List[StepModel], Field()), # type: ignore[valid-type] - final_answer=(str, Field()), - ) - models.append(new_model) - - # Convert to response format and check if it's in the cache - type_to_response_format_param(new_model) - assert new_model in _schema_cache - - # Record cache size with all models referenced - cache_size_with_references = len(_schema_cache) - - # Let the models go out of scope and trigger garbage collection - del models - gc.collect() - - # After garbage collection, the cache should be significantly reduced - cache_size_after_gc = len(_schema_cache) - assert cache_size_after_gc < cache_size_with_references - # The cache size should be close to the initial size (with some tolerance) - assert cache_size_after_gc < cache_size_with_references / 2 \ No newline at end of file
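
Notes on the mechanisms the series relies on (standalone illustrative sketches, not SDK code).

Patches 1-3 rest on `weakref.WeakKeyDictionary` semantics: a cache entry lives only as long as
something else still references the key class. A minimal sketch of that behaviour, with an
illustrative cache name and schema shape rather than the SDK's actual objects:

    import gc
    import weakref

    # Schema cache keyed weakly by the model class: the entry disappears once the
    # class itself is garbage-collected, so dynamically created models are not
    # pinned in memory by the cache.
    schema_cache: "weakref.WeakKeyDictionary[type, dict]" = weakref.WeakKeyDictionary()

    def cached_schema(model_type: type) -> dict:
        # Return the cached schema for this model class, building it on first use.
        if model_type in schema_cache:
            return schema_cache[model_type]
        schema = {"type": "json_schema", "json_schema": {"name": model_type.__name__}}
        schema_cache[model_type] = schema
        return schema

    DynamicModel = type("DynamicModel", (), {})  # stand-in for pydantic.create_model(...)
    cached_schema(DynamicModel)
    print(len(schema_cache))  # 1

    del DynamicModel
    gc.collect()
    print(len(schema_cache))  # 0: the weak key no longer keeps the entry alive

Patch 6 later removes this cache again, once patch 4 stops creating new type objects at runtime
in the parse paths.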
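
The retention problem patch 4 describes ("Python's type cache holds references indefinitely")
can be reproduced outside the SDK. The sketch below uses a plain `typing.Generic` and a `type()`
call as stand-ins for the SDK's parsed-response generics and `pydantic.create_model`; the point
it illustrates is that CPython's generic-parametrization cache keeps a strong reference to the
argument type:

    import gc
    import weakref
    from typing import Generic, TypeVar

    T = TypeVar("T")

    class Parsed(Generic[T]):  # stand-in for ParsedChatCompletion / ParsedChoice
        pass

    def parse_once() -> "weakref.ref[type]":
        dynamic = type("DynamicModel", (), {})  # stand-in for create_model(...)
        Parsed[dynamic]  # runtime parametrization goes through typing's internal cache
        return weakref.ref(dynamic)

    ref = parse_once()
    gc.collect()
    print(ref() is None)  # False: the parametrization cache still references the dynamic class

Parametrizing with the module-level TypeVar instead, as the patch does with
`ParsedChoice[ResponseFormatT]`, keys that cache on a constant, so per-call model types never
end up in it.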
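
Patch 5 also rewrites the snapshot normalization in tests/lib/utils.py: rather than stripping a
caller-specific qualifier from generated class names, it now drops any [...] parameter list that
sits between a class name and its opening parenthesis, which is why the updated snapshots expect
`ParsedChoice(` rather than `ParsedChoice[Location](`. A small illustration of that substitution
on a made-up repr string:

    import re

    repr_string = "ParsedChoice[Location](message=ParsedChatCompletionMessage[Location](parsed=Location(city='SF')))"
    print(re.sub(r"([A-Za-z_]\w*)\[[^\[\]]+\](?=\()", r"\1", repr_string))
    # ParsedChoice(message=ParsedChatCompletionMessage(parsed=Location(city='SF')))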