From 48e5ea1dfd23a9cb5d118d7af661b026d66743bc Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Wed, 7 Jan 2026 15:39:20 -0800
Subject: [PATCH 01/11] model_patcher: Remove confusing load stat (#11710)

If the loader passes 1e32 as the usable memory size, it means force the
full load. This happens with CPU loads and a few other misc cases.
Remove the confusing number and just leave the other details.
---
 comfy/model_patcher.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 93d26c690665..4528814adc43 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -790,11 +790,12 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
             for param in params:
                 self.pin_weight_to_device("{}.{}".format(n, param))

+        usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
         if lowvram_counter > 0:
-            logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+            logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
             self.model.model_lowvram = True
         else:
-            logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+            logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
             self.model.model_lowvram = False
         if full_load:
             self.model.to(device_to)

From 1c705f7bfb0fb59f6213dfb85ec5d5dc2ce4300e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com>
Date: Thu, 8 Jan 2026 01:39:59 +0200
Subject: [PATCH 02/11] Add device selection for LTXAVTextEncoderLoader (#11700)

---
 comfy_extras/nodes_lt_audio.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_lt_audio.py b/comfy_extras/nodes_lt_audio.py
index 26b0160d2d98..1966fd1bf8f6 100644
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@@ -185,6 +185,10 @@ def define_schema(cls) -> io.Schema:
                 io.Combo.Input(
                     "ckpt_name",
                     options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                 )
             ],
             outputs=[io.Clip.Output()],
@@ -197,7 +201,11 @@ def execute(cls, text_encoder, ckpt_name, device="default"):
         clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
         clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
         return io.NodeOutput(clip)
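For reference, this is how the reworked log lines from PATCH 01/11 render; a sketch only, with invented sizes and patch count rather than output from a real load:

# Illustration of the new format strings; every number below is made up.
usable_stat = "{:.2f} MB usable,".format(8589934592 / (1024 * 1024))  # a finite 8 GiB budget
print("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, 6021.35, 2170.65, 64.00, 0))
# loaded partially; 8192.00 MB usable, 6021.35 MB loaded, 2170.65 MB offloaded, 64.00 MB buffer reserved, lowvram patches: 0

usable_stat = ""  # lowvram_model_memory == 1e32 (forced full load): the usable stat is dropped
print("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, 6021.35, True))
# loaded completely;  6021.35 MB loaded, full load: True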
From 34751fe9f9ade0c715768202c19211dc0c72e760 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 16:12:15 -0800
Subject: [PATCH 03/11] Lower ltxv text encoder vram use. (#11713)

---
 comfy/text_encoders/lt.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py
index 130ebaeaed94..dc0694e0e5b4 100644
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -98,10 +98,13 @@ def encode_token_weights(self, token_weight_pairs):
         out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
         out_device = out.device
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
         out = out.movedim(1, -1).to(self.execution_device)
         out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
         out = out.reshape((out.shape[0], out.shape[1], -1))
         out = self.text_embedding_projection(out)
+        out = out.float()
         out_vid = self.video_embeddings_connector(out)[0]
         out_audio = self.audio_embeddings_connector(out)[0]
         out = torch.concat((out_vid, out_audio), dim=-1)

From 007b87e7ac29e55ce0ad2c436f5ae68f3a078080 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 16:48:47 -0800
Subject: [PATCH 04/11] Bump required comfy-kitchen version. (#11714)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index bc8346bcf689..13e95afa06c9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.3
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1

From 3cd19e99c10a25cf6e6b51b82e3c16c501733b8c Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 17:04:56 -0800
Subject: [PATCH 05/11] Increase ltxav mem estimation by a bit. (#11715)

---
 comfy/supported_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index ee9a7900190d..d44c0bc370c6 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -845,7 +845,7 @@ class LTXAV(LTXV):

     def __init__(self, unet_config):
         super().__init__(unet_config)
-        self.memory_usage_factor = 0.055 # TODO
+        self.memory_usage_factor = 0.061 # TODO

     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.LTXAV(self, device=device)
From 25bc1b5b57d61930d6ab60d8cf7e9241d26e4fe9 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 17:11:22 -0800
Subject: [PATCH 06/11] Add memory estimation function to ltxav text encoder. (#11716)

---
 comfy/sd.py               | 11 +++++++----
 comfy/text_encoders/lt.py |  8 ++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index 32157e18b24a..efde3839cd96 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -218,7 +218,7 @@ def encode_from_tokens_scheduled(self, tokens, unprojected=False, add_dict: dict
         if unprojected:
             self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
         self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
         all_hooks.reset()
         self.patcher.patch_hooks(None)
@@ -266,7 +266,7 @@ def encode_from_tokens(self, tokens, return_pooled=False, return_dict=False):
         if return_pooled == "unprojected":
             self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
         self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
         o = self.cond_stage_model.encode_token_weights(tokens)
         cond, pooled = o[:2]
@@ -299,8 +299,11 @@ def get_sd(self):
             sd_clip[k] = sd_tokenizer[k]
         return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
         return self.patcher

     def get_key_patches(self):
diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py
index dc0694e0e5b4..776e25e975c2 100644
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -121,6 +121,14 @@ def load_sd(self, sd):

         return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0
+
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
     class LTXAVTEModel_(LTXAVTEModel):
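For a rough sense of the scale of the estimate added in PATCH 06/11: it budgets num_tokens * constant MiB, with the constant halved when bf16 is used. A minimal sketch mirroring that arithmetic, with an arbitrary example token count (not part of the patch):

# Mirrors memory_estimation_function; the 512-token prompt is an invented example.
def estimated_bytes(num_tokens, bf16=True):
    constant = 6.0
    if bf16:
        constant /= 2.0
    return num_tokens * constant * 1024 * 1024

print(estimated_bytes(512) / (1024 * 1024))              # 1536.0 -> ~1.5 GB budgeted with bf16
print(estimated_bytes(512, bf16=False) / (1024 * 1024))  # 3072.0 -> ~3 GB without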
From b6c79a648a013f477f514f61580d1a06220b15eb Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Wed, 7 Jan 2026 18:01:16 -0800
Subject: [PATCH 07/11] ops: Fix offloading with FP8MM performance (#11697)

This logic was checking comfy_cast_weights and going straight to the
forward_comfy_cast_weights implementation without attempting to downscale
the input to fp8 whenever comfy_cast_weights is set. The main reason
comfy_cast_weights would be set is async offload, which is not a good
reason to nix FP8MM.

So instead, AND together the underlying exclusions for FP8MM, which are:

* having a weight_function (usually LowVramPatch)
* force_cast_weights (compute dtype override)
* the weight is not Quantized
* the input is already quantized
* the model or layer has MM explicitly disabled

If you get past all of those exclusions, quantize the input tensor. Then
hand the new input, quantized or not, off to forward_comfy_cast_weights to
handle it. If the weight is offloaded but the input is quantized you will
get an offloaded MM8.
---
 comfy/model_patcher.py |  1 +
 comfy/ops.py           | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 4528814adc43..f6b80a40f1f9 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -718,6 +718,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                 continue

             cast_weight = self.force_cast_weights
+            m.comfy_force_cast_weights = self.force_cast_weights
             if lowvram_weight:
                 if hasattr(m, "comfy_cast_weights"):
                     m.weight_function = []
diff --git a/comfy/ops.py b/comfy/ops.py
index cd536e22d7d0..8156c42ffd18 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -654,29 +654,29 @@ def forward(self, input, *args, **kwargs):
         run_every_op()
         input_shape = input.shape
-        tensor_3d = input.ndim == 3
-
-        if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-            return self.forward_comfy_cast_weights(input, *args, **kwargs)
+        reshaped_3d = False

         if (getattr(self, 'layout_type', None) is not None and
-                not isinstance(input, QuantizedTensor)):
+                not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+                not getattr(self, 'comfy_force_cast_weights', False) and
+                len(self.weight_function) == 0 and len(self.bias_function) == 0):
             # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-            if tensor_3d:
-                input = input.reshape(-1, input_shape[2])
-
-            if input.ndim != 2:
-                # Fall back to comfy_cast_weights for non-2D tensors
-                return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
+            input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

-            # dtype is now implicit in the layout class
-            input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
+            # Fall back to non-quantized for non-2D tensors
+            if input_reshaped.ndim == 2:
+                reshaped_3d = input.ndim == 3
+                # dtype is now implicit in the layout class
+                scale = getattr(self, 'input_scale', None)
+                if scale is not None:
+                    scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-        output = self._forward(input, self.weight, self.bias)
+        output = self.forward_comfy_cast_weights(input)

         # Reshape output back to 3D if input was 3D
-        if tensor_3d:
+        if reshaped_3d:
             output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))
         return output
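A side note on the reshape handling in the ops.py hunk above: the quantized path flattens 3D activations to 2D before quantizing and restores the shape after the matmul. A standalone sketch with invented toy shapes (plain PyTorch, not the ComfyUI op itself):

import torch

# (batch, seq, in_features) -> (batch*seq, in_features) for the matmul, then back to
# (batch, seq, out_features), mirroring input_shape / reshaped_3d in the patch.
x = torch.randn(2, 77, 64)
weight = torch.randn(128, 64)                                # (out_features, in_features), like a Linear weight
x2d = x.reshape(-1, x.shape[2])                              # (154, 64): what would get quantized
y = x2d @ weight.t()                                         # (154, 128)
y3d = y.reshape((x.shape[0], x.shape[1], weight.shape[0]))   # (2, 77, 128)
print(x2d.shape, y3d.shape)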
From 21e842508733809354a7b04944b2995ed1169370 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 18:07:26 -0800
Subject: [PATCH 08/11] Add warning for old pytorch. (#11718)

---
 comfy/quant_ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 5a17bc6f50c2..8324be42a0a9 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -19,6 +19,7 @@
     cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
     if cuda_version < (13,):
         ck.registry.disable("cuda")
+        logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
         ck.registry.disable("triton")

 for k, v in ck.list_backends().items():

From fcd9a236b091bd4e77b177134ddfcf7d7dbd71fd Mon Sep 17 00:00:00 2001
From: ComfyUI Wiki
Date: Thu, 8 Jan 2026 10:22:23 +0800
Subject: [PATCH 09/11] Update template to 0.7.69 (#11719)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 13e95afa06c9..49567ad61d58 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.67
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde

From ac12f77bed7bbbaf20289533bf7c0bff275e4a41 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 7 Jan 2026 22:10:08 -0500
Subject: [PATCH 10/11] ComfyUI version v0.8.1

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index 750673f08208..4eb6070fe55d 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.8.0"
+__version__ = "0.8.1"
diff --git a/pyproject.toml b/pyproject.toml
index 951c2c97862c..0037abd6c1ee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.8.0"
+version = "0.8.1"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"

From 50d6e1caf401bf72dca1e9df7e194e722e1bd98b Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 7 Jan 2026 20:07:05 -0800
Subject: [PATCH 11/11] Tweak ltxv vae mem estimation. (#11722)

---
 comfy/sd.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index efde3839cd96..5a7221620290 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -479,8 +479,8 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
             self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
             self.latent_channels = 128
             self.latent_dim = 3
-            self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
-            self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+            self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+            self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
             self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
             self.upscale_index_formula = (8, 32, 32)
             self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
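To put the decode-estimate bump in PATCH 11/11 into numbers: a minimal sketch assuming fp16 decoding and a 97-frame 768x512 clip, whose latent size (t=13, h=16, w=24) follows from the 8x temporal / 32x32 spatial scale factors set in the same constructor. The clip size is an invented example, not taken from the patch:

# Mirrors memory_used_decode: factor * t * h * w * (8 * 8 * 8) * dtype_size over the latent dims.
def decode_estimate(factor, t, h, w, dtype_size=2):
    return factor * t * h * w * (8 * 8 * 8) * dtype_size

print(decode_estimate(900, 13, 16, 24) / 1024**3)   # ~4.28 GiB with the old factor
print(decode_estimate(1200, 13, 16, 24) / 1024**3)  # ~5.71 GiB with the new factor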