Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions comfy/text_encoders/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,21 +400,25 @@ def preprocess_embed(self, embed, device):

def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, embeds_info=[]):
grid = None
position_ids = None
offset = 0
for e in embeds_info:
if e.get("type") == "image":
grid = e.get("extra", None)
position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
start = e.get("index")
position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
if position_ids is None:
position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
end = e.get("size") + start
len_max = int(grid.max()) // 2
start_next = len_max + start
position_ids[:, end:] = torch.arange(start_next, start_next + (embeds.shape[1] - end), device=embeds.device)
position_ids[0, start:end] = start
position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
position_ids[0, start:end] = start + offset
max_d = int(grid[0][1]) // 2
position_ids[1, start:end] = torch.arange(start, start + max_d, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
max_d = int(grid[0][2]) // 2
position_ids[2, start:end] = torch.arange(start, start + max_d, device=embeds.device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start]
position_ids[2, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start]
offset += len_max - (end - start)

if grid is None:
position_ids = None
Expand Down
55 changes: 55 additions & 0 deletions comfy_extras/nodes_qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,61 @@ def encode(self, clip, prompt, vae=None, image=None):
return (conditioning, )


class TextEncodeQwenImageEditPlus:
    """Text-encode a prompt together with up to three optional input images.

    Every supplied image is resized to roughly 384*384 pixels of area and
    handed to the tokenizer. When a VAE is supplied, each image is also
    resized to roughly 1024*1024 pixels of area (sides rounded to multiples
    of 8), encoded, and attached to the conditioning as a reference latent.
    """

    @classmethod
    def INPUT_TYPES(s):
        required = {
            "clip": ("CLIP", ),
            "prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}),
        }
        optional = {
            "vae": ("VAE", ),
            "image1": ("IMAGE", ),
            "image2": ("IMAGE", ),
            "image3": ("IMAGE", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "encode"

    CATEGORY = "advanced/conditioning"

    def encode(self, clip, prompt, vae=None, image1=None, image2=None, image3=None):
        """Return a 1-tuple holding the conditioning for ``prompt`` and the images.

        Image slots left as ``None`` are skipped. The "Picture N" label uses
        the slot number (1-3), not the count of images actually supplied.
        """
        llama_template = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
        vl_target_area = int(384 * 384)
        latent_target_area = int(1024 * 1024)

        images_vl = []
        ref_latents = []
        picture_tags = []

        for slot, image in enumerate((image1, image2, image3), start=1):
            if image is None:
                continue

            # Move the last axis to position 1 for common_upscale.
            samples = image.movedim(-1, 1)
            source_area = samples.shape[3] * samples.shape[2]

            # Copy for the tokenizer: keep the aspect ratio, hit the target area.
            vl_scale = math.sqrt(vl_target_area / source_area)
            vl_width = round(samples.shape[3] * vl_scale)
            vl_height = round(samples.shape[2] * vl_scale)
            resized_vl = comfy.utils.common_upscale(samples, vl_width, vl_height, "area", "disabled")
            images_vl.append(resized_vl.movedim(1, -1))

            if vae is not None:
                # Copy for the VAE: larger target area, sides snapped to multiples of 8.
                latent_scale = math.sqrt(latent_target_area / source_area)
                latent_width = round(samples.shape[3] * latent_scale / 8.0) * 8
                latent_height = round(samples.shape[2] * latent_scale / 8.0) * 8
                resized_latent = comfy.utils.common_upscale(samples, latent_width, latent_height, "area", "disabled")
                # Only the first three entries of the last axis are passed to the VAE.
                ref_latents.append(vae.encode(resized_latent.movedim(1, -1)[:, :, :, :3]))

            picture_tags.append("Picture {}: <|vision_start|><|image_pad|><|vision_end|>".format(slot))

        image_prompt = "".join(picture_tags)
        tokens = clip.tokenize(image_prompt + prompt, images=images_vl, llama_template=llama_template)
        conditioning = clip.encode_from_tokens_scheduled(tokens)
        if ref_latents:
            conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": ref_latents}, append=True)
        return (conditioning, )


# Maps each node identifier string to the class that implements it.
# NOTE(review): presumably read by the node loader to register these classes — confirm.
NODE_CLASS_MAPPINGS = {
"TextEncodeQwenImageEdit": TextEncodeQwenImageEdit,
"TextEncodeQwenImageEditPlus": TextEncodeQwenImageEditPlus,
}
4 changes: 2 additions & 2 deletions comfy_extras/nodes_wan.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,7 @@ def execute(cls, positive, negative, vae, width, height, length, batch_size, con
background_video = background_video[video_frame_offset:]
background_video = comfy.utils.common_upscale(background_video[:length].movedim(-1, 1), width, height, "area", "center").movedim(1, -1)
if background_video.shape[0] > ref_images_num:
image[ref_images_num:background_video.shape[0] - ref_images_num] = background_video[ref_images_num:]
image[ref_images_num:background_video.shape[0]] = background_video[ref_images_num:]

mask_refmotion = torch.ones((1, 1, latent_length * 4, concat_latent_image.shape[-2], concat_latent_image.shape[-1]), device=mask.device, dtype=mask.dtype)
if continue_motion is not None:
Expand All @@ -1229,7 +1229,7 @@ def execute(cls, positive, negative, vae, width, height, length, batch_size, con
character_mask = character_mask.unsqueeze(1)
character_mask = comfy.utils.common_upscale(character_mask[:, :, :length], concat_latent_image.shape[-1], concat_latent_image.shape[-2], "nearest-exact", "center")
if character_mask.shape[2] > ref_images_num:
mask_refmotion[:, :, ref_images_num:character_mask.shape[2] + ref_images_num] = character_mask[:, :, ref_images_num:]
mask_refmotion[:, :, ref_images_num:character_mask.shape[2]] = character_mask[:, :, ref_images_num:]

concat_latent_image = torch.cat((concat_latent_image, vae.encode(image[:, :, :, :3])), dim=2)

Expand Down
9 changes: 8 additions & 1 deletion server.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,14 @@ async def get_history(request):
max_items = request.rel_url.query.get("max_items", None)
if max_items is not None:
max_items = int(max_items)
return web.json_response(self.prompt_queue.get_history(max_items=max_items))

offset = request.rel_url.query.get("offset", None)
if offset is not None:
offset = int(offset)
else:
offset = -1

return web.json_response(self.prompt_queue.get_history(max_items=max_items, offset=offset))

@routes.get("/history/{prompt_id}")
async def get_history_prompt_id(request):
Expand Down
105 changes: 104 additions & 1 deletion tests/execution/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,21 @@ def get_history(self, prompt_id):
with urllib.request.urlopen("http://{}/history/{}".format(self.server_address, prompt_id)) as response:
return json.loads(response.read())

def get_all_history(self, max_items=None, offset=None):
    """GET ``/history`` from the server and return the decoded JSON body.

    ``max_items`` and ``offset`` are sent as query parameters only when they
    are not ``None``; with both omitted the bare ``/history`` URL is requested.
    """
    query = {
        key: value
        for key, value in (("max_items", max_items), ("offset", offset))
        if value is not None
    }

    url = "http://{}/history".format(self.server_address)
    if query:
        url = "{}?{}".format(url, urllib.parse.urlencode(query))

    with urllib.request.urlopen(url) as response:
        return json.loads(response.read())

def set_test_name(self, name):
self.test_name = name

Expand Down Expand Up @@ -498,7 +513,6 @@ def test_output_reuse(self, client: ComfyClient, builder: GraphBuilder):
assert len(images1) == 1, "Should have 1 image"
assert len(images2) == 1, "Should have 1 image"


# This tests that only constant outputs are used in the call to `IS_CHANGED`
def test_is_changed_with_outputs(self, client: ComfyClient, builder: GraphBuilder):
g = builder
Expand Down Expand Up @@ -762,3 +776,92 @@ def test_partial_execution_empty_list(self, client: ComfyClient, builder: GraphB
except urllib.error.HTTPError:
pass # Expected behavior

def _create_history_item(self, client, builder):
    """Run a small StubImage -> SaveImage graph and return ``client.run``'s result.

    A fresh ``GraphBuilder`` with the "offset_test" prefix is used; the
    ``builder`` argument is accepted but not used.
    """
    graph = GraphBuilder(prefix="offset_test")
    stub_image = graph.node(
        "StubImage", content="BLACK", height=32, width=32, batch_size=1
    )
    graph.node("SaveImage", images=stub_image.out(0))
    return client.run(graph)

def test_offset_returns_different_items_than_beginning_of_history(
    self, client: ComfyClient, builder: GraphBuilder
):
    """The window at offset 2 must share no keys with the window at offset 0."""
    for _ in range(5):
        self._create_history_item(client, builder)

    head_keys = set(client.get_all_history(max_items=2, offset=0).keys())
    following_keys = set(client.get_all_history(max_items=2, offset=2).keys())

    assert head_keys.isdisjoint(following_keys), "Offset should skip initial items"

def test_offset_beyond_history_length_returns_empty(
    self, client: ComfyClient, builder: GraphBuilder
):
    """An offset of 100 after adding a single item must yield an empty result."""
    self._create_history_item(client, builder)

    beyond_end = client.get_all_history(offset=100)

    assert len(beyond_end) == 0, "Large offset should return no items"

def test_offset_at_exact_history_length_returns_empty(
    self, client: ComfyClient, builder: GraphBuilder
):
    """An offset equal to the number of history entries must yield nothing."""
    for _ in range(3):
        self._create_history_item(client, builder)

    history_size = len(client.get_all_history())
    at_end = client.get_all_history(offset=history_size)

    assert len(at_end) == 0, "Offset at history length should return empty"

def test_offset_zero_equals_no_offset_parameter(
    self, client: ComfyClient, builder: GraphBuilder
):
    """Requesting offset=0 must return the same history as sending no offset."""
    self._create_history_item(client, builder)

    explicit_zero = client.get_all_history(offset=0)
    implicit_default = client.get_all_history()

    assert explicit_zero == implicit_default, "offset=0 should equal no offset"

def test_offset_without_max_items_skips_from_beginning(
    self, client: ComfyClient, builder: GraphBuilder
):
    """With no max_items, offset=2 must return exactly two fewer entries."""
    for _ in range(4):
        self._create_history_item(client, builder)

    everything = client.get_all_history()
    after_skip = client.get_all_history(offset=2)
    expected_count = len(everything) - 2

    assert (
        len(after_skip) == expected_count
    ), "Offset should skip specified number of items"

def test_offset_with_max_items_returns_correct_window(
    self, client: ComfyClient, builder: GraphBuilder
):
    """Combining offset with max_items must never return more than max_items."""
    for _ in range(6):
        self._create_history_item(client, builder)

    page = client.get_all_history(max_items=2, offset=1)

    # Only the upper bound is checked here, not the exact window contents.
    assert len(page) <= 2, "Should respect max_items limit"

def test_offset_near_end_returns_remaining_items_only(
    self, client: ComfyClient, builder: GraphBuilder
):
    """An offset one short of the history length must leave at most one entry."""
    for _ in range(3):
        self._create_history_item(client, builder)

    # Start the window at the last position of the current history.
    last_position = len(client.get_all_history()) - 1
    tail = client.get_all_history(max_items=5, offset=last_position)

    assert len(tail) <= 1, "Should return at most 1 item when offset is near end"
Loading