From 2890dd84807026c96642ac594beda784368723b4 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Wed, 28 Jan 2026 22:23:17 +0100 Subject: [PATCH 01/28] add metadata field to input/output param --- src/diffusers/modular_pipelines/modular_pipeline_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/modular_pipelines/modular_pipeline_utils.py b/src/diffusers/modular_pipelines/modular_pipeline_utils.py index f3b12d716160..5481790a9405 100644 --- a/src/diffusers/modular_pipelines/modular_pipeline_utils.py +++ b/src/diffusers/modular_pipelines/modular_pipeline_utils.py @@ -520,6 +520,7 @@ class InputParam: required: bool = False description: str = "" kwargs_type: str = None + metadata: Dict[str, Any] = None def __repr__(self): return f"<{self.name}: {'required' if self.required else 'optional'}, default={self.default}>" @@ -553,6 +554,7 @@ class OutputParam: type_hint: Any = None description: str = "" kwargs_type: str = None + metadata: Dict[str, Any] = None def __repr__(self): return ( From d2bee6a57e3b41273fdd34a1b17501e6967ac44d Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Wed, 28 Jan 2026 22:24:16 +0100 Subject: [PATCH 02/28] refactor mellonparam: move the template outside, add metaclass, define some generic template for custom node --- .../modular_pipelines/mellon_node_utils.py | 760 +++++------------- 1 file changed, 191 insertions(+), 569 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index f848afe9a3ae..5a0ac83444bc 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -19,22 +19,106 @@ logger = logging.getLogger(__name__) +def _name_to_label(name: str) -> str: + """Convert snake_case name to Title Case label.""" + return name.replace("_", " ").title() + + +# Template definitions for standard diffuser pipeline parameters +MELLON_PARAM_TEMPLATES = { + # Image I/O + "image": {"label": 
"Image", "type": "image", "display": "input", "required_block_params": ["image"]}, + "images": {"label": "Images", "type": "image", "display": "output", "required_block_params": ["images"]}, + "control_image": {"label": "Control Image", "type": "image", "display": "input", "required_block_params": ["control_image"]}, + # Latents + "latents": {"label": "Latents", "type": "latents", "display": "input", "required_block_params": ["latents"]}, + "image_latents": {"label": "Image Latents", "type": "latents", "display": "input", "required_block_params": ["image_latents"]}, + "first_frame_latents": {"label": "First Frame Latents", "type": "latents", "display": "input", "required_block_params": ["first_frame_latents"]}, + "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"}, + # Image Latents with Strength + "image_latents_with_strength": { + "name": "image_latents", # name is not same as template key + "label": "Image Latents", + "type": "latents", + "display": "input", + "onChange": {"false": ["height", "width"], "true": ["strength"]}, + "required_block_params": ["image_latents", "strength"], + }, + # Embeddings + "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"}, + "image_embeds": {"label": "Image Embeddings", "type": "image_embeds", "display": "output", "required_block_params": ["image_embeds"]}, + # Text inputs + "prompt": {"label": "Prompt", "type": "string", "display": "textarea", "default": "", "required_block_params": ["prompt"]}, + "negative_prompt": {"label": "Negative Prompt", "type": "string", "display": "textarea", "default": "", "required_block_params": ["negative_prompt"]}, + # Numeric params + "guidance_scale": {"label": "Guidance Scale", "type": "float", "display": "slider", "default": 5.0, "min": 1.0, "max": 30.0, "step": 0.1}, + "strength": {"label": "Strength", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["strength"]}, + "height": 
{"label": "Height", "type": "int", "default": 1024, "min": 64, "step": 8, "required_block_params": ["height"]}, + "width": {"label": "Width", "type": "int", "default": 1024, "min": 64, "step": 8, "required_block_params": ["width"]}, + "seed": {"label": "Seed", "type": "int", "default": 0, "min": 0, "max": 4294967295, "display": "random", "required_block_params": ["generator"]}, + "num_inference_steps": {"label": "Steps", "type": "int", "default": 25, "min": 1, "max": 100, "display": "slider", "required_block_params": ["num_inference_steps"]}, + "num_frames": {"label": "Frames", "type": "int", "default": 81, "min": 1, "max": 480, "display": "slider", "required_block_params": ["num_frames"]}, + "layers": {"label": "Layers", "type": "int", "default": 4, "min": 1, "max": 10, "display": "slider", "required_block_params": ["layers"]}, + # ControlNet + "controlnet_conditioning_scale": {"label": "Controlnet Conditioning Scale", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["controlnet_conditioning_scale"]}, + "control_guidance_start": {"label": "Control Guidance Start", "type": "float", "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["control_guidance_start"]}, + "control_guidance_end": {"label": "Control Guidance End", "type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["control_guidance_end"]}, + # Video + "videos": {"label": "Videos", "type": "video", "display": "output", "required_block_params": ["videos"]}, + # Models + "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["vae"]}, + "image_encoder": {"label": "Image Encoder", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["image_encoder"]}, + "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"}, + "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": 
"input"}, + "controlnet": {"label": "ControlNet Model", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["controlnet"]}, + "text_encoders": {"label": "Text Encoders", "type": "diffusers_auto_models", "display": "input", "required_block_params": ["text_encoder"]}, + # Bundles/Custom + "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "input", "required_block_params": "controlnet_image"}, + "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"}, + "guider": {"label": "Guider", "type": "custom_guider", "display": "input", "onChange": {False: ["guidance_scale"], True: []}}, + "doc": {"label": "Doc", "type": "string", "display": "output"}, +} + + +class MellonParamMeta(type): + """Metaclass that enables MellonParam.template_name(**overrides) syntax.""" + + def __getattr__(cls, name: str): + if name in MELLON_PARAM_TEMPLATES: + + def factory(**overrides): + template = MELLON_PARAM_TEMPLATES[name] + # Use template's name if specified, otherwise use the key + params = {"name": template.get("name", name), **template, **overrides} + return cls(**params) + + return factory + + raise AttributeError(f"type object 'MellonParam' has no attribute '{name}'") + @dataclass(frozen=True) -class MellonParam: +class MellonParam(metaclass=MellonParamMeta): """ - Parameter definition for Mellon nodes. - - Use factory methods for common params (e.g., MellonParam.seed()) or create custom ones with - MellonParam(name="...", label="...", type="..."). - - Example: - ```python - # Custom param - MellonParam(name="my_param", label="My Param", type="float", default=0.5) - # Output in Mellon node definition: - # "my_param": {"label": "My Param", "type": "float", "default": 0.5} - ``` + Parameter definition for Mellon nodes. 
+ + Usage: +```python + # From template (standard diffuser params) + MellonParam.seed() + MellonParam.prompt(default="a cat") + MellonParam.latents(display="output") + + # Generic inputs (for custom blocks) + MellonParam.Input.slider("my_scale", default=1.0, min=0.0, max=2.0) + MellonParam.Input.dropdown("mode", options=["fast", "slow"]) + + # Generic outputs + MellonParam.Output.image("result_images") + + # Fully custom + MellonParam(name="custom", label="Custom", type="float", default=0.5) +``` """ name: str @@ -53,577 +137,115 @@ class MellonParam: required_block_params: Optional[Union[str, List[str]]] = None def to_dict(self) -> Dict[str, Any]: - """Convert to dict for Mellon schema, excluding None values and name.""" + """Convert to dict for Mellon schema, excluding None values and internal fields.""" data = asdict(self) return {k: v for k, v in data.items() if v is not None and k not in ("name", "required_block_params")} - @classmethod - def image(cls) -> "MellonParam": - """ - Image input parameter. - - Mellon node definition: - "image": {"label": "Image", "type": "image", "display": "input"} - """ - return cls(name="image", label="Image", type="image", display="input", required_block_params=["image"]) - - @classmethod - def images(cls) -> "MellonParam": - """ - Images output parameter. - - Mellon node definition: - "images": {"label": "Images", "type": "image", "display": "output"} - """ - return cls(name="images", label="Images", type="image", display="output", required_block_params=["images"]) - - @classmethod - def control_image(cls, display: str = "input") -> "MellonParam": - """ - Control image parameter for ControlNet. 
- - Mellon node definition (display="input"): - "control_image": {"label": "Control Image", "type": "image", "display": "input"} - """ - return cls( - name="control_image", - label="Control Image", - type="image", - display=display, - required_block_params=["control_image"], - ) - - @classmethod - def latents(cls, display: str = "input") -> "MellonParam": - """ - Latents parameter. - - Mellon node definition (display="input"): - "latents": {"label": "Latents", "type": "latents", "display": "input"} - - Mellon node definition (display="output"): - "latents": {"label": "Latents", "type": "latents", "display": "output"} - """ - return cls(name="latents", label="Latents", type="latents", display=display, required_block_params=["latents"]) - - @classmethod - def image_latents(cls, display: str = "input") -> "MellonParam": - """ - Image latents parameter for img2img workflows. - - Mellon node definition (display="input"): - "image_latents": {"label": "Image Latents", "type": "latents", "display": "input"} - """ - return cls( - name="image_latents", - label="Image Latents", - type="latents", - display=display, - required_block_params=["image_latents"], - ) - - @classmethod - def first_frame_latents(cls, display: str = "input") -> "MellonParam": - """ - First frame latents for video generation. - - Mellon node definition (display="input"): - "first_frame_latents": {"label": "First Frame Latents", "type": "latents", "display": "input"} - """ - return cls( - name="first_frame_latents", - label="First Frame Latents", - type="latents", - display=display, - required_block_params=["first_frame_latents"], - ) - - @classmethod - def image_latents_with_strength(cls) -> "MellonParam": - """ - Image latents with strength-based onChange behavior. When connected, shows strength slider; when disconnected, - shows height/width. 
- - Mellon node definition: - "image_latents": { - "label": "Image Latents", "type": "latents", "display": "input", "onChange": {"false": ["height", - "width"], "true": ["strength"]} - } - """ - return cls( - name="image_latents", - label="Image Latents", - type="latents", - display="input", - onChange={"false": ["height", "width"], "true": ["strength"]}, - required_block_params=["image_latents", "strength"], - ) - - @classmethod - def latents_preview(cls) -> "MellonParam": - """ - Latents preview output for visualizing latents in the UI. - - Mellon node definition: - "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"} - """ - return cls(name="latents_preview", label="Latents Preview", type="latent", display="output") - - @classmethod - def embeddings(cls, display: str = "output") -> "MellonParam": - """ - Text embeddings parameter. - - Mellon node definition (display="output"): - "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"} - - Mellon node definition (display="input"): - "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "input"} - """ - return cls(name="embeddings", label="Text Embeddings", type="embeddings", display=display) - - @classmethod - def image_embeds(cls, display: str = "output") -> "MellonParam": - """ - Image embeddings parameter for IP-Adapter workflows. - - Mellon node definition (display="output"): - "image_embeds": {"label": "Image Embeddings", "type": "image_embeds", "display": "output"} - """ - return cls( - name="image_embeds", - label="Image Embeddings", - type="image_embeds", - display=display, - required_block_params=["image_embeds"], - ) - - @classmethod - def controlnet_conditioning_scale(cls, default: float = 0.5) -> "MellonParam": - """ - ControlNet conditioning scale slider. 
- - Mellon node definition (default=0.5): - "controlnet_conditioning_scale": { - "label": "Controlnet Conditioning Scale", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, - "step": 0.01 - } - """ - return cls( - name="controlnet_conditioning_scale", - label="Controlnet Conditioning Scale", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["controlnet_conditioning_scale"], - ) - - @classmethod - def control_guidance_start(cls, default: float = 0.0) -> "MellonParam": - """ - Control guidance start timestep. - - Mellon node definition (default=0.0): - "control_guidance_start": { - "label": "Control Guidance Start", "type": "float", "default": 0.0, "min": 0.0, "max": 1.0, "step": - 0.01 - } - """ - return cls( - name="control_guidance_start", - label="Control Guidance Start", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["control_guidance_start"], - ) - - @classmethod - def control_guidance_end(cls, default: float = 1.0) -> "MellonParam": - """ - Control guidance end timestep. - - Mellon node definition (default=1.0): - "control_guidance_end": { - "label": "Control Guidance End", "type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01 - } - """ - return cls( - name="control_guidance_end", - label="Control Guidance End", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["control_guidance_end"], - ) - - @classmethod - def prompt(cls, default: str = "") -> "MellonParam": - """ - Text prompt input as textarea. - - Mellon node definition (default=""): - "prompt": {"label": "Prompt", "type": "string", "default": "", "display": "textarea"} - """ - return cls( - name="prompt", - label="Prompt", - type="string", - default=default, - display="textarea", - required_block_params=["prompt"], - ) - - @classmethod - def negative_prompt(cls, default: str = "") -> "MellonParam": - """ - Negative prompt input as textarea. 
- - Mellon node definition (default=""): - "negative_prompt": {"label": "Negative Prompt", "type": "string", "default": "", "display": "textarea"} - """ - return cls( - name="negative_prompt", - label="Negative Prompt", - type="string", - default=default, - display="textarea", - required_block_params=["negative_prompt"], - ) - - @classmethod - def strength(cls, default: float = 0.5) -> "MellonParam": - """ - Denoising strength for img2img. - - Mellon node definition (default=0.5): - "strength": {"label": "Strength", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01} - """ - return cls( - name="strength", - label="Strength", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["strength"], - ) - - @classmethod - def guidance_scale(cls, default: float = 5.0) -> "MellonParam": - """ - CFG guidance scale slider. - - Mellon node definition (default=5.0): - "guidance_scale": { - "label": "Guidance Scale", "type": "float", "display": "slider", "default": 5.0, "min": 1.0, "max": - 30.0, "step": 0.1 - } - """ - return cls( - name="guidance_scale", - label="Guidance Scale", - type="float", - display="slider", - default=default, - min=1.0, - max=30.0, - step=0.1, - ) - - @classmethod - def height(cls, default: int = 1024) -> "MellonParam": - """ - Image height in pixels. - - Mellon node definition (default=1024): - "height": {"label": "Height", "type": "int", "default": 1024, "min": 64, "step": 8} - """ - return cls( - name="height", - label="Height", - type="int", - default=default, - min=64, - step=8, - required_block_params=["height"], - ) - - @classmethod - def width(cls, default: int = 1024) -> "MellonParam": - """ - Image width in pixels. 
- - Mellon node definition (default=1024): - "width": {"label": "Width", "type": "int", "default": 1024, "min": 64, "step": 8} - """ - return cls( - name="width", label="Width", type="int", default=default, min=64, step=8, required_block_params=["width"] - ) - - @classmethod - def seed(cls, default: int = 0) -> "MellonParam": - """ - Random seed with randomize button. - - Mellon node definition (default=0): - "seed": { - "label": "Seed", "type": "int", "default": 0, "min": 0, "max": 4294967295, "display": "random" - } - """ - return cls( - name="seed", - label="Seed", - type="int", - default=default, - min=0, - max=4294967295, - display="random", - required_block_params=["generator"], - ) - - @classmethod - def num_inference_steps(cls, default: int = 25) -> "MellonParam": - """ - Number of denoising steps slider. - - Mellon node definition (default=25): - "num_inference_steps": { - "label": "Steps", "type": "int", "default": 25, "min": 1, "max": 100, "display": "slider" - } - """ - return cls( - name="num_inference_steps", - label="Steps", - type="int", - default=default, - min=1, - max=100, - display="slider", - required_block_params=["num_inference_steps"], - ) - - @classmethod - def num_frames(cls, default: int = 81) -> "MellonParam": - """ - Number of video frames slider. - - Mellon node definition (default=81): - "num_frames": {"label": "Frames", "type": "int", "default": 81, "min": 1, "max": 480, "display": "slider"} - """ - return cls( - name="num_frames", - label="Frames", - type="int", - default=default, - min=1, - max=480, - display="slider", - required_block_params=["num_frames"], - ) - - @classmethod - def layers(cls, default: int = 4) -> "MellonParam": - """ - Number of layers slider (for layered diffusion). 
- - Mellon node definition (default=4): - "layers": {"label": "Layers", "type": "int", "default": 4, "min": 1, "max": 10, "display": "slider"} - """ - return cls( - name="layers", - label="Layers", - type="int", - default=default, - min=1, - max=10, - display="slider", - required_block_params=["layers"], - ) - - @classmethod - def videos(cls) -> "MellonParam": - """ - Video output parameter. - - Mellon node definition: - "videos": {"label": "Videos", "type": "video", "display": "output"} - """ - return cls(name="videos", label="Videos", type="video", display="output", required_block_params=["videos"]) - - @classmethod - def vae(cls) -> "MellonParam": - """ - VAE model input. - - Mellon node definition: - "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls( - name="vae", label="VAE", type="diffusers_auto_model", display="input", required_block_params=["vae"] - ) - - @classmethod - def image_encoder(cls) -> "MellonParam": - """ - Image encoder model input. - - Mellon node definition: - "image_encoder": {"label": "Image Encoder", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls( - name="image_encoder", - label="Image Encoder", - type="diffusers_auto_model", - display="input", - required_block_params=["image_encoder"], - ) - - @classmethod - def unet(cls) -> "MellonParam": - """ - Denoising model (UNet/Transformer) input. - - Mellon node definition: - "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. 
Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls(name="unet", label="Denoise Model", type="diffusers_auto_model", display="input") - - @classmethod - def scheduler(cls) -> "MellonParam": - """ - Scheduler model input. - - Mellon node definition: - "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id'. Use - components.get_one(model_id) to retrieve the actual scheduler. - """ - return cls(name="scheduler", label="Scheduler", type="diffusers_auto_model", display="input") - - @classmethod - def controlnet(cls) -> "MellonParam": - """ - ControlNet model input. - - Mellon node definition: - "controlnet": {"label": "ControlNet Model", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls( - name="controlnet", - label="ControlNet Model", - type="diffusers_auto_model", - display="input", - required_block_params=["controlnet"], - ) - - @classmethod - def text_encoders(cls) -> "MellonParam": - """ - Text encoders dict input (multiple encoders). 
+ # ========================================================================= + # Input: Generic input parameter factories (for custom blocks) + # ========================================================================= + class Input: + """Generic input parameter factories for custom blocks.""" + + @classmethod + def image(cls, name: str) -> "MellonParam": + """Generic image input.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="input") + + @classmethod + def textbox(cls, name: str, default: str = "") -> "MellonParam": + """Generic text input as textarea.""" + return MellonParam(name=name, label=_name_to_label(name), type="string", display="textarea", default=default) + + @classmethod + def dropdown(cls, name: str, options: List[str] = None, default: str = None) -> "MellonParam": + """Generic dropdown selection.""" + options = options or [] + if default is None and options: + default = options[0] + return MellonParam( + name=name, label=_name_to_label(name), type="string", options=options if options else None, value=default + ) - Mellon node definition: - "text_encoders": {"label": "Text Encoders", "type": "diffusers_auto_models", "display": "input"} + @classmethod + def slider( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """Generic slider input.""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + if min is None: + min = default + if max is None: + max = default + if step is None: + step = 0.01 if is_float else 1 + return MellonParam( + name=name, + label=_name_to_label(name), + type=param_type, + display="slider", + default=default, + min=min, + max=max, + step=step, + ) - Note: The value received is a dict of model info dicts: - { - 'text_encoder': {'model_id': ..., 'execution_device': ..., ...}, 'tokenizer': {'model_id': ..., ...}, - 'repo_id': '...' 
- } - Use components.get_one(model_id) to retrieve each model. - """ - return cls( - name="text_encoders", - label="Text Encoders", - type="diffusers_auto_models", - display="input", - required_block_params=["text_encoder"], - ) + @classmethod + def number( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """Generic number input (no slider).""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + return MellonParam( + name=name, label=_name_to_label(name), type=param_type, default=default, min=min, max=max, step=step + ) - @classmethod - def controlnet_bundle(cls, display: str = "input") -> "MellonParam": - """ - ControlNet bundle containing model and processed control inputs. Output from ControlNet node, input to Denoise - node. + @classmethod + def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": + """Generic seed input with randomize button.""" + return MellonParam( + name=name, label=_name_to_label(name), type="int", display="random", default=default, min=0, max=4294967295 + ) - Mellon node definition (display="input"): - "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "input"} + @classmethod + def checkbox(cls, name: str, default: bool = False) -> "MellonParam": + """Generic boolean checkbox.""" + return MellonParam(name=name, label=_name_to_label(name), type="boolean", default=default) - Mellon node definition (display="output"): - "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "output"} + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """Generic custom type input for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="input") - Note: The value is a dict containing: - { - 'controlnet': {'model_id': ..., ...}, # controlnet model info 
'control_image': ..., # processed control - image/embeddings 'controlnet_conditioning_scale': ..., # and other denoise block inputs - } - """ - return cls( - name="controlnet_bundle", - label="ControlNet", - type="custom_controlnet", - display=display, - required_block_params="controlnet_image", - ) + # ========================================================================= + # Output: Generic output parameter factories (for custom blocks) + # ========================================================================= + class Output: + """Generic output parameter factories for custom blocks.""" - @classmethod - def ip_adapter(cls) -> "MellonParam": - """ - IP-Adapter input. + @classmethod + def image(cls, name: str) -> "MellonParam": + """Generic image output.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="output") - Mellon node definition: - "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"} - """ - return cls(name="ip_adapter", label="IP Adapter", type="custom_ip_adapter", display="input") + @classmethod + def video(cls, name: str) -> "MellonParam": + """Generic video output.""" + return MellonParam(name=name, label=_name_to_label(name), type="video", display="output") - @classmethod - def guider(cls) -> "MellonParam": - """ - Custom guider input. When connected, hides the guidance_scale slider. 
+ @classmethod + def text(cls, name: str) -> "MellonParam": + """Generic text output.""" + return MellonParam(name=name, label=_name_to_label(name), type="string", display="output") - Mellon node definition: - "guider": { - "label": "Guider", "type": "custom_guider", "display": "input", "onChange": {false: ["guidance_scale"], - true: []} - } - """ - return cls( - name="guider", - label="Guider", - type="custom_guider", - display="input", - onChange={False: ["guidance_scale"], True: []}, - ) + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """Generic custom type output for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="output") - @classmethod - def doc(cls) -> "MellonParam": - """ - Documentation output for inspecting the underlying modular pipeline. - Mellon node definition: - "doc": {"label": "Doc", "type": "string", "display": "output"} - """ - return cls(name="doc", label="Doc", type="string", display="output") DEFAULT_NODE_SPECS = { From c5c732b87b93dff48ba471a6b5ae301b5f228b11 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 02:05:53 +0100 Subject: [PATCH 03/28] add from_custom_block --- .../modular_pipelines/mellon_node_utils.py | 173 ++++++++++++++++-- 1 file changed, 155 insertions(+), 18 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 5a0ac83444bc..5a80fbc264bf 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -15,6 +15,7 @@ ) from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT +from .modular_pipeline_utils import InputParam, OutputParam logger = logging.getLogger(__name__) @@ -86,10 +87,12 @@ class MellonParamMeta(type): def __getattr__(cls, name: str): if name in MELLON_PARAM_TEMPLATES: - def factory(**overrides): + def factory(default=None, **overrides): template = 
MELLON_PARAM_TEMPLATES[name] # Use template's name if specified, otherwise use the key params = {"name": template.get("name", name), **template, **overrides} + if default is not None: + params["default"] = default return cls(**params) return factory @@ -145,33 +148,36 @@ def to_dict(self) -> Dict[str, Any]: # Input: Generic input parameter factories (for custom blocks) # ========================================================================= class Input: - """Generic input parameter factories for custom blocks.""" + """input UI elements for custom blocks.""" @classmethod def image(cls, name: str) -> "MellonParam": - """Generic image input.""" + """image input.""" return MellonParam(name=name, label=_name_to_label(name), type="image", display="input") @classmethod def textbox(cls, name: str, default: str = "") -> "MellonParam": - """Generic text input as textarea.""" + """text input as textarea.""" return MellonParam(name=name, label=_name_to_label(name), type="string", display="textarea", default=default) @classmethod def dropdown(cls, name: str, options: List[str] = None, default: str = None) -> "MellonParam": - """Generic dropdown selection.""" - options = options or [] - if default is None and options: + """dropdown selection.""" + if options and not default: default = options[0] + if not default: + default = "" + if not options: + options = [default] return MellonParam( - name=name, label=_name_to_label(name), type="string", options=options if options else None, value=default + name=name, label=_name_to_label(name), type="string", options=options, value=default ) @classmethod def slider( cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None ) -> "MellonParam": - """Generic slider input.""" + """slider input.""" is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) param_type = "float" if is_float else "int" if min is None: @@ -195,7 +201,7 @@ def slider( def number( cls, name: str, 
default: float = 0, min: float = None, max: float = None, step: float = None ) -> "MellonParam": - """Generic number input (no slider).""" + """number input (no slider).""" is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) param_type = "float" if is_float else "int" return MellonParam( @@ -204,49 +210,125 @@ def number( @classmethod def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": - """Generic seed input with randomize button.""" + """seed input with randomize button.""" return MellonParam( name=name, label=_name_to_label(name), type="int", display="random", default=default, min=0, max=4294967295 ) @classmethod def checkbox(cls, name: str, default: bool = False) -> "MellonParam": - """Generic boolean checkbox.""" + """boolean checkbox.""" return MellonParam(name=name, label=_name_to_label(name), type="boolean", default=default) @classmethod def custom_type(cls, name: str, type: str) -> "MellonParam": - """Generic custom type input for node connections.""" + """custom type input for node connections.""" return MellonParam(name=name, label=_name_to_label(name), type=type, display="input") + @classmethod + def model(cls, name: str) -> "MellonParam": + """model input for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="input") + # ========================================================================= # Output: Generic output parameter factories (for custom blocks) # ========================================================================= class Output: - """Generic output parameter factories for custom blocks.""" + """output UI elements for custom blocks.""" @classmethod def image(cls, name: str) -> "MellonParam": - """Generic image output.""" + """image output.""" return MellonParam(name=name, label=_name_to_label(name), type="image", display="output") @classmethod def video(cls, name: str) -> "MellonParam": - """Generic video 
output.""" + """video output.""" return MellonParam(name=name, label=_name_to_label(name), type="video", display="output") @classmethod def text(cls, name: str) -> "MellonParam": - """Generic text output.""" + """text output.""" return MellonParam(name=name, label=_name_to_label(name), type="string", display="output") @classmethod def custom_type(cls, name: str, type: str) -> "MellonParam": - """Generic custom type output for node connections.""" + """custom type output for node connections.""" return MellonParam(name=name, label=_name_to_label(name), type=type, display="output") + @classmethod + def model(cls, name: str) -> "MellonParam": + """model output for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="output") + +def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: + """ + Convert an InputParam to a MellonParam using metadata. + + Args: + input_param: An InputParam with optional metadata={"mellon": ""} where type is one of: + image, video, text, textbox, checkbox, number, slider, dropdown, seed, model. + If metadata is None or unknown, maps to "custom". 
+ + Returns: + MellonParam instance + """ + name = input_param.name + metadata = input_param.metadata + mellon_type = metadata.get("mellon") if metadata else None + default = input_param.default + + if mellon_type == "image": + return MellonParam.Input.image(name) + elif mellon_type == "textbox": + return MellonParam.Input.textbox(name, default=default or "") + elif mellon_type == "dropdown": + return MellonParam.Input.dropdown(name, default=default or "") + elif mellon_type == "slider": + return MellonParam.Input.slider(name, default=default or 0) + elif mellon_type == "number": + return MellonParam.Input.number(name, default=default or 0) + elif mellon_type == "seed": + return MellonParam.Input.seed(name, default=default or 0) + elif mellon_type == "checkbox": + return MellonParam.Input.checkbox(name, default=default or False) + elif mellon_type == "model": + return MellonParam.Input.model(name) + else: + # None or unknown -> custom + return MellonParam.Input.custom_type(name, type="custom") + + +def output_param_to_mellon_param(output_param: "OutputParam") -> MellonParam: + """ + Convert an OutputParam to a MellonParam using metadata. + + Args: + output_param: An OutputParam with optional metadata={"mellon": ""} where type is one of: + image, video, text, model. + If metadata is None or unknown, maps to "custom". 
+ + Returns: + MellonParam instance + """ + name = output_param.name + metadata = output_param.metadata + mellon_type = metadata.get("mellon") if metadata else None + + if mellon_type == "image": + return MellonParam.Output.image(name) + elif mellon_type == "video": + return MellonParam.Output.video(name) + elif mellon_type == "text": + return MellonParam.Output.text(name) + elif mellon_type == "model": + return MellonParam.Output.model(name) + else: + # None or unknown -> custom + return MellonParam.Output.custom_type(name, type="custom") + DEFAULT_NODE_SPECS = { "controlnet": None, @@ -772,3 +854,58 @@ def filter_spec_for_block(template_spec: Dict[str, Any], block) -> Optional[Dict default_repo=default_repo, default_dtype=default_dtype, ) + + + @classmethod + def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConfig": + """ + Create a MellonPipelineConfig from a custom block. + + Args: + block: A block instance with `inputs`, `outputs`, and `expected_components`/`component_names` properties. + Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: + image, video, text, checkbox, number, slider, dropdown, model. + If metadata is None, maps to "custom". + node_label: The display label for the node. Defaults to block class name with spaces. 
+ + Returns: + MellonPipelineConfig instance + """ + if node_label is None: + class_name = block.__class__.__name__ + node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() + + inputs = [] + model_inputs = [] + outputs = [] + + # Process block inputs + for input_param in block.inputs: + print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") + inputs.append(input_param_to_mellon_param(input_param)) + + # Process block outputs + for output_param in block.outputs: + outputs.append(output_param_to_mellon_param(output_param)) + + # Process expected components (all map to model inputs) + component_names = block.component_names + for component_name in component_names: + model_inputs.append(MellonParam.Input.model(component_name)) + + # Always add doc output + outputs.append(MellonParam.doc()) + + node_spec = { + "inputs": inputs, + "model_inputs": model_inputs, + "outputs": outputs, + "required_inputs": [], + "required_model_inputs": [], + "block_name": "custom", + } + + return cls( + node_specs={"custom": node_spec}, + label=node_label, + ) From ffc5708b780266ba6e246b37ed113dcd73521d91 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 02:07:56 +0100 Subject: [PATCH 04/28] style --- .../modular_pipelines/mellon_node_utils.py | 280 +++++++++++++----- 1 file changed, 213 insertions(+), 67 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 5a80fbc264bf..26620a55acc8 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -20,6 +20,7 @@ logger = logging.getLogger(__name__) + def _name_to_label(name: str) -> str: """Convert snake_case name to Title Case label.""" return name.replace("_", " ").title() @@ -30,15 +31,30 @@ def _name_to_label(name: str) -> str: # Image I/O "image": {"label": "Image", "type": "image", "display": "input", 
"required_block_params": ["image"]}, "images": {"label": "Images", "type": "image", "display": "output", "required_block_params": ["images"]}, - "control_image": {"label": "Control Image", "type": "image", "display": "input", "required_block_params": ["control_image"]}, + "control_image": { + "label": "Control Image", + "type": "image", + "display": "input", + "required_block_params": ["control_image"], + }, # Latents "latents": {"label": "Latents", "type": "latents", "display": "input", "required_block_params": ["latents"]}, - "image_latents": {"label": "Image Latents", "type": "latents", "display": "input", "required_block_params": ["image_latents"]}, - "first_frame_latents": {"label": "First Frame Latents", "type": "latents", "display": "input", "required_block_params": ["first_frame_latents"]}, + "image_latents": { + "label": "Image Latents", + "type": "latents", + "display": "input", + "required_block_params": ["image_latents"], + }, + "first_frame_latents": { + "label": "First Frame Latents", + "type": "latents", + "display": "input", + "required_block_params": ["first_frame_latents"], + }, "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"}, # Image Latents with Strength "image_latents_with_strength": { - "name": "image_latents", # name is not same as template key + "name": "image_latents", # name is not same as template key "label": "Image Latents", "type": "latents", "display": "input", @@ -47,36 +63,164 @@ def _name_to_label(name: str) -> str: }, # Embeddings "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"}, - "image_embeds": {"label": "Image Embeddings", "type": "image_embeds", "display": "output", "required_block_params": ["image_embeds"]}, + "image_embeds": { + "label": "Image Embeddings", + "type": "image_embeds", + "display": "output", + "required_block_params": ["image_embeds"], + }, # Text inputs - "prompt": {"label": "Prompt", "type": "string", "display": "textarea", 
"default": "", "required_block_params": ["prompt"]}, - "negative_prompt": {"label": "Negative Prompt", "type": "string", "display": "textarea", "default": "", "required_block_params": ["negative_prompt"]}, + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["prompt"], + }, + "negative_prompt": { + "label": "Negative Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["negative_prompt"], + }, # Numeric params - "guidance_scale": {"label": "Guidance Scale", "type": "float", "display": "slider", "default": 5.0, "min": 1.0, "max": 30.0, "step": 0.1}, - "strength": {"label": "Strength", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["strength"]}, - "height": {"label": "Height", "type": "int", "default": 1024, "min": 64, "step": 8, "required_block_params": ["height"]}, - "width": {"label": "Width", "type": "int", "default": 1024, "min": 64, "step": 8, "required_block_params": ["width"]}, - "seed": {"label": "Seed", "type": "int", "default": 0, "min": 0, "max": 4294967295, "display": "random", "required_block_params": ["generator"]}, - "num_inference_steps": {"label": "Steps", "type": "int", "default": 25, "min": 1, "max": 100, "display": "slider", "required_block_params": ["num_inference_steps"]}, - "num_frames": {"label": "Frames", "type": "int", "default": 81, "min": 1, "max": 480, "display": "slider", "required_block_params": ["num_frames"]}, - "layers": {"label": "Layers", "type": "int", "default": 4, "min": 1, "max": 10, "display": "slider", "required_block_params": ["layers"]}, + "guidance_scale": { + "label": "Guidance Scale", + "type": "float", + "display": "slider", + "default": 5.0, + "min": 1.0, + "max": 30.0, + "step": 0.1, + }, + "strength": { + "label": "Strength", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["strength"], + }, 
+ "height": { + "label": "Height", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["height"], + }, + "width": { + "label": "Width", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["width"], + }, + "seed": { + "label": "Seed", + "type": "int", + "default": 0, + "min": 0, + "max": 4294967295, + "display": "random", + "required_block_params": ["generator"], + }, + "num_inference_steps": { + "label": "Steps", + "type": "int", + "default": 25, + "min": 1, + "max": 100, + "display": "slider", + "required_block_params": ["num_inference_steps"], + }, + "num_frames": { + "label": "Frames", + "type": "int", + "default": 81, + "min": 1, + "max": 480, + "display": "slider", + "required_block_params": ["num_frames"], + }, + "layers": { + "label": "Layers", + "type": "int", + "default": 4, + "min": 1, + "max": 10, + "display": "slider", + "required_block_params": ["layers"], + }, # ControlNet - "controlnet_conditioning_scale": {"label": "Controlnet Conditioning Scale", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["controlnet_conditioning_scale"]}, - "control_guidance_start": {"label": "Control Guidance Start", "type": "float", "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["control_guidance_start"]}, - "control_guidance_end": {"label": "Control Guidance End", "type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "required_block_params": ["control_guidance_end"]}, + "controlnet_conditioning_scale": { + "label": "Controlnet Conditioning Scale", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["controlnet_conditioning_scale"], + }, + "control_guidance_start": { + "label": "Control Guidance Start", + "type": "float", + "default": 0.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_start"], + }, + 
"control_guidance_end": { + "label": "Control Guidance End", + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_end"], + }, # Video "videos": {"label": "Videos", "type": "video", "display": "output", "required_block_params": ["videos"]}, # Models "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["vae"]}, - "image_encoder": {"label": "Image Encoder", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["image_encoder"]}, + "image_encoder": { + "label": "Image Encoder", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["image_encoder"], + }, "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"}, "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"}, - "controlnet": {"label": "ControlNet Model", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["controlnet"]}, - "text_encoders": {"label": "Text Encoders", "type": "diffusers_auto_models", "display": "input", "required_block_params": ["text_encoder"]}, + "controlnet": { + "label": "ControlNet Model", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["controlnet"], + }, + "text_encoders": { + "label": "Text Encoders", + "type": "diffusers_auto_models", + "display": "input", + "required_block_params": ["text_encoder"], + }, # Bundles/Custom - "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "input", "required_block_params": "controlnet_image"}, + "controlnet_bundle": { + "label": "ControlNet", + "type": "custom_controlnet", + "display": "input", + "required_block_params": "controlnet_image", + }, "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"}, - "guider": {"label": "Guider", "type": "custom_guider", "display": "input", "onChange": 
{False: ["guidance_scale"], True: []}}, + "guider": { + "label": "Guider", + "type": "custom_guider", + "display": "input", + "onChange": {False: ["guidance_scale"], True: []}, + }, "doc": {"label": "Doc", "type": "string", "display": "output"}, } @@ -103,25 +247,25 @@ def factory(default=None, **overrides): @dataclass(frozen=True) class MellonParam(metaclass=MellonParamMeta): """ - Parameter definition for Mellon nodes. + Parameter definition for Mellon nodes. - Usage: -```python - # From template (standard diffuser params) - MellonParam.seed() - MellonParam.prompt(default="a cat") - MellonParam.latents(display="output") + Usage: + ```python + # From template (standard diffuser params) + MellonParam.seed() + MellonParam.prompt(default="a cat") + MellonParam.latents(display="output") - # Generic inputs (for custom blocks) - MellonParam.Input.slider("my_scale", default=1.0, min=0.0, max=2.0) - MellonParam.Input.dropdown("mode", options=["fast", "slow"]) + # Generic inputs (for custom blocks) + MellonParam.Input.slider("my_scale", default=1.0, min=0.0, max=2.0) + MellonParam.Input.dropdown("mode", options=["fast", "slow"]) - # Generic outputs - MellonParam.Output.image("result_images") + # Generic outputs + MellonParam.Output.image("result_images") - # Fully custom - MellonParam(name="custom", label="Custom", type="float", default=0.5) -``` + # Fully custom + MellonParam(name="custom", label="Custom", type="float", default=0.5) + ``` """ name: str @@ -158,7 +302,9 @@ def image(cls, name: str) -> "MellonParam": @classmethod def textbox(cls, name: str, default: str = "") -> "MellonParam": """text input as textarea.""" - return MellonParam(name=name, label=_name_to_label(name), type="string", display="textarea", default=default) + return MellonParam( + name=name, label=_name_to_label(name), type="string", display="textarea", default=default + ) @classmethod def dropdown(cls, name: str, options: List[str] = None, default: str = None) -> "MellonParam": @@ -169,9 +315,7 @@ 
def dropdown(cls, name: str, options: List[str] = None, default: str = None) -> default = "" if not options: options = [default] - return MellonParam( - name=name, label=_name_to_label(name), type="string", options=options, value=default - ) + return MellonParam(name=name, label=_name_to_label(name), type="string", options=options, value=default) @classmethod def slider( @@ -212,7 +356,13 @@ def number( def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": """seed input with randomize button.""" return MellonParam( - name=name, label=_name_to_label(name), type="int", display="random", default=default, min=0, max=4294967295 + name=name, + label=_name_to_label(name), + type="int", + display="random", + default=default, + min=0, + max=4294967295, ) @classmethod @@ -262,16 +412,15 @@ def model(cls, name: str) -> "MellonParam": return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="output") - def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: """ Convert an InputParam to a MellonParam using metadata. - + Args: input_param: An InputParam with optional metadata={"mellon": ""} where type is one of: - image, video, text, textbox, checkbox, number, slider, dropdown, seed, model. - If metadata is None or unknown, maps to "custom". - + image, video, text, textbox, checkbox, number, slider, dropdown, seed, model. If metadata is None or + unknown, maps to "custom". 
+ Returns: MellonParam instance """ @@ -279,7 +428,7 @@ def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: metadata = input_param.metadata mellon_type = metadata.get("mellon") if metadata else None default = input_param.default - + if mellon_type == "image": return MellonParam.Input.image(name) elif mellon_type == "textbox": @@ -304,19 +453,18 @@ def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: def output_param_to_mellon_param(output_param: "OutputParam") -> MellonParam: """ Convert an OutputParam to a MellonParam using metadata. - + Args: output_param: An OutputParam with optional metadata={"mellon": ""} where type is one of: - image, video, text, model. - If metadata is None or unknown, maps to "custom". - + image, video, text, model. If metadata is None or unknown, maps to "custom". + Returns: MellonParam instance """ name = output_param.name metadata = output_param.metadata mellon_type = metadata.get("mellon") if metadata else None - + if mellon_type == "image": return MellonParam.Output.image(name) elif mellon_type == "video": @@ -855,47 +1003,45 @@ def filter_spec_for_block(template_spec: Dict[str, Any], block) -> Optional[Dict default_dtype=default_dtype, ) - @classmethod def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConfig": """ Create a MellonPipelineConfig from a custom block. - + Args: block: A block instance with `inputs`, `outputs`, and `expected_components`/`component_names` properties. - Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: - image, video, text, checkbox, number, slider, dropdown, model. - If metadata is None, maps to "custom". + Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: image, + video, text, checkbox, number, slider, dropdown, model. If metadata is None, maps to "custom". node_label: The display label for the node. Defaults to block class name with spaces. 
- + Returns: MellonPipelineConfig instance """ if node_label is None: class_name = block.__class__.__name__ node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() - + inputs = [] model_inputs = [] outputs = [] - + # Process block inputs for input_param in block.inputs: print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") inputs.append(input_param_to_mellon_param(input_param)) - + # Process block outputs for output_param in block.outputs: outputs.append(output_param_to_mellon_param(output_param)) - + # Process expected components (all map to model inputs) component_names = block.component_names for component_name in component_names: model_inputs.append(MellonParam.Input.model(component_name)) - + # Always add doc output outputs.append(MellonParam.doc()) - + node_spec = { "inputs": inputs, "model_inputs": model_inputs, @@ -904,7 +1050,7 @@ def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConf "required_model_inputs": [], "block_name": "custom", } - + return cls( node_specs={"custom": node_spec}, label=node_label, From 5ad83903f9a23b22d0c17d97cb5366e586737b02 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 03:45:11 +0100 Subject: [PATCH 05/28] up up fix --- .../modular_pipelines/mellon_node_utils.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 26620a55acc8..ce7109c2718e 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -6,7 +6,7 @@ from dataclasses import asdict, dataclass from typing import Any, Dict, List, Optional, Union -from huggingface_hub import create_repo, hf_hub_download, upload_folder +from huggingface_hub import create_repo, hf_hub_download, upload_file from huggingface_hub.utils import ( EntryNotFoundError, HfHubHTTPError, @@ 
-656,10 +656,15 @@ def node_spec_to_mellon_dict(node_spec: Dict[str, Any], node_type: str) -> Dict[ params[p.name] = param_dict model_input_names.append(p.name) - # Process outputs + # Process outputs: add a prefix to the output name if it already exists as an input for p in node_spec.get("outputs", []): - params[p.name] = p.to_dict() - output_names.append(p.name) + if p.name in input_names: + # rename to out_ + output_name = f"out_{p.name}" + else: + output_name = p.name + params[output_name] = p.to_dict() + output_names.append(output_name) return { "params": params, @@ -811,7 +816,7 @@ def from_json_file(cls, json_file_path: Union[str, os.PathLike]) -> "MellonPipel return cls.from_dict(data) def save(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): - """Save the pipeline config to a directory.""" + """Save the mellon pipeline config to a directory.""" if os.path.isfile(save_directory): raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") @@ -821,21 +826,21 @@ def save(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = Fals logger.info(f"Pipeline config saved to {output_path}") if push_to_hub: + commit_message = kwargs.pop("commit_message", None) private = kwargs.pop("private", None) create_pr = kwargs.pop("create_pr", False) token = kwargs.pop("token", None) repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id - subfolder = kwargs.pop("subfolder", None) - upload_folder( + upload_file( + path_or_fileobj=output_path, + path_in_repo=self.config_name, repo_id=repo_id, - folder_path=save_directory, token=token, commit_message=commit_message or "Upload MellonPipelineConfig", create_pr=create_pr, - path_in_repo=subfolder, ) logger.info(f"Pipeline config pushed to hub: {repo_id}") From 29c5741c2a5080708ff07ff4b15fc3f7d9449b9c Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 
Jan 2026 05:20:56 +0100 Subject: [PATCH 06/28] add mellon guide --- docs/source/en/modular_diffusers/mellon.md | 218 +++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 docs/source/en/modular_diffusers/mellon.md diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md new file mode 100644 index 000000000000..597a327df8d5 --- /dev/null +++ b/docs/source/en/modular_diffusers/mellon.md @@ -0,0 +1,218 @@ + + + +## Using Custom Blocks with Mellon + +[Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface (similar to ComfyUI) that integrates with Modular Diffusers. This guide shows how to add Mellon support to your custom blocks so they can be used in the Mellon UI. + +## Overview + +To use a custom block in Mellon, you need to: + +1. **Add a `metadata` field to your block's `InputParam` and `OutputParam` definitions** - Its `mellon` entry is a simple string that tells Mellon what UI component to use (e.g., `"textbox"`, `"dropdown"`, `"image"`) +2. **Generate `mellon_pipeline_config.json`** - We provide a utility to generate a default template and push it to your Hub repository +3. **(Optional) Manually adjust the template** - Adjust the generated config for your specific needs + +## Step 1: Add Metadata to Input/Output Parameters + +The `metadata` field tells Mellon how to render each parameter in the UI. Add `metadata={"mellon": "<type>"}` to your `InputParam` and `OutputParam` definitions. + +> [!NOTE] +> If you don't specify metadata for a parameter, it will default to `"custom"` type, which renders as a simple connection dot in the UI. You can always adjust this later in the generated config.
+ +### Supported Mellon Types + +| Type | Input/Output | Description | +|------|--------------|-------------| +| `image` | Both | Image (PIL Image) | +| `video` | Both | Video | +| `text` | Both | Text display | +| `textbox` | Input | Text input | +| `dropdown` | Input | Dropdown selection menu | +| `slider` | Input | Slider for numeric values | +| `number` | Input | Numeric input | +| `checkbox` | Input | Boolean toggle | + +### Example: Adding Metadata + +Here's an example using the [Gemini Prompt Expander](https://huggingface.co/diffusers-internal-dev/gemini-prompt-expander) block: + +```python +class GeminiPromptExpander(ModularPipelineBlocks): + + @property + def inputs(self) -> List[InputParam]: + return [ + InputParam( + "prompt", + type_hint=str, + required=True, + description="Prompt to use", + metadata={"mellon": "textbox"}, # Text input + ) + ] + + @property + def intermediate_outputs(self) -> List[OutputParam]: + return [ + OutputParam( + "prompt", + type_hint=str, + description="Expanded prompt by the LLM", + metadata={"mellon": "text"}, # Text output + ), + OutputParam( + "old_prompt", + type_hint=str, + description="Old prompt provided by the user", + # No metadata - we don't want to render this in UI + ) + ] +``` + + +## Step 2: Generate and Push the Mellon Config + +After adding metadata to your block, generate the default Mellon configuration template and push it to the Hub: + +```python +from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig + +# Generate the default config template (assume you already loaded your custom blocks into `blocks`) +mellon_config = MellonPipelineConfig.from_custom_block(blocks) +# Push the default template to `repo_id`; you also need to provide a `save_directory` where the config is first saved locally +mellon_config.save( + save_directory="/path/local/folder", + repo_id=repo_id, + push_to_hub=True +) +``` + +This creates a `mellon_pipeline_config.json` file in your repository.
+ +## Step 3: Review and Adjust the Config (Optional) + +The generated template is a starting point - you may want to adjust it for your needs. Let's walk through the generated config for the Gemini Prompt Expander: + +```json +{ + "label": "Gemini Prompt Expander", + "default_repo": "", + "default_dtype": "", + "node_params": { + "custom": { + "params": { + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "" + }, + "out_prompt": { + "label": "Prompt", + "type": "string", + "display": "output" + }, + "old_prompt": { + "label": "Old Prompt", + "type": "custom", + "display": "output" + }, + "doc": { + "label": "Doc", + "type": "string", + "display": "output" + } + }, + "input_names": ["prompt"], + "model_input_names": [], + "output_names": ["out_prompt", "old_prompt", "doc"], + "block_name": "custom", + "node_type": "custom" + } + } +} +``` + +### Understanding the Structure + +The `params` dict defines how each UI element renders. The `input_names`, `model_input_names`, and `output_names` lists map these UI elements to the underlying [`ModularPipelineBlocks`]'s I/O interface: + +| Mellon Config | ModularPipelineBlocks | +|---------------|----------------------| +| `input_names` | `inputs` property | +| `model_input_names` | `expected_components` property | +| `output_names` | `intermediate_outputs` property | + +In this example: `prompt` is the only input, there are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. + +Now let's look at the `params` dict: + +**`prompt`** is an input parameter. It has `display: "textarea"` which renders as a text input box, `label: "Prompt"` shown in the UI, and `default: ""` so it starts empty. The `type: "string"` field is important in Mellon because it determines which nodes can connect together - only matching types can be linked with "noodles". + +**`out_prompt`** is the expanded prompt output. 
The `out_` prefix was automatically added because the input and output share the same name (`prompt`), avoiding naming conflicts in the config. It has `display: "output"` which renders as an output socket. + +**`old_prompt`** has `type: "custom"` because we didn't specify metadata. This renders as a simple dot in the UI. Since we don't actually want to expose this in the UI, we can remove it. + +**`doc`** is the documentation output, automatically added to all custom blocks. + +### Making Adjustments + +For the Gemini Prompt Expander, we don't need `old_prompt` in the UI. Remove it from both `params` and `output_names`: + +```json +{ + "label": "Gemini Prompt Expander", + "default_repo": "", + "default_dtype": "", + "node_params": { + "custom": { + "params": { + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "" + }, + "out_prompt": { + "label": "Prompt", + "type": "string", + "display": "output" + }, + "doc": { + "label": "Doc", + "type": "string", + "display": "output" + } + }, + "input_names": ["prompt"], + "model_input_names": [], + "output_names": ["out_prompt", "doc"], + "block_name": "custom", + "node_type": "custom" + } + } +} +``` + +See the final config at [YiYiXu/gemini-prompt-expander](https://huggingface.co/YiYiXu/gemini-prompt-expander). + +## Use in Mellon + +1. Start Mellon (see [Mellon installation guide](https://github.com/cubiq/Mellon)) + +2. 
In Mellon: + - Drag a **Dynamic Block Node** from the ModularDiffusers section + - Enter your `repo_id` (e.g., `YiYiXu/gemini-prompt-expander`) + - Click **Load Custom Block** + - The node will transform to show your block's inputs and outputs \ No newline at end of file From 26f59f1aa95e1093b150738543698b99ca8335c5 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 05:21:47 +0100 Subject: [PATCH 07/28] add to toctree --- docs/source/en/_toctree.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 46e241d817b5..a95949f33a39 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -114,6 +114,8 @@ title: Guiders - local: modular_diffusers/custom_blocks title: Building Custom Blocks + - local: modular_diffusers/mellon + title: Mellon Guide title: Modular Diffusers - isExpanded: false sections: From a71d86b9ae693d2d224d714d5249f3a4674425ee Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 05:30:24 +0100 Subject: [PATCH 08/28] style --- src/diffusers/modular_pipelines/mellon_node_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index ce7109c2718e..907b0cb9f46b 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -826,7 +826,6 @@ def save(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = Fals logger.info(f"Pipeline config saved to {output_path}") if push_to_hub: - commit_message = kwargs.pop("commit_message", None) private = kwargs.pop("private", None) create_pr = kwargs.pop("create_pr", False) From 48160f6f5e8fed6095f5f4353db7a55638f36025 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 21:26:11 +0100 Subject: [PATCH 09/28] add mellon_types --- .../modular_pipelines/mellon_node_utils.py | 19 ++++++++++++++++++- 1 file changed, 18 
insertions(+), 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 907b0cb9f46b..b23ece725903 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -1,6 +1,7 @@ import json import logging import os +import copy # Simple typed wrapper for parameter overrides from dataclasses import asdict, dataclass @@ -1008,7 +1009,12 @@ def filter_spec_for_block(template_spec: Dict[str, Any], block) -> Optional[Dict ) @classmethod - def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConfig": + def from_custom_block( + cls, + block, + node_label: str = None, + mellon_types: Optional[Dict[str, str]] = None, + ) -> "MellonPipelineConfig": """ Create a MellonPipelineConfig from a custom block. @@ -1017,6 +1023,8 @@ def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConf Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: image, video, text, checkbox, number, slider, dropdown, model. If metadata is None, maps to "custom". node_label: The display label for the node. Defaults to block class name with spaces. + mellon_types: Optional dict mapping param names to mellon types. Overrides the block's metadata if provided. 
+ Example: {"prompt": "textbox", "image": "image", "out_prompt": "text"} Returns: MellonPipelineConfig instance @@ -1025,17 +1033,26 @@ def from_custom_block(cls, block, node_label: str = None) -> "MellonPipelineConf class_name = block.__class__.__name__ node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() + if mellon_types is None: + mellon_types = {} + inputs = [] model_inputs = [] outputs = [] # Process block inputs for input_param in block.inputs: + if input_param.name in mellon_types: + input_param = copy.copy(input_param) + input_param.metadata = {"mellon": mellon_types[input_param.name]} print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") inputs.append(input_param_to_mellon_param(input_param)) # Process block outputs for output_param in block.outputs: + if output_param.name in mellon_types: + output_param = copy.copy(output_param) + output_param.metadata = {"mellon": mellon_types[output_param.name]} outputs.append(output_param_to_mellon_param(output_param)) # Process expected components (all map to model inputs) From 3fe2711691ee19c7f448891baea2f05ba9da1d0f Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 29 Jan 2026 21:26:49 +0100 Subject: [PATCH 10/28] style --- src/diffusers/modular_pipelines/mellon_node_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index b23ece725903..34808accf9c3 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -1,7 +1,7 @@ +import copy import json import logging import os -import copy # Simple typed wrapper for parameter overrides from dataclasses import asdict, dataclass @@ -1010,8 +1010,8 @@ def filter_spec_for_block(template_spec: Dict[str, Any], block) -> Optional[Dict @classmethod def from_custom_block( - cls, - block, + cls, + block, node_label: 
str = None, mellon_types: Optional[Dict[str, str]] = None, ) -> "MellonPipelineConfig": @@ -1023,8 +1023,9 @@ def from_custom_block( Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: image, video, text, checkbox, number, slider, dropdown, model. If metadata is None, maps to "custom". node_label: The display label for the node. Defaults to block class name with spaces. - mellon_types: Optional dict mapping param names to mellon types. Overrides the block's metadata if provided. - Example: {"prompt": "textbox", "image": "image", "out_prompt": "text"} + mellon_types: + Optional dict mapping param names to mellon types. Overrides the block's metadata if provided. Example: + {"prompt": "textbox", "image": "image", "out_prompt": "text"} Returns: MellonPipelineConfig instance From d4f2a8979fd0c449c0a06f32497d5c59619bd198 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Fri, 30 Jan 2026 02:10:19 +0100 Subject: [PATCH 11/28] mellon_type -> inpnt_types + output_types --- .../modular_pipelines/mellon_node_utils.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 34808accf9c3..2db2d207156e 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -1013,7 +1013,8 @@ def from_custom_block( cls, block, node_label: str = None, - mellon_types: Optional[Dict[str, str]] = None, + input_types: Optional[Dict[str, str]] = None, + output_types: Optional[Dict[str, str]] = None, ) -> "MellonPipelineConfig": """ Create a MellonPipelineConfig from a custom block. @@ -1023,9 +1024,12 @@ def from_custom_block( Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: image, video, text, checkbox, number, slider, dropdown, model. If metadata is None, maps to "custom". node_label: The display label for the node. 
Defaults to block class name with spaces. - mellon_types: - Optional dict mapping param names to mellon types. Overrides the block's metadata if provided. Example: - {"prompt": "textbox", "image": "image", "out_prompt": "text"} + input_types: + Optional dict mapping input param names to mellon types. Overrides the block's metadata if provided. + Example: {"prompt": "textbox", "image": "image"} + output_types: + Optional dict mapping output param names to mellon types. Overrides the block's metadata if provided. + Example: {"prompt": "text", "images": "image"} Returns: MellonPipelineConfig instance @@ -1034,8 +1038,10 @@ def from_custom_block( class_name = block.__class__.__name__ node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() - if mellon_types is None: - mellon_types = {} + if input_types is None: + input_types = {} + if output_types is None: + output_types = {} inputs = [] model_inputs = [] @@ -1043,17 +1049,17 @@ def from_custom_block( # Process block inputs for input_param in block.inputs: - if input_param.name in mellon_types: + if input_param.name in input_types: input_param = copy.copy(input_param) - input_param.metadata = {"mellon": mellon_types[input_param.name]} + input_param.metadata = {"mellon": input_types[input_param.name]} print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") inputs.append(input_param_to_mellon_param(input_param)) # Process block outputs for output_param in block.outputs: - if output_param.name in mellon_types: + if output_param.name in output_types: output_param = copy.copy(output_param) - output_param.metadata = {"mellon": mellon_types[output_param.name]} + output_param.metadata = {"mellon": output_types[output_param.name]} outputs.append(output_param_to_mellon_param(output_param)) # Process expected components (all map to model inputs) From 46a713a6fa7113cabbe4807b96b4b0f613b8c7cf Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Fri, 30 Jan 2026 02:10:31 +0100 Subject: 
[PATCH 12/28] update doc --- docs/source/en/modular_diffusers/mellon.md | 43 +++++++++++++++------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 597a327df8d5..c684f86df94e 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -17,18 +17,15 @@ specific language governing permissions and limitations under the License. ## Overview -To use a custom block in Mellon, you need to: +To use a custom block in Mellon, you need a `mellon_pipeline_config.json` file that defines how your block's parameters map to Mellon UI components. Here's how to create one: -1. **Add `metadata` field to your block's `InputParam` and `OutputParam` definitions** - This is a simple string that tells Mellon what UI component to use (e.g., `"textbox"`, `"dropdown"`, `"image"`) -2. **Generate `mellon_pipeline_config.json`** - We provide a utility to generate a default template and push it to your Hub repository -3. **(Optional) Manually adjust the template** - Adjust the generated config for your specific needs +1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). You can specify types via metadata in your block definitions, or pass them when generating the config. +2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository +3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs -## Step 1: Add Metadata to Input/Output Parameters +## Step 1: Specify Mellon Types for Parameters -The `metadata` field tells Mellon how to render each parameter in the UI. Add `metadata={"mellon": ""}` to your `InputParam` and `OutputParam` definitions. 
- -> [!NOTE] -> If you don't specify metadata for a parameter, it will default to `"custom"` type, which renders as a simple connection dot in the UI. You can always adjust this later in the generated config. +Mellon types determine how each parameter renders in the UI. If you don't specify a type for a parameter, it will default to `"custom"`, which renders as a simple connection dot. You can always adjust this later in the generated config. ### Supported Mellon Types @@ -43,10 +40,9 @@ The `metadata` field tells Mellon how to render each parameter in the UI. Add `m | `number` | Input | Numeric input | | `checkbox` | Input | Boolean toggle | -### Example: Adding Metadata - -Here's an example using the [Gemini Prompt Expander](https://huggingface.co/diffusers-internal-dev/gemini-prompt-expander) block: +### Method 1: Using `metadata` in Block Definitions +If you're defining a custom block from scratch, you can add `metadata={"mellon": ""}` directly to your `InputParam` and `OutputParam` definitions: ```python class GeminiPromptExpander(ModularPipelineBlocks): @@ -80,17 +76,36 @@ class GeminiPromptExpander(ModularPipelineBlocks): ] ``` +### Method 2: Using `input_types` and `output_types` When Generating Config + +If you're working with an existing pipeline or prefer to keep your block definitions clean, you can specify types when generating the config using the `input_types` and `output_types` arguments: +```python +from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig + +mellon_config = MellonPipelineConfig.from_custom_block( + blocks, + input_types={"prompt": "textbox"}, + output_types={"prompt": "text"} +) +``` + +> [!NOTE] +> If you specify both `metadata` and `input_types`/`output_types`, the arguments take precedence, allowing you to override metadata when needed. 
## Step 2: Generate and Push the Mellon Config After adding metadata to your block, generate the default Mellon configuration template and push it to the Hub: ```python +from diffusers import ModularPipelineBlocks from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig -# Generate the default config template (assume you already loaded your custom blocks into `blocks`) +# load your custom blocks from your local dir +blocks = ModularPipelineBlocks.from_pretrained("/path/local/folder", trust_remote_code=True) + +# Generate the default config template mellon_config = MellonPipelineConfig.from_custom_block(blocks) -# push the default template to `repo_id`, you will also need to provide a local_dir for it to save it locally +# push the default template to `repo_id`, you will need to pass the same local folder path so that it will save the config locally first mellon_config.save( local_dir="/path/local/folder", repo_id= repo_id, From 8c5b119e52e6a74a4adfbdf2d076957240f33e4a Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Fri, 30 Jan 2026 10:05:05 +0100 Subject: [PATCH 13/28] add quant info to components manager --- .../modular_pipelines/components_manager.py | 20 ++++++++++++++++--- .../modular_pipelines/mellon_node_utils.py | 4 ++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index e16abb382313..d4c6b903b842 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -324,6 +324,7 @@ class ComponentsManager: "has_hook", "execution_device", "ip_adapter", + "quantization", ] def __init__(self): @@ -356,7 +357,9 @@ def _lookup_ids( ids_by_name.add(component_id) else: ids_by_name = set(components.keys()) - if collection: + if collection and collection not in self.collections: + return set() + elif collection and collection in self.collections: 
ids_by_collection = set() for component_id, component in components.items(): if component_id in self.collections[collection]: @@ -760,7 +763,6 @@ def disable_auto_cpu_offload(self): self.model_hooks = None self._auto_offload_enabled = False - # YiYi TODO: (1) add quantization info def get_model_info( self, component_id: str, @@ -836,6 +838,16 @@ def get_model_info( if scales: info["ip_adapter"] = summarize_dict_by_value_and_parts(scales) + # Check for quantization + hf_quantizer = getattr(component, "hf_quantizer", None) + if hf_quantizer is not None: + quant_method = hf_quantizer.quantization_config.quant_method + if hasattr(quant_method, "value"): + quant_method = quant_method.value + info["quantization"] = quant_method + else: + info["quantization"] = None + # If fields specified, filter info if fields is not None: return {k: v for k, v in info.items() if k in fields} @@ -966,12 +978,14 @@ def format_device(component, info): output += "\nAdditional Component Info:\n" + "=" * 50 + "\n" for name in self.components: info = self.get_model_info(name) - if info is not None and (info.get("adapters") is not None or info.get("ip_adapter")): + if info is not None and (info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization")): output += f"\n{name}:\n" if info.get("adapters") is not None: output += f" Adapters: {info['adapters']}\n" if info.get("ip_adapter"): output += " IP-Adapter: Enabled\n" + if info.get("quantization"): + output += f" Quantization: {info['quantization']}\n" return output diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 2db2d207156e..0a4c30cc5c88 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -1049,6 +1049,8 @@ def from_custom_block( # Process block inputs for input_param in block.inputs: + if input_param.name is None: + continue if input_param.name in input_types: 
input_param = copy.copy(input_param) input_param.metadata = {"mellon": input_types[input_param.name]} @@ -1057,6 +1059,8 @@ def from_custom_block( # Process block outputs for output_param in block.outputs: + if output_param.name is None: + continue if output_param.name in output_types: output_param = copy.copy(output_param) output_param.metadata = {"mellon": output_types[output_param.name]} From 3985c43031c9052aab66be7cce23c2e0c93c13ac Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Fri, 30 Jan 2026 13:09:46 +0100 Subject: [PATCH 14/28] fix more --- src/diffusers/modular_pipelines/components_manager.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index d4c6b903b842..acc50ccce82c 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -841,10 +841,8 @@ def get_model_info( # Check for quantization hf_quantizer = getattr(component, "hf_quantizer", None) if hf_quantizer is not None: - quant_method = hf_quantizer.quantization_config.quant_method - if hasattr(quant_method, "value"): - quant_method = quant_method.value - info["quantization"] = quant_method + quant_config = hf_quantizer.quantization_config + info["quantization"] = quant_config.to_dict() else: info["quantization"] = None From 391c410368c8b84f6ab70b0776dffb37f41f5851 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 02:19:27 +0100 Subject: [PATCH 15/28] up up --- src/diffusers/modular_pipelines/components_manager.py | 5 ++++- src/diffusers/modular_pipelines/mellon_node_utils.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index acc50ccce82c..0a23ecc8bca1 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ 
b/src/diffusers/modular_pipelines/components_manager.py @@ -842,7 +842,10 @@ def get_model_info( hf_quantizer = getattr(component, "hf_quantizer", None) if hf_quantizer is not None: quant_config = hf_quantizer.quantization_config - info["quantization"] = quant_config.to_dict() + if hasattr(quant_config, "to_diff_dict"): + info["quantization"] = quant_config.to_diff_dict() + else: + info["quantization"] = quant_config.to_dict() else: info["quantization"] = None diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 0a4c30cc5c88..13acec36924e 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -369,7 +369,7 @@ def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": @classmethod def checkbox(cls, name: str, default: bool = False) -> "MellonParam": """boolean checkbox.""" - return MellonParam(name=name, label=_name_to_label(name), type="boolean", default=default) + return MellonParam(name=name, label=_name_to_label(name), type="boolean", value=default) @classmethod def custom_type(cls, name: str, type: str) -> "MellonParam": From e3a4cc5730580f2ba124d51e9061b1b77d10258d Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 03:53:29 +0100 Subject: [PATCH 16/28] fix components manager --- src/diffusers/modular_pipelines/components_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index 0a23ecc8bca1..1ff6b6fd78d5 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -426,7 +426,8 @@ def add(self, name: str, component: Any, collection: Optional[str] = None): # add component to components manager self.components[component_id] = component - self.added_time[component_id] = time.time() + if 
is_new_component: + self.added_time[component_id] = time.time() if collection: if collection not in self.collections: From bf6a07e665319482a95c66b3c7b7a714405a1284 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 09:36:26 +0100 Subject: [PATCH 17/28] update custom block guide --- .../en/modular_diffusers/custom_blocks.md | 181 +++++++++++++----- 1 file changed, 133 insertions(+), 48 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 6ef8db613f7f..fcdf27a79bd1 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -15,8 +15,6 @@ specific language governing permissions and limitations under the License. [ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block. -> [!TIP] -> Explore the [Modular Diffusers Custom Blocks](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks) collection for official custom modular blocks like Nano Banana. ## Project Structure @@ -31,6 +29,44 @@ Your custom block project should use the following structure: - `block.py` contains the custom block implementation - `modular_config.json` contains the metadata needed to load the block +## Quick Start with Template + +The fastest way to create a custom block is to start from our template: + +### 1. Download the template +```python +from diffusers import ModularPipelineBlocks + +model_id = "diffusers/custom-block-template" +local_dir = model_id.split("/")[-1] + +blocks = ModularPipelineBlocks.from_pretrained( + model_id, + trust_remote_code=True, + local_dir=local_dir +) +``` + +This saves the template files to `custom-block-template/` locally. Feel free to use a custom `local_dir`. + +### 2. 
Edit locally + +Open `block.py` and implement your custom block. The template includes commented examples showing how to define each property. See the [Florence 2 example](#example-florence-2-inpainting-block) below for a complete implementation. + +### 3. Test your block +```python +from diffusers import ModularPipelineBlocks + +blocks = ModularPipelineBlocks.from_pretrained(local_dir, trust_remote_code=True) +pipeline = blocks.init_pipeline() +output = pipeline(...) # your inputs here +``` + +### 4. Upload to the Hub +```python +pipeline.save_pretrained(local_dir, repo_id="your-username/your-block-name", push_to_hub=True) +``` + ## Example: Florence 2 Inpainting Block In this example we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. @@ -403,56 +439,62 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ``` -Once we have defined our custom block, we can save it to the Hub, using either the CLI or the [`push_to_hub`] method. This will make it easy to share and reuse our custom block with other pipelines. - - - - -```shell -# In the folder with the `block.py` file, run: -diffusers-cli custom_block -``` - -Then upload the block to the Hub: - -```shell -hf upload . . -``` - - - -```py -from block import Florence2ImageAnnotatorBlock -block = Florence2ImageAnnotatorBlock() -block.push_to_hub("") -``` - - - +Once we have defined our custom block, we can save it to the Hub. This will make it easy to share and reuse our custom block with other pipelines. ## Using Custom Blocks -Load the custom block with [`~ModularPipelineBlocks.from_pretrained`] and set `trust_remote_code=True`. +Load the custom block into a pipeline with [`~ModularPipeline.from_pretrained`] and set `trust_remote_code=True`. 
```py import torch -from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS +from diffusers import ModularPipeline from diffusers.utils import load_image # Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) +image_annotator_node = ModularPipeline.from_pretrained("diffusers/Florence2-image-Annotator", trust_remote_code=True) +# check the docstring +print(image_annotator_node.block.doc) +``` -my_blocks = INPAINT_BLOCKS.copy() -# insert the annotation block before the image encoding step -my_blocks.insert("image_annotator", image_annotator_block, 1) +```out +class Florence2ImageAnnotatorBlock + + Components: + image_annotator (`Florence2ForConditionalGeneration`) [pretrained_model_name_or_path=florence-community/Florence-2-base-ft] + image_annotator_processor (`AutoProcessor`) [pretrained_model_name_or_path=florence-community/Florence-2-base-ft] + + Inputs: + image (`Union[Image, List]`): + Image(s) to annotate + annotation_task (`Union[str, List]`, *optional*, defaults to ): + Annotation Task to perform on the image. Supported Tasks: + + + annotation_prompt (`Union[str, List]`): + Annotation Prompt to provide more context to the task. Can be used to detect or segment out specific elements in + the image + annotation_output_type (`str`, *optional*, defaults to mask_image): + Output type from annotation predictions. Availabe options are annotation: - raw annotation predictions from the + model based on task type. mask_image: -black and white mask image for the given image based on the task type + mask_overlay: - white mask overlayed on the original image bounding_box: - bounding boxes drawn on the original + image + annotation_overlay (`bool`): + TODO: Add description. 
+ fill (`str`, *optional*, defaults to white): + TODO: Add description. + + Outputs: + annotations (`dict`): + Annotations Predictions for input Image(s) + images (`PIL.Image`): + Annotated input Image(s) +``` -# Create our initial set of inpainting blocks -blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) +We can use it to generate a mask and then pass it to an inpainting pipeline: -repo_id = "diffusers/modular-stable-diffusion-xl-base-1.0" -pipe = blocks.init_pipeline(repo_id) -pipe.load_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True) +```py +image_annotator_node.load_components(torch_dtype=torch.bfloat16) +image_annotator_node.to("cuda") image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true") image = image.resize((1024, 1024)) @@ -461,6 +503,28 @@ prompt = ["A red car"] annotation_task = "" annotation_prompt = ["the car"] +mask_image = image_annotator_node( + prompt=prompt, + image=image, + annotation_task=annotation_task, + annotation_prompt=annotation_prompt, + annotation_output_type="mask_image", +).images +mask_image[0].save("florence-mask.png") +``` +You can use this mask as an input to an inpainting pipeline, + +or you can take the block and combine it with other blocks to make a new inpainting pipeline: + +```py +image_annotator_blocks = image_annotator_node.blocks + +inpaint_blocks = ModularPipeline.from_pretrained("Qwen/Qwen-Image").blocks.get_workflow("inpainting") +# insert the annotation block before the image encoding step +inpaint_blocks.sub_blocks.insert("image_annotator", image_annotator_block, 0) +pipe = blocks.init_pipeline("Qwen/Qwen-Image") +pipe.load_components(torch_dtype=torch.float16, device_map="cuda") + output = pipe( prompt=prompt, image=image, @@ -477,16 +541,37 @@ output[0].save("florence-inpainting.png") ## Editing Custom Blocks -By default, custom blocks are saved in your cache directory. 
Use the `local_dir` argument to download and edit a custom block in a specific folder. +You can edit any existing custom block by downloading it locally. This follows the same workflow as the [Quick Start with Template](#quick-start-with-template), but starting from an existing block instead of the template. +Use the `local_dir` argument to download and edit a custom block in a specific folder: ```py -import torch -from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS -from diffusers.utils import load_image +from diffusers.modular_pipelines import ModularPipelineBlocks -# Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True, local_dir="/my-local-folder") +# Download to a local folder for editing +image_annotator_block = ModularPipelineBlocks.from_pretrained( + "diffusers/Florence2-image-Annotator", + trust_remote_code=True, + local_dir="./my-florence-block" +) +``` + +Any changes made to the block files in this folder will be reflected when you load the block again. When you're ready to share your changes, upload to a new repository: +```python +pipeline = image_annotator_block.init_pipeline() +pipeline.save_pretrained("./my-florence-block", repo_id="your-username/my-custom-florence", push_to_hub=True) ``` -Any changes made to the block files in this folder will be reflected when you load the block again. +## Next Steps + + + + +Make your custom block work with Mellon's visual interface - no UI code required. See the [Mellon Custom Blocks](./mellon) guide. + + + + +Browse the [Modular Diffusers Custom Blocks](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks) collection for inspiration and ready-to-use blocks. 
+ + + From 9d7f6db9ec66029773bfb04634211d518c12efa2 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 09:51:13 +0100 Subject: [PATCH 18/28] update --- .../en/modular_diffusers/custom_blocks.md | 401 ++++-------------- 1 file changed, 75 insertions(+), 326 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index fcdf27a79bd1..2fa94970640e 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -15,6 +15,8 @@ specific language governing permissions and limitations under the License. [ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block. +> [!TIP] +> Explore the [Modular Diffusers Custom Blocks](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks) collection for official custom blocks. ## Project Structure @@ -34,6 +36,7 @@ Your custom block project should use the following structure: The fastest way to create a custom block is to start from our template: ### 1. Download the template + ```python from diffusers import ModularPipelineBlocks @@ -51,9 +54,10 @@ This saves the template files to `custom-block-template/` locally. Feel free to ### 2. Edit locally -Open `block.py` and implement your custom block. The template includes commented examples showing how to define each property. See the [Florence 2 example](#example-florence-2-inpainting-block) below for a complete implementation. +Open `block.py` and implement your custom block. The template includes commented examples showing how to define each property. See the [Florence-2 example](#example-florence-2-image-annotator) below for a complete implementation. ### 3. 
Test your block + ```python from diffusers import ModularPipelineBlocks @@ -63,22 +67,22 @@ output = pipeline(...) # your inputs here ``` ### 4. Upload to the Hub + ```python pipeline.save_pretrained(local_dir, repo_id="your-username/your-block-name", push_to_hub=True) ``` -## Example: Florence 2 Inpainting Block +## Example: Florence-2 Image Annotator -In this example we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. +This example creates a custom block that uses [Florence-2](https://huggingface.co/docs/transformers/model_doc/florence2) to process an input image and generate a mask for inpainting. -The first step is to define the components that the block will use. In this case, we will need to use the `Florence2ForConditionalGeneration` model and its corresponding processor `AutoProcessor`. When defining components, we must specify the name of the component within our pipeline, model class via `type_hint`, and provide a `pretrained_model_name_or_path` for the component if we intend to load the model weights from a specific repository on the Hub. +### Define components -```py +First, define the components the block needs. Here we use `Florence2ForConditionalGeneration` and its processor. When defining components, specify the `name` (how you'll access it in code), `type_hint` (the model class), and `pretrained_model_name_or_path` (where to load weights from). + +```python # Inside block.py -from diffusers.modular_pipelines import ( - ModularPipelineBlocks, - ComponentSpec, -) +from diffusers.modular_pipelines import ModularPipelineBlocks, ComponentSpec from transformers import AutoProcessor, Florence2ForConditionalGeneration @@ -100,40 +104,19 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ] ``` -Next, we define the inputs and outputs of the block. 
The inputs include the image to be annotated, the annotation task, and the annotation prompt. The outputs include the generated mask image and annotations. +### Define inputs and outputs -```py +Next, define the block's interface. Inputs include the image, annotation task, and prompt. Outputs include the generated mask and annotations. + +```python from typing import List, Union -from PIL import Image, ImageDraw -import torch -import numpy as np - -from diffusers.modular_pipelines import ( - PipelineState, - ModularPipelineBlocks, - InputParam, - ComponentSpec, - OutputParam, -) -from transformers import AutoProcessor, Florence2ForConditionalGeneration +from PIL import Image +from diffusers.modular_pipelines import InputParam, OutputParam class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): - @property - def expected_components(self): - return [ - ComponentSpec( - name="image_annotator", - type_hint=Florence2ForConditionalGeneration, - pretrained_model_name_or_path="florence-community/Florence-2-base-ft", - ), - ComponentSpec( - name="image_annotator_processor", - type_hint=AutoProcessor, - pretrained_model_name_or_path="florence-community/Florence-2-base-ft", - ), - ] + # ... expected_components from above ... @property def inputs(self) -> List[InputParam]: @@ -146,51 +129,21 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ), InputParam( "annotation_task", - type_hint=Union[str, List[str]], - required=True, + type_hint=str, default="", - description="""Annotation Task to perform on the image. - Supported Tasks: - - - - - - - - - - - """, + description="Annotation task to perform (e.g., , , )", ), InputParam( "annotation_prompt", - type_hint=Union[str, List[str]], + type_hint=str, required=True, - description="""Annotation Prompt to provide more context to the task. 
- Can be used to detect or segment out specific elements in the image - """, + description="Prompt to provide context for the annotation task", ), InputParam( "annotation_output_type", type_hint=str, - required=True, default="mask_image", - description="""Output type from annotation predictions. Available options are - mask_image: - -black and white mask image for the given image based on the task type - mask_overlay: - - mask overlayed on the original image - bounding_box: - - bounding boxes drawn on the original image - """, - ), - InputParam( - "annotation_overlay", - type_hint=bool, - required=True, - default=False, - description="", + description="Output type: 'mask_image', 'mask_overlay', or 'bounding_box'", ), ] @@ -199,225 +152,45 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return [ OutputParam( "mask_image", - type_hint=Image, - description="Inpainting Mask for input Image(s)", + type_hint=Image.Image, + description="Inpainting mask for the input image", ), OutputParam( "annotations", type_hint=dict, - description="Annotations Predictions for input Image(s)", + description="Raw annotation predictions", ), OutputParam( "image", - type_hint=Image, - description="Annotated input Image(s)", + type_hint=Image.Image, + description="Annotated image", ), ] - ``` -Now we implement the `__call__` method, which contains the logic for processing the input image and generating the mask. +### Implement the `__call__` method -```py -from typing import List, Union -from PIL import Image, ImageDraw +The `__call__` method contains the block's logic. Access inputs via `block_state`, run your computation, and set outputs back to `block_state`. 
+ +```python import torch -import numpy as np - -from diffusers.modular_pipelines import ( - PipelineState, - ModularPipelineBlocks, - InputParam, - ComponentSpec, - OutputParam, -) -from transformers import AutoProcessor, Florence2ForConditionalGeneration +from diffusers.modular_pipelines import PipelineState class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): - @property - def expected_components(self): - return [ - ComponentSpec( - name="image_annotator", - type_hint=Florence2ForConditionalGeneration, - pretrained_model_name_or_path="florence-community/Florence-2-base-ft", - ), - ComponentSpec( - name="image_annotator_processor", - type_hint=AutoProcessor, - pretrained_model_name_or_path="florence-community/Florence-2-base-ft", - ), - ] - - @property - def inputs(self) -> List[InputParam]: - return [ - InputParam( - "image", - type_hint=Union[Image.Image, List[Image.Image]], - required=True, - description="Image(s) to annotate", - ), - InputParam( - "annotation_task", - type_hint=Union[str, List[str]], - required=True, - default="", - description="""Annotation Task to perform on the image. - Supported Tasks: - - - - - - - - - - - """, - ), - InputParam( - "annotation_prompt", - type_hint=Union[str, List[str]], - required=True, - description="""Annotation Prompt to provide more context to the task. - Can be used to detect or segment out specific elements in the image - """, - ), - InputParam( - "annotation_output_type", - type_hint=str, - required=True, - default="mask_image", - description="""Output type from annotation predictions. 
Available options are - mask_image: - -black and white mask image for the given image based on the task type - mask_overlay: - - mask overlayed on the original image - bounding_box: - - bounding boxes drawn on the original image - """, - ), - InputParam( - "annotation_overlay", - type_hint=bool, - required=True, - default=False, - description="", - ), - ] - - @property - def intermediate_outputs(self) -> List[OutputParam]: - return [ - OutputParam( - "mask_image", - type_hint=Image, - description="Inpainting Mask for input Image(s)", - ), - OutputParam( - "annotations", - type_hint=dict, - description="Annotations Predictions for input Image(s)", - ), - OutputParam( - "image", - type_hint=Image, - description="Annotated input Image(s)", - ), - ] - - def get_annotations(self, components, images, prompts, task): - task_prompts = [task + prompt for prompt in prompts] - - inputs = components.image_annotator_processor( - text=task_prompts, images=images, return_tensors="pt" - ).to(components.image_annotator.device, components.image_annotator.dtype) - - generated_ids = components.image_annotator.generate( - input_ids=inputs["input_ids"], - pixel_values=inputs["pixel_values"], - max_new_tokens=1024, - early_stopping=False, - do_sample=False, - num_beams=3, - ) - annotations = components.image_annotator_processor.batch_decode( - generated_ids, skip_special_tokens=False - ) - outputs = [] - for image, annotation in zip(images, annotations): - outputs.append( - components.image_annotator_processor.post_process_generation( - annotation, task=task, image_size=(image.width, image.height) - ) - ) - return outputs - - def prepare_mask(self, images, annotations, overlay=False, fill="white"): - masks = [] - for image, annotation in zip(images, annotations): - mask_image = image.copy() if overlay else Image.new("L", image.size, 0) - draw = ImageDraw.Draw(mask_image) - - for _, _annotation in annotation.items(): - if "polygons" in _annotation: - for polygon in 
_annotation["polygons"]: - polygon = np.array(polygon).reshape(-1, 2) - if len(polygon) < 3: - continue - polygon = polygon.reshape(-1).tolist() - draw.polygon(polygon, fill=fill) - - elif "bbox" in _annotation: - bbox = _annotation["bbox"] - draw.rectangle(bbox, fill="white") - - masks.append(mask_image) - - return masks - - def prepare_bounding_boxes(self, images, annotations): - outputs = [] - for image, annotation in zip(images, annotations): - image_copy = image.copy() - draw = ImageDraw.Draw(image_copy) - for _, _annotation in annotation.items(): - bbox = _annotation["bbox"] - label = _annotation["label"] - - draw.rectangle(bbox, outline="red", width=3) - draw.text((bbox[0], bbox[1] - 20), label, fill="red") - - outputs.append(image_copy) - - return outputs - - def prepare_inputs(self, images, prompts): - prompts = prompts or "" - - if isinstance(images, Image.Image): - images = [images] - if isinstance(prompts, str): - prompts = [prompts] - - if len(images) != len(prompts): - raise ValueError("Number of images and annotation prompts must match.") - - return images, prompts + # ... expected_components, inputs, intermediate_outputs from above ... @torch.no_grad() def __call__(self, components, state: PipelineState) -> PipelineState: block_state = self.get_block_state(state) + images, annotation_task_prompt = self.prepare_inputs( block_state.image, block_state.annotation_prompt ) task = block_state.annotation_task fill = block_state.fill - + annotations = self.get_annotations( components, images, annotation_task_prompt, task ) @@ -436,69 +209,40 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): self.set_block_state(state, block_state) return components, state - + + # Helper methods for mask/bounding box generation... ``` -Once we have defined our custom block, we can save it to the Hub. This will make it easy to share and reuse our custom block with other pipelines. 
+> [!TIP] +> See the complete implementation at [diffusers/Florence2-image-Annotator](https://huggingface.co/diffusers/Florence2-image-Annotator). ## Using Custom Blocks -Load the custom block into a pipeline with [`~ModularPipeline.from_pretrained`] and set `trust_remote_code=True`. +Load a custom block with [`~ModularPipeline.from_pretrained`] and set `trust_remote_code=True`. ```py import torch from diffusers import ModularPipeline from diffusers.utils import load_image -# Fetch the Florence2 image annotator block that will create our mask -image_annotator_node = ModularPipeline.from_pretrained("diffusers/Florence2-image-Annotator", trust_remote_code=True) -# check the docstring -print(image_annotator_node.block.doc) -``` +# Load the Florence-2 annotator pipeline +image_annotator = ModularPipeline.from_pretrained( + "diffusers/Florence2-image-Annotator", + trust_remote_code=True +) -```out -class Florence2ImageAnnotatorBlock - - Components: - image_annotator (`Florence2ForConditionalGeneration`) [pretrained_model_name_or_path=florence-community/Florence-2-base-ft] - image_annotator_processor (`AutoProcessor`) [pretrained_model_name_or_path=florence-community/Florence-2-base-ft] - - Inputs: - image (`Union[Image, List]`): - Image(s) to annotate - annotation_task (`Union[str, List]`, *optional*, defaults to ): - Annotation Task to perform on the image. Supported Tasks: - - - annotation_prompt (`Union[str, List]`): - Annotation Prompt to provide more context to the task. Can be used to detect or segment out specific elements in - the image - annotation_output_type (`str`, *optional*, defaults to mask_image): - Output type from annotation predictions. Availabe options are annotation: - raw annotation predictions from the - model based on task type. 
mask_image: -black and white mask image for the given image based on the task type - mask_overlay: - white mask overlayed on the original image bounding_box: - bounding boxes drawn on the original - image - annotation_overlay (`bool`): - TODO: Add description. - fill (`str`, *optional*, defaults to white): - TODO: Add description. - - Outputs: - annotations (`dict`): - Annotations Predictions for input Image(s) - images (`PIL.Image`): - Annotated input Image(s) +# Check the docstring to see inputs/outputs +print(image_annotator.blocks.doc) ``` -we can use it to generate a mask and then pass to an inpainting pipeline +Use the block to generate a mask: -```py -image_annotator_node.load_components(torch_dtype=torch.bfloat16) -image_annotator_node.to("cuda") +```python +image_annotator.load_components(torch_dtype=torch.bfloat16) +image_annotator.to("cuda") -image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true") +image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg") image = image.resize((1024, 1024)) - prompt = ["A red car"] annotation_task = "" annotation_prompt = ["the car"] @@ -510,21 +254,24 @@ mask_image = image_annotator_node( annotation_prompt=annotation_prompt, annotation_output_type="mask_image", ).images -mask_image[0].save("florence-mask.png") +mask_image[0].save("car-mask.png") ``` -you can use this as an input for a inpaint pipeline; -or you can take the block, combine it with other blocks to make a new inpaint pipeline, +You can also compose it with other blocks to create a new pipeline: -```py -image_annotator_blocks = image_annotator_node.blocks +```python +# Get the annotator block +annotator_block = image_annotator.blocks +# Get an inpainting workflow and insert the annotator at the beginning inpaint_blocks = ModularPipeline.from_pretrained("Qwen/Qwen-Image").blocks.get_workflow("inpainting") -# 
insert the annotation block before the image encoding step -inpaint_blocks.sub_blocks.insert("image_annotator", image_annotator_block, 0) -pipe = blocks.init_pipeline("Qwen/Qwen-Image") -pipe.load_components(torch_dtype=torch.float16, device_map="cuda") +inpaint_blocks.sub_blocks.insert("image_annotator", annotator_block, 0) + +# Initialize the combined pipeline +pipe = inpaint_blocks.init_pipeline() +pipe.load_components(torch_dtype=torch.float16, device="cuda") +# Now the pipeline automatically generates masks from prompts output = pipe( prompt=prompt, image=image, @@ -543,12 +290,13 @@ output[0].save("florence-inpainting.png") You can edit any existing custom block by downloading it locally. This follows the same workflow as the [Quick Start with Template](#quick-start-with-template), but starting from an existing block instead of the template. -Use the `local_dir` argument to download and edit a custom block in a specific folder: -```py -from diffusers.modular_pipelines import ModularPipelineBlocks +Use the `local_dir` argument to download a custom block to a specific folder: + +```python +from diffusers import ModularPipelineBlocks # Download to a local folder for editing -image_annotator_block = ModularPipelineBlocks.from_pretrained( +annotator_block = ModularPipelineBlocks.from_pretrained( "diffusers/Florence2-image-Annotator", trust_remote_code=True, local_dir="./my-florence-block" @@ -556,8 +304,9 @@ image_annotator_block = ModularPipelineBlocks.from_pretrained( ``` Any changes made to the block files in this folder will be reflected when you load the block again. 
When you're ready to share your changes, upload to a new repository: + ```python -pipeline = image_annotator_block.init_pipeline() +pipeline = annotator_block.init_pipeline() pipeline.save_pretrained("./my-florence-block", repo_id="your-username/my-custom-florence", push_to_hub=True) ``` @@ -574,4 +323,4 @@ Make your custom block work with Mellon's visual interface - no UI code required Browse the [Modular Diffusers Custom Blocks](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks) collection for inspiration and ready-to-use blocks. - + \ No newline at end of file From 3cd9ff42963b1872e67ee2a0f7fe1507e93dced9 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 09:51:35 +0100 Subject: [PATCH 19/28] style --- src/diffusers/modular_pipelines/components_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index 1ff6b6fd78d5..4a7ea8502c86 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -980,7 +980,9 @@ def format_device(component, info): output += "\nAdditional Component Info:\n" + "=" * 50 + "\n" for name in self.components: info = self.get_model_info(name) - if info is not None and (info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization")): + if info is not None and ( + info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization") + ): output += f"\n{name}:\n" if info.get("adapters") is not None: output += f" Adapters: {info['adapters']}\n" From 741685def14215b87ce30b3db0fd0d61ca6f8fe0 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Sat, 31 Jan 2026 10:09:31 +0100 Subject: [PATCH 20/28] add a warn for mellon and add new guides to overview --- docs/source/en/modular_diffusers/mellon.md | 5 ++++- docs/source/en/modular_diffusers/overview.md | 7 ++++++- 2 files 
changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index c684f86df94e..29caa0cc1ec6 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -13,7 +13,10 @@ specific language governing permissions and limitations under the License. ## Using Custom Blocks with Mellon -[Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface (similar to ComfyUI) that integrates with Modular Diffusers. This guide shows how to add Mellon support to your custom blocks so they can be used in the Mellon UI. +[Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface that integrates with Modular Diffusers and is designed for node-based workflows. + +> [!WARNING] +> Mellon is in early development and not yet ready for production use. Consider this a sneak peek of how the integration works! Custom blocks built with Modular Diffusers work with Mellon out of the box - no UI code required - and we'll ensure compatibility as Mellon evolves. ## Overview diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md index 7d07c4b73434..b7f20cf884b5 100644 --- a/docs/source/en/modular_diffusers/overview.md +++ b/docs/source/en/modular_diffusers/overview.md @@ -33,9 +33,14 @@ The Modular Diffusers docs are organized as shown below. - [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`~modular_pipelines.SequentialPipelineBlocks`] and how they connect and work together. - [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`]. 
- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`~modular_pipelines.AutoPipelineBlocks`]. +- [Building Custom Blocks](./custom_blocks) shows you how to create your own custom blocks and share them on the Hub. ## ModularPipeline - [ModularPipeline](./modular_pipeline) shows you how to create and convert pipeline blocks into an executable [`ModularPipeline`]. - [ComponentsManager](./components_manager) shows you how to manage and reuse components across multiple pipelines. -- [Guiders](./guiders) shows you how to use different guidance methods in the pipeline. \ No newline at end of file +- [Guiders](./guiders) shows you how to use different guidance methods in the pipeline. + +## Mellon Integration + +- [Using Custom Blocks with Mellon](./mellon) shows you how to make your custom blocks work with [Mellon](https://github.com/cubiq/Mellon), a visual node-based interface for building workflows. 
\ No newline at end of file From 6ee36c537ff56ad9d6e84197cba499cf7dbda276 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Tue, 3 Feb 2026 05:39:12 -1000 Subject: [PATCH 21/28] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/_toctree.yml | 2 +- .../en/modular_diffusers/custom_blocks.md | 24 ++++++------- docs/source/en/modular_diffusers/mellon.md | 36 ++++++++++--------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index a95949f33a39..64a4222845b0 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -115,7 +115,7 @@ - local: modular_diffusers/custom_blocks title: Building Custom Blocks - local: modular_diffusers/mellon - title: Mellon Guide + title: Using Custom Blocks with Mellon title: Modular Diffusers - isExpanded: false sections: diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 2fa94970640e..bb0ff1375e41 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -33,9 +33,9 @@ Your custom block project should use the following structure: ## Quick Start with Template -The fastest way to create a custom block is to start from our template: +The fastest way to create a custom block is to start from our template. -### 1. Download the template +### Download the template ```python from diffusers import ModularPipelineBlocks @@ -50,13 +50,13 @@ blocks = ModularPipelineBlocks.from_pretrained( ) ``` -This saves the template files to `custom-block-template/` locally. Feel free to use a custom `local_dir`. +This saves the template files to `custom-block-template/` locally or you could use `local_dir` to save to a specific location. -### 2. Edit locally +### Edit locally Open `block.py` and implement your custom block. 
The template includes commented examples showing how to define each property. See the [Florence-2 example](#example-florence-2-image-annotator) below for a complete implementation. -### 3. Test your block +### Test your block ```python from diffusers import ModularPipelineBlocks @@ -66,7 +66,7 @@ pipeline = blocks.init_pipeline() output = pipeline(...) # your inputs here ``` -### 4. Upload to the Hub +### Upload to the Hub ```python pipeline.save_pretrained(local_dir, repo_id="your-username/your-block-name", push_to_hub=True) @@ -74,11 +74,11 @@ pipeline.save_pretrained(local_dir, repo_id="your-username/your-block-name", pus ## Example: Florence-2 Image Annotator -This example creates a custom block that uses [Florence-2](https://huggingface.co/docs/transformers/model_doc/florence2) to process an input image and generate a mask for inpainting. +This example creates a custom block with [Florence-2](https://huggingface.co/docs/transformers/model_doc/florence2) to process an input image and generate a mask for inpainting. ### Define components -First, define the components the block needs. Here we use `Florence2ForConditionalGeneration` and its processor. When defining components, specify the `name` (how you'll access it in code), `type_hint` (the model class), and `pretrained_model_name_or_path` (where to load weights from). +Define the components the block needs, `Florence2ForConditionalGeneration` and its processor. When defining components, specify the `name` (how you'll access it in code), `type_hint` (the model class), and `pretrained_model_name_or_path` (where to load weights from). ```python # Inside block.py @@ -106,7 +106,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ### Define inputs and outputs -Next, define the block's interface. Inputs include the image, annotation task, and prompt. Outputs include the generated mask and annotations. +Inputs include the image, annotation task, and prompt. 
Outputs include the generated mask and annotations. ```python from typing import List, Union @@ -257,7 +257,7 @@ mask_image = image_annotator_node( mask_image[0].save("car-mask.png") ``` -You can also compose it with other blocks to create a new pipeline: +Compose it with other blocks to create a new pipeline: ```python # Get the annotator block @@ -286,9 +286,9 @@ output = pipe( output[0].save("florence-inpainting.png") ``` -## Editing Custom Blocks +## Editing custom blocks -You can edit any existing custom block by downloading it locally. This follows the same workflow as the [Quick Start with Template](#quick-start-with-template), but starting from an existing block instead of the template. +Edit a custom block by downloading it locally. This is the same workflow as the [Quick Start with Template](#quick-start-with-template), but starting from an existing block instead of the template. Use the `local_dir` argument to download a custom block to a specific folder: diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 29caa0cc1ec6..03a47a0a240e 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -16,17 +16,17 @@ specific language governing permissions and limitations under the License. [Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface that integrates with Modular Diffusers and is designed for node-based workflows. > [!WARNING] -> Mellon is in early development and not yet ready for production use. Consider this a sneak peek of how the integration works! Custom blocks built with Modular Diffusers work with Mellon out of the box - no UI code required - and we'll ensure compatibility as Mellon evolves. +> Mellon is in early development and not ready for production use yet. Consider this a sneak peek of how the integration works! 
## Overview -To use a custom block in Mellon, you need a `mellon_pipeline_config.json` file that defines how your block's parameters map to Mellon UI components. Here's how to create one: +Create a `mellon_pipeline_config.json` file to define how a custom block's parameters map to Mellon UI components. -1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). You can specify types via metadata in your block definitions, or pass them when generating the config. -2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository -3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs +1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). Specify types via metadata in your block definitions, or pass them when generating the config. +2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository. +3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs. -## Step 1: Specify Mellon Types for Parameters +## Specify Mellon types for parameters Mellon types determine how each parameter renders in the UI. If you don't specify a type for a parameter, it will default to `"custom"`, which renders as a simple connection dot. You can always adjust this later in the generated config. @@ -43,9 +43,11 @@ Mellon types determine how each parameter renders in the UI. If you don't specif | `number` | Input | Numeric input | | `checkbox` | Input | Boolean toggle | -### Method 1: Using `metadata` in Block Definitions +Choose one of the methods below to specify a Mellon type. 
-If you're defining a custom block from scratch, you can add `metadata={"mellon": ""}` directly to your `InputParam` and `OutputParam` definitions: +### Using `metadata` in block definitions + +If you're defining a custom block from scratch, add `metadata={"mellon": ""}` directly to your `InputParam` and `OutputParam` definitions: ```python class GeminiPromptExpander(ModularPipelineBlocks): @@ -79,9 +81,9 @@ class GeminiPromptExpander(ModularPipelineBlocks): ] ``` -### Method 2: Using `input_types` and `output_types` When Generating Config +### Using `input_types` and `output_types` when Generating Config -If you're working with an existing pipeline or prefer to keep your block definitions clean, you can specify types when generating the config using the `input_types/output_types` argument: +If you're working with an existing pipeline or prefer to keep your block definitions clean, specify types when generating the config using the `input_types/output_types` argument: ```python from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig @@ -93,9 +95,9 @@ mellon_config = MellonPipelineConfig.from_custom_block( ``` > [!NOTE] -> If you specify both `metadata` and `input_types`/`output_types`, the arguments take precedence, allowing you to override metadata when needed. +> When both `metadata` and `input_types`/`output_types` are specified, the arguments override `metadata`. -## Step 2: Generate and Push the Mellon Config +## Generate and push the Mellon config After adding metadata to your block, generate the default Mellon configuration template and push it to the Hub: @@ -118,7 +120,7 @@ mellon_config.save( This creates a `mellon_pipeline_config.json` file in your repository. -## Step 3: Review and Adjust the Config (Optional) +## Review and adjust the config The generated template is a starting point - you may want to adjust it for your needs. 
Let's walk through the generated config for the Gemini Prompt Expander: @@ -162,7 +164,7 @@ The generated template is a starting point - you may want to adjust it for your } ``` -### Understanding the Structure +### Understanding the structure The `params` dict defines how each UI element renders. The `input_names`, `model_input_names`, and `output_names` lists map these UI elements to the underlying [`ModularPipelineBlocks`]'s I/O interface: @@ -172,7 +174,7 @@ The `params` dict defines how each UI element renders. The `input_names`, `model | `model_input_names` | `expected_components` property | | `output_names` | `intermediate_outputs` property | -In this example: `prompt` is the only input, there are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. +In this example, `prompt` is the only input. There are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. Now let's look at the `params` dict: @@ -184,9 +186,9 @@ Now let's look at the `params` dict: **`doc`** is the documentation output, automatically added to all custom blocks. -### Making Adjustments +### Making adjustments -For the Gemini Prompt Expander, we don't need `old_prompt` in the UI. Remove it from both `params` and `output_names`: +Remove `old_prompt` from both `params` and `output_names` because you won't need to use it. 
```json { From 529761d71d6fa3c891a8452897c928d66740e1c5 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Tue, 3 Feb 2026 07:01:10 -1000 Subject: [PATCH 22/28] Update docs/source/en/modular_diffusers/mellon.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/mellon.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 03a47a0a240e..87a41510e055 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -18,7 +18,6 @@ specific language governing permissions and limitations under the License. > [!WARNING] > Mellon is in early development and not ready for production use yet. Consider this a sneak peek of how the integration works! -## Overview Create a `mellon_pipeline_config.json` file to define how a custom block's parameters map to Mellon UI components. From 6115abcb3b3420737ee010ff19ed33aefd60530a Mon Sep 17 00:00:00 2001 From: "yiyi@huggingface.co" Date: Tue, 3 Feb 2026 17:08:49 +0000 Subject: [PATCH 23/28] more update on custom block guide --- docs/source/en/modular_diffusers/custom_blocks.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index bb0ff1375e41..b412e0e58abc 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -33,7 +33,7 @@ Your custom block project should use the following structure: ## Quick Start with Template -The fastest way to create a custom block is to start from our template. +The fastest way to create a custom block is to start from our template. 
The template provides a pre-configured project structure with `block.py` and `modular_config.json` files, plus commented examples showing how to define components, inputs, outputs, and the `__call__` method—so you can focus on your custom logic instead of boilerplate setup. ### Download the template @@ -313,9 +313,18 @@ pipeline.save_pretrained("./my-florence-block", repo_id="your-username/my-custom ## Next Steps + + +This guide covered creating a single custom block. Learn how to compose multiple blocks together: + +- [SequentialPipelineBlocks](./sequential_pipeline_blocks): Chain blocks to execute in sequence +- [ConditionalPipelineBlocks](./auto_pipeline_blocks): Create conditional blocks that select different execution paths +- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks): Define iterative workflows like the denoising loop + + -Make your custom block work with Mellon's visual interface - no UI code required. See the [Mellon Custom Blocks](./mellon_custom_blocks) guide. +Make your custom block work with Mellon's visual interface. See the [Mellon Custom Blocks](./mellon) guide. From a01dc17faf0e1b335c0739a8b3bb292398a8b256 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Tue, 3 Feb 2026 07:11:04 -1000 Subject: [PATCH 24/28] Update docs/source/en/modular_diffusers/mellon.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/mellon.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 87a41510e055..30569b61e20d 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -29,7 +29,6 @@ Create a `mellon_pipeline_config.json` file to define how a custom block's param Mellon types determine how each parameter renders in the UI. If you don't specify a type for a parameter, it will default to `"custom"`, which renders as a simple connection dot. 
You can always adjust this later in the generated config. -### Supported Mellon Types | Type | Input/Output | Description | |------|--------------|-------------| From da18b0688cb2a48ac3a7d7cbbdab98513086ff70 Mon Sep 17 00:00:00 2001 From: "yiyi@huggingface.co" Date: Tue, 3 Feb 2026 17:14:15 +0000 Subject: [PATCH 25/28] a few mamual --- docs/source/en/modular_diffusers/mellon.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 30569b61e20d..fcaa9854ccad 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -162,7 +162,7 @@ The generated template is a starting point - you may want to adjust it for your } ``` -### Understanding the structure +### Understanding the Structure The `params` dict defines how each UI element renders. The `input_names`, `model_input_names`, and `output_names` lists map these UI elements to the underlying [`ModularPipelineBlocks`]'s I/O interface: @@ -172,7 +172,7 @@ The `params` dict defines how each UI element renders. The `input_names`, `model | `model_input_names` | `expected_components` property | | `output_names` | `intermediate_outputs` property | -In this example, `prompt` is the only input. There are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. +In this example: `prompt` is the only input. There are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. Now let's look at the `params` dict: @@ -184,7 +184,7 @@ Now let's look at the `params` dict: **`doc`** is the documentation output, automatically added to all custom blocks. -### Making adjustments +### Making Adjustments Remove `old_prompt` from both `params` and `output_names` because you won't need to use it. 
From 7f283cb736316e2959c299cbddb9c99a646c0525 Mon Sep 17 00:00:00 2001 From: "yiyi@huggingface.co" Date: Tue, 3 Feb 2026 17:25:17 +0000 Subject: [PATCH 26/28] apply suggestion: turn into bullets --- docs/source/en/modular_diffusers/mellon.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index fcaa9854ccad..7961abbfc77f 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -176,13 +176,13 @@ In this example: `prompt` is the only input. There are no model components, and Now let's look at the `params` dict: -**`prompt`** is an input parameter. It has `display: "textarea"` which renders as a text input box, `label: "Prompt"` shown in the UI, and `default: ""` so it starts empty. The `type: "string"` field is important in Mellon because it determines which nodes can connect together - only matching types can be linked with "noodles". +- **`prompt`**: An input parameter with `display: "textarea"` (renders as a text input box), `label: "Prompt"` (shown in the UI), and `default: ""` (starts empty). The `type: "string"` field is important in Mellon because it determines which nodes can connect together - only matching types can be linked with "noodles". -**`out_prompt`** is the expanded prompt output. The `out_` prefix was automatically added because the input and output share the same name (`prompt`), avoiding naming conflicts in the config. It has `display: "output"` which renders as an output socket. +- **`out_prompt`**: The expanded prompt output. The `out_` prefix was automatically added because the input and output share the same name (`prompt`), avoiding naming conflicts in the config. It has `display: "output"` which renders as an output socket. -**`old_prompt`** has `type: "custom"` because we didn't specify metadata. This renders as a simple dot in the UI. 
Since we don't actually want to expose this in the UI, we can remove it. +- **`old_prompt`**: Has `type: "custom"` because we didn't specify metadata. This renders as a simple dot in the UI. Since we don't actually want to expose this in the UI, we can remove it. -**`doc`** is the documentation output, automatically added to all custom blocks. +- **`doc`**: The documentation output, automatically added to all custom blocks. ### Making Adjustments From f99ef1b185bfd1c7a3f5a8737f4eee9c391aac78 Mon Sep 17 00:00:00 2001 From: "yiyi@huggingface.co" Date: Tue, 3 Feb 2026 19:53:50 +0000 Subject: [PATCH 27/28] support define mellon meta with MellonParam directly, and update doc --- docs/source/en/modular_diffusers/mellon.md | 66 ++++++++++++++----- .../modular_pipelines/mellon_node_utils.py | 14 ++-- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index 7961abbfc77f..f1a5748292dc 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -19,11 +19,25 @@ specific language governing permissions and limitations under the License. > Mellon is in early development and not ready for production use yet. Consider this a sneak peek of how the integration works! -Create a `mellon_pipeline_config.json` file to define how a custom block's parameters map to Mellon UI components. +Custom blocks work in Mellon out of the box - you just need to add a `mellon_pipeline_config.json` to your repository. This config file tells Mellon how to render your block's parameters as UI components. -1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). Specify types via metadata in your block definitions, or pass them when generating the config. -2. 
**Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository. -3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs. +Here's what it looks like in action with the [Gemini Prompt Expander](https://huggingface.co/diffusers/gemini-prompt-expander-mellon) block: + + + +To use a modular diffusers custom block in Mellon: +1. Drag a **Dynamic Block Node** from the ModularDiffusers section +2. Enter the `repo_id` (e.g., `diffusers/gemini-prompt-expander-mellon`) +3. Click **Load Custom Block** +4. The node transforms to show your block's inputs and outputs + +Now let's walk through how to create this config for your own custom block. + +## Steps to create a Mellon config + +1. **Specify Mellon types for your parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). +2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a config template and push it to your Hub repository. +3. **(Optional) Manually adjust the config** - Fine-tune the generated config for your specific needs. ## Specify Mellon types for parameters @@ -41,11 +55,29 @@ Mellon types determine how each parameter renders in the UI. If you don't specif | `number` | Input | Numeric input | | `checkbox` | Input | Boolean toggle | +For parameters that need more configuration (like dropdowns with options, or sliders with min/max values), pass a `MellonParam` instance directly instead of a string. You can use one of the class methods below, or create a fully custom one with `MellonParam(name, label, type, ...)`. 
+ +| Method | Description | +|--------|-------------| +| `MellonParam.Input.image(name)` | Image input | +| `MellonParam.Input.textbox(name, default)` | Text input as textarea | +| `MellonParam.Input.dropdown(name, options, default)` | Dropdown selection | +| `MellonParam.Input.slider(name, default, min, max, step)` | Slider for numeric values | +| `MellonParam.Input.number(name, default, min, max, step)` | Numeric input (no slider) | +| `MellonParam.Input.seed(name, default)` | Seed input with randomize button | +| `MellonParam.Input.checkbox(name, default)` | Boolean checkbox | +| `MellonParam.Input.model(name)` | Model input for diffusers components | +| `MellonParam.Output.image(name)` | Image output | +| `MellonParam.Output.video(name)` | Video output | +| `MellonParam.Output.text(name)` | Text output | +| `MellonParam.Output.model(name)` | Model output for diffusers components | + Choose one of the methods below to specify a Mellon type. ### Using `metadata` in block definitions -If you're defining a custom block from scratch, add `metadata={"mellon": ""}` directly to your `InputParam` and `OutputParam` definitions: +If you're defining a custom block from scratch, add `metadata={"mellon": ""}` directly to your `InputParam` and `OutputParam` definitions. If you're editing an existing custom block from the Hub, see [Editing custom blocks](./custom_blocks#editing-custom-blocks) for how to download it locally. 
+ ```python class GeminiPromptExpander(ModularPipelineBlocks): @@ -79,6 +111,18 @@ class GeminiPromptExpander(ModularPipelineBlocks): ] ``` +For full control over UI configuration, pass a `MellonParam` instance directly: +```python +from diffusers.modular_pipelines.mellon_node_utils import MellonParam + +InputParam( + "mode", + type_hint=str, + default="balanced", + metadata={"mellon": MellonParam.Input.dropdown("mode", options=["fast", "balanced", "quality"])}, +) +``` + ### Using `input_types` and `output_types` when Generating Config If you're working with an existing pipeline or prefer to keep your block definitions clean, specify types when generating the config using the `input_types/output_types` argument: @@ -223,14 +267,4 @@ Remove `old_prompt` from both `params` and `output_names` because you won't need } ``` -See the final config at [YiYiXu/gemini-prompt-expander](https://huggingface.co/YiYiXu/gemini-prompt-expander). - -## Use in Mellon - -1. Start Mellon (see [Mellon installation guide](https://github.com/cubiq/Mellon)) - -2. In Mellon: - - Drag a **Dynamic Block Node** from the ModularDiffusers section - - Enter your `repo_id` (e.g., `YiYiXu/gemini-prompt-expander`) - - Click **Load Custom Block** - - The node will transform to show your block's inputs and outputs \ No newline at end of file +See the final config at [diffusers/gemini-prompt-expander-mellon](https://huggingface.co/diffusers/gemini-prompt-expander-mellon). \ No newline at end of file diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index 13acec36924e..35241023f3fc 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -418,18 +418,24 @@ def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: Convert an InputParam to a MellonParam using metadata. 
Args: - input_param: An InputParam with optional metadata={"mellon": ""} where type is one of: - image, video, text, textbox, checkbox, number, slider, dropdown, seed, model. If metadata is None or - unknown, maps to "custom". + input_param: An InputParam with optional metadata containing either: + - {"mellon": ""} for simple types (image, textbox, slider, etc.) + - {"mellon": MellonParam(...)} for full control over UI configuration Returns: MellonParam instance """ name = input_param.name metadata = input_param.metadata - mellon_type = metadata.get("mellon") if metadata else None + mellon_value = metadata.get("mellon") if metadata else None default = input_param.default + # If it's already a MellonParam, return it directly + if isinstance(mellon_value, MellonParam): + return mellon_value + + mellon_type = mellon_value + if mellon_type == "image": return MellonParam.Input.image(name) elif mellon_type == "textbox": From 71e458d91a0e8f69956a892cd9856f1ff15540a0 Mon Sep 17 00:00:00 2001 From: "yiyi@huggingface.co" Date: Tue, 3 Feb 2026 23:03:58 +0000 Subject: [PATCH 28/28] add the video --- docs/source/en/modular_diffusers/mellon.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md index f1a5748292dc..808e62ad7966 100644 --- a/docs/source/en/modular_diffusers/mellon.md +++ b/docs/source/en/modular_diffusers/mellon.md @@ -23,7 +23,7 @@ Custom blocks work in Mellon out of the box - just need to add a `mellon_pipelin Here's what it looks like in action with the [Gemini Prompt Expander](https://huggingface.co/diffusers/gemini-prompt-expander-mellon) block: - +![Mellon custom block demo](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/modular_demo_dynamic.gif) To use a modular diffusers custom block in Mellon: 1. Drag a **Dynamic Block Node** from the ModularDiffusers section