Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs/Model Support.md
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,23 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended
- **Sampler:** Defaults to `ER-SDE-Solver`, but all common samplers work. They officially recommend also trying out `Euler Ancestral` or `DPM++ 2M SDE`
- **Scheduler:** Default is fine (`Simple`), or you can experiment at will. The model is adaptable.

# Ernie

- [Ernie and Ernie Turbo](<https://huggingface.co/baidu/ERNIE-Image-Turbo>) are supported in SwarmUI!
- It is an 8B model, with both a strong base and an official turbo designed to run extremely fast while competing at the top level of image models
- The "Turbo" model can be downloaded here [Comfy-Org/ERNIE-Image](<https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image-turbo.safetensors>)
- Or the base (non-Turbo) model in BF16 [Comfy-Org/ERNIE-Image](<https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image.safetensors>)
- Save in `diffusion_models`
- Uses the Flux.2 VAE, which will be downloaded and handled automatically
- Uses the Ministral 3 3b text encoder, which will be downloaded and handled automatically
- **Parameters:**
- **Prompt:** Supports general prompting in any format just fine. Understands both English and Chinese well.
- **Sampler:** Default is fine.
- **Scheduler:** Default is fine.
- **CFG Scale:** For Turbo, use `1`. For Base, use normal CFG ranges (e.g. `4` or `7`).
- **Steps:** For Turbo `8` is recommended. For Base, 20+ steps as normal.
- **Resolution:** Side length `1024` is the standard.

# Video Models

- Video models are documented in [Video Model Support](/docs/Video%20Model%20Support.md).
Expand Down
2 changes: 1 addition & 1 deletion src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ public string CreateKSampler(JArray model, JArray pos, JArray neg, JArray latent
latent = [srCond, 2];
}
}
else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma())
else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie())
{
defscheduler ??= "simple";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ public bool IsSD3()
/// <summary>Checks whether the current model is one of the Flux.2 variants tested here: Flux.2 Dev, Flux.2 Klein 4B, or Flux.2 Klein 9B.</summary>
public bool IsAnyFlux2()
{
    return IsFlux2Dev() || IsFlux2Klein4B() || IsFlux2Klein9B();
}

/// <summary>Checks whether the current model belongs to the Ernie Image compatibility class (<see cref="T2IModelClassSorter.CompatErnieImage"/>).</summary>
public bool IsErnie()
{
    return IsModelCompatClass(T2IModelClassSorter.CompatErnieImage);
}

/// <summary>Checks whether the current model belongs to the AuraFlow compatibility class (<see cref="T2IModelClassSorter.CompatAuraFlow"/>).</summary>
public bool IsAuraFlow()
{
    return IsModelCompatClass(T2IModelClassSorter.CompatAuraFlow);
}

Expand Down Expand Up @@ -263,7 +266,7 @@ public WGNodeData EmptyImage(int width, int height, int batchSize, string id = n
["width"] = width
}, id));
}
else if (IsAnyFlux2())
else if (IsAnyFlux2() || IsErnie())
{
return resultImage(CreateNode("EmptyFlux2LatentImage", new JObject()
{
Expand Down Expand Up @@ -578,6 +581,11 @@ public string GetMistralFlux2Model()
return RequireClipModel("mistral_3_small_flux2.safetensors", "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/text_encoders/mistral_3_small_flux2_fp4_mixed.safetensors", "1ee1ff334d78228d73049ef0ee4fcd21c1700536b5a45c06547af057f92463a7", T2IParamTypes.MistralModel);
}

/// <summary>Resolves the Ministral 3 3B text encoder (used by Ernie Image) through <c>RequireClipModel</c>, keyed on the <see cref="T2IParamTypes.MistralModel"/> parameter.</summary>
/// <returns>Whatever <c>RequireClipModel</c> returns for this encoder file.</returns>
public string GetMinistral3_3bModel()
{
    const string fileName = "ministral-3-3b.safetensors";
    const string downloadUrl = "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ministral-3-3b.safetensors";
    // NOTE(review): 64 hex characters, presumably the SHA-256 of the file above - confirm against RequireClipModel.
    const string expectedHash = "49a750a128863854eac7d85e1a277a7b44bf6ec3646405b84686dfeeca3708ca";
    return RequireClipModel(fileName, downloadUrl, expectedHash, T2IParamTypes.MistralModel);
}

public string GetClipLModel()
{
if (g.UserInput.TryGet(T2IParamTypes.ClipLModel, out T2IModel model))
Expand Down Expand Up @@ -1032,6 +1040,11 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC)
LoadingModel = [kvcached, 0];
}
}
else if (IsErnie())
{
helpers.LoadClip("flux2", helpers.GetMinistral3_3bModel());
helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae");
}
else if (IsFlux() && (LoadingClip is null || LoadingVAE is null || UserInput.Get(T2IParamTypes.T5XXLModel) is not null || UserInput.Get(T2IParamTypes.ClipLModel) is not null))
{
helpers.LoadClip2("flux", helpers.GetT5XXLModel(), helpers.GetClipLModel());
Expand Down
7 changes: 7 additions & 0 deletions src/Text2Image/T2IModelClassSorter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public static T2IModelCompatClass
CompatFlux2 = RegisterCompat(new() { ID = "flux-2", ShortCode = "Flux2", LorasTargetTextEnc = false }),
CompatFlux2Klein4B = RegisterCompat(new() { ID = "flux-2-klein-4b", ShortCode = "Fl2K4", LorasTargetTextEnc = false }),
CompatFlux2Klein9B = RegisterCompat(new() { ID = "flux-2-klein-9b", ShortCode = "Fl2K9", LorasTargetTextEnc = false }),
CompatErnieImage = RegisterCompat(new() { ID = "ernie-image", ShortCode = "Ernie", LorasTargetTextEnc = false }),
CompatLtxv2 = RegisterCompat(new() { ID = "lightricks-ltx-video-2", ShortCode = "LTXV2", IsText2Video = true, IsImage2Video = true }),
CompatZImage = RegisterCompat(new() { ID = "z-image", ShortCode = "ZImg", LorasTargetTextEnc = false }),
CompatZetaChroma = RegisterCompat(new() { ID = "zeta-chroma", ShortCode = "ZChr", LorasTargetTextEnc = false }),
Expand Down Expand Up @@ -215,6 +216,7 @@ bool isQwenImageLora(JObject h) => (hasLoraKey(h, "transformer_blocks.0.attn.add
// Detection predicates: each inspects the keys of the JObject `h` (presumably the model's tensor header - confirm against caller)
// and returns true only when every listed key/condition is satisfied.
// isHyVid15Lora: all three LoRA down-weight keys must be found by hasKey.
bool isHyVid15Lora(JObject h) => hasKey(h, "cond_type_embedding.lora_down.weight") && hasKey(h, "byt5_in.fc1.lora_down.weight") && hasKey(h, "vision_in.proj.1.lora_down.weight");
// isHyImgRefiner: needs the double-block k-norm key, plus a "time_r_in.mlp.0.bias" entry whose first "shape" dimension is 3328.
bool isHyImgRefiner(JObject h) => h.ContainsKey("double_blocks.0.img_attn_k_norm.weight") && h.TryGetValue("time_r_in.mlp.0.bias", out JToken timeTok) && timeTok["shape"].ToArray()[0].Value<long>() == 3328;
// isAuraFlow: both keys must be present exactly as written (direct ContainsKey rather than the hasKey helper).
bool isAuraFlow(JObject h) => h.ContainsKey("model.cond_seq_linear.weight") && h.ContainsKey("model.double_layers.0.attn.w1k.weight");
// isErnie: all three keys must be found by hasKey.
bool isErnie(JObject h) => hasKey(h, "layers.0.mlp.linear_fc2.weight") && hasKey(h, "x_embedder.proj.weight") && hasKey(h, "layers.0.adaLN_sa_ln.weight");
// isKandinsky5: both keys must be found by hasKey.
bool isKandinsky5(JObject h) => hasKey(h, "pooled_text_embeddings.in_layer.weight") && hasKey(h, "text_transformer_blocks.0.feed_forward.in_layer.weight");
// tryGetKan5IdKey: fetches the "text_embeddings.in_layer.weight" entry; its shape is what isKan5VidLite checks below.
bool tryGetKan5IdKey(JObject h, out JToken tok) => h.TryGetValue("text_embeddings.in_layer.weight", out tok);
// isKan5VidLite: true when that entry exists and the first dimension of its "shape" is 1792.
bool isKan5VidLite(JObject h) => tryGetKan5IdKey(h, out JToken tok) && tok["shape"].ToArray()[0].Value<long>() == 1792;
Expand Down Expand Up @@ -628,6 +630,11 @@ JToken GetEmbeddingKey(JObject h)
{
return isQwenImageLora(h);
}});
// ====================== Ernie Image ======================
Register(new()
{
    ID = "ernie-image",
    CompatClass = CompatErnieImage,
    Name = "Ernie Image",
    StandardWidth = 1024,
    StandardHeight = 1024,
    // Classification is delegated entirely to the isErnie key check on the header; the model argument is not consulted.
    IsThisModelOfClass = (m, h) => isErnie(h)
});
// ====================== Kandinsky5 ======================
Register(new() { ID = "kandinsky5-image-lite", CompatClass = CompatKandinsky5ImgLite, Name = "Kandinsky5 Image Lite", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) =>
{
Expand Down
Loading