mcmonkeyprojects · mcmonkey4eva · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/docs/Model Support.md b/docs/Model Support.md
@@ -562,6 +562,23 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended
     - **Sampler:** Defaults to `ER-SDE-Solver`, but all common samplers work. They officially recommend also trying out `Euler Ancestral` or `DPM++ 2M SDE`
     - **Scheduler:** Default is fine (`Simple`), or you can experiment at will. The model is adaptable.
 
+# Ernie
+
+- [Ernie and Ernie Turbo](<https://huggingface.co/baidu/ERNIE-Image-Turbo>) are supported in SwarmUI!
+- It is an 8B model, with both a strong base and an official turbo designed to run extremely fast while competing at the top level of image models
+    - The "Turbo" model can be downloaded here [Comfy-Org/ERNIE-Image](<https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image-turbo.safetensors>)
+    - Or the BF16 fat version [Comfy-Org/ERNIE-Image](<https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image.safetensors>)
+    - Save in `diffusion_models`
+- Uses the Flux.2 VAE, which will be downloaded and handled automatically.
+- Uses the Ministral 3 3b text encoder, which will be downloaded and handled automatically.
+- **Parameters:**
+    - **Prompt:** Supports general prompting in any format just fine. Speaks English and Chinese deeply.
+    - **Sampler:** Default is fine.
+    - **Scheduler:** Default is fine.
+    - **CFG Scale:** For Turbo, use `1`. For Base, use normal CFG ranges (e.g. `4` or `7`).
+    - **Steps:** For Turbo, `8` is recommended. For Base, use 20+ steps as normal.
+    - **Resolution:** Side length `1024` is the standard.
+
 # Video Models
 
 - Video models are documented in [Video Model Support](/docs/Video%20Model%20Support.md).

diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs
@@ -830,7 +830,7 @@ public string CreateKSampler(JArray model, JArray pos, JArray neg, JArray latent
                 latent = [srCond, 2];
             }
         }
-        else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma())
+        else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie())
         {
             defscheduler ??= "simple";
         }

diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs
@@ -63,6 +63,9 @@ public bool IsSD3()
     /// <summary>Returns true if the current model is any Black Forest Labs' Flux.2 variant.</summary>
     public bool IsAnyFlux2() => IsFlux2Dev() || IsFlux2Klein4B() || IsFlux2Klein9B();
 
+    /// <summary>Returns true if the current model belongs to the Ernie Image compat class (<see cref="T2IModelClassSorter.CompatErnieImage"/>).</summary>
+    public bool IsErnie() => IsModelCompatClass(T2IModelClassSorter.CompatErnieImage);
+
     /// <summary>Returns true if the current model is AuraFlow.</summary>
     public bool IsAuraFlow() => IsModelCompatClass(T2IModelClassSorter.CompatAuraFlow);
 
@@ -263,7 +266,7 @@ public WGNodeData EmptyImage(int width, int height, int batchSize, string id = n
                 ["width"] = width
             }, id));
         }
-        else if (IsAnyFlux2())
+        else if (IsAnyFlux2() || IsErnie())
         {
             return resultImage(CreateNode("EmptyFlux2LatentImage", new JObject()
             {
@@ -578,6 +581,11 @@ public string GetMistralFlux2Model()
             return RequireClipModel("mistral_3_small_flux2.safetensors", "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/text_encoders/mistral_3_small_flux2_fp4_mixed.safetensors", "1ee1ff334d78228d73049ef0ee4fcd21c1700536b5a45c06547af057f92463a7", T2IParamTypes.MistralModel);
         }
 
+        public string GetMinistral3_3bModel()
+        {
+            return RequireClipModel("ministral-3-3b.safetensors", "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ministral-3-3b.safetensors", "49a750a128863854eac7d85e1a277a7b44bf6ec3646405b84686dfeeca3708ca", T2IParamTypes.MistralModel);
+        }
+
         public string GetClipLModel()
         {
             if (g.UserInput.TryGet(T2IParamTypes.ClipLModel, out T2IModel model))
@@ -1032,6 +1040,11 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC)
                 LoadingModel = [kvcached, 0];
             }
         }
+        else if (IsErnie())
+        {
+            helpers.LoadClip("flux2", helpers.GetMinistral3_3bModel());
+            helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae");
+        }
         else if (IsFlux() && (LoadingClip is null || LoadingVAE is null || UserInput.Get(T2IParamTypes.T5XXLModel) is not null || UserInput.Get(T2IParamTypes.ClipLModel) is not null))
         {
             helpers.LoadClip2("flux", helpers.GetT5XXLModel(), helpers.GetClipLModel());

diff --git a/src/Text2Image/T2IModelClassSorter.cs b/src/Text2Image/T2IModelClassSorter.cs
@@ -65,6 +65,7 @@ public static T2IModelCompatClass
         CompatFlux2 = RegisterCompat(new() { ID = "flux-2", ShortCode = "Flux2", LorasTargetTextEnc = false }),
         CompatFlux2Klein4B = RegisterCompat(new() { ID = "flux-2-klein-4b", ShortCode = "Fl2K4", LorasTargetTextEnc = false }),
         CompatFlux2Klein9B = RegisterCompat(new() { ID = "flux-2-klein-9b", ShortCode = "Fl2K9", LorasTargetTextEnc = false }),
+        CompatErnieImage = RegisterCompat(new() { ID = "ernie-image", ShortCode = "Ernie", LorasTargetTextEnc = false }),
         CompatLtxv2 = RegisterCompat(new() { ID = "lightricks-ltx-video-2", ShortCode = "LTXV2", IsText2Video = true, IsImage2Video = true }),
         CompatZImage = RegisterCompat(new() { ID = "z-image", ShortCode = "ZImg", LorasTargetTextEnc = false }),
         CompatZetaChroma = RegisterCompat(new() { ID = "zeta-chroma", ShortCode = "ZChr", LorasTargetTextEnc = false }),
@@ -215,6 +216,7 @@ bool isQwenImageLora(JObject h) => (hasLoraKey(h, "transformer_blocks.0.attn.add
         bool isHyVid15Lora(JObject h) => hasKey(h, "cond_type_embedding.lora_down.weight") && hasKey(h, "byt5_in.fc1.lora_down.weight") && hasKey(h, "vision_in.proj.1.lora_down.weight");
         bool isHyImgRefiner(JObject h) => h.ContainsKey("double_blocks.0.img_attn_k_norm.weight") && h.TryGetValue("time_r_in.mlp.0.bias", out JToken timeTok) && timeTok["shape"].ToArray()[0].Value<long>() == 3328;
         bool isAuraFlow(JObject h) => h.ContainsKey("model.cond_seq_linear.weight") && h.ContainsKey("model.double_layers.0.attn.w1k.weight");
+        bool isErnie(JObject h) => hasKey(h, "layers.0.mlp.linear_fc2.weight") && hasKey(h, "x_embedder.proj.weight") && hasKey(h, "layers.0.adaLN_sa_ln.weight");
         bool isKandinsky5(JObject h) => hasKey(h, "pooled_text_embeddings.in_layer.weight") && hasKey(h, "text_transformer_blocks.0.feed_forward.in_layer.weight");
         bool tryGetKan5IdKey(JObject h, out JToken tok) => h.TryGetValue("text_embeddings.in_layer.weight", out tok);
         bool isKan5VidLite(JObject h) => tryGetKan5IdKey(h, out JToken tok) && tok["shape"].ToArray()[0].Value<long>() == 1792;
@@ -628,6 +630,11 @@ JToken GetEmbeddingKey(JObject h)
         {
             return isQwenImageLora(h);
         }});
+        // ====================== Ernie Image ======================
+        Register(new() { ID = "ernie-image", CompatClass = CompatErnieImage, Name = "Ernie Image", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) =>
+        {
+            return isErnie(h);
+        }});
         // ====================== Kandinsky5 ======================
         Register(new() { ID = "kandinsky5-image-lite", CompatClass = CompatKandinsky5ImgLite, Name = "Kandinsky5 Image Lite", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) =>
         {