From a1365f22534fb06e84b6c582f111e2aa3ec2f871 Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier <joel.lamy-poirier@servicenow.com>
Date: Mon, 1 Jun 2026 12:10:43 -0400
Subject: [PATCH 1/4] Export/import the yarn rope scaling factor in the Llama
 converter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The yarn branch of the rope config converter omitted the `factor` key (Fast-LLM's
`YarnRotaryConfig.scale_factor`), unlike the llama3 branch right above it. The yarn rope validation
in transformers requires that key, so exporting a yarn config produced an HF config that failed to
instantiate (`Missing required keys in rope_parameters for 'rope_type'='yarn': {'factor'}`). This
is the diffusion_llama conversion failure. Add the symmetric `factor` <-> `scale_factor` mapping on
both export and import.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 fast_llm/models/gpt/conversion/llama.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fast_llm/models/gpt/conversion/llama.py b/fast_llm/models/gpt/conversion/llama.py
index d57665e6c..59e30d9e1 100644
--- a/fast_llm/models/gpt/conversion/llama.py
+++ b/fast_llm/models/gpt/conversion/llama.py
@@ -91,6 +91,7 @@ def _llama_rotary_export(config: AttentionConfig) -> dict:
         rope_parameters.update(
             {
                 "rope_type": "yarn",
+                "factor": rotary.scale_factor,
                 "attention_factor": rotary.attention_factor,
                 "beta_fast": rotary.beta_fast,
                 "beta_slow": rotary.beta_slow,
@@ -132,6 +133,7 @@ def _llama_rotary_import(hf_dict: dict) -> dict:
     elif rope_type == "yarn":
         rotary_config.update(
             {
+                "scale_factor": rope_params["factor"],
                 "attention_factor": rope_params["attention_factor"],
                 "beta_fast": rope_params["beta_fast"],
                 "beta_slow": rope_params["beta_slow"],

From 5f614604d066227d75d3f6f88e45805b4193580c Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier <joel.lamy-poirier@servicenow.com>
Date: Mon, 1 Jun 2026 12:13:08 -0400
Subject: [PATCH 2/4] Allowlist mask_token_id in HF coverage check

Diffusion configs (Dream, DiffusionLlama) carry a mask_token_id default that the inherited
Llama/Qwen2 converters do not consume; it is a generation/inference token id Fast-LLM does not store,
in the same category as the bos/eos/pad ids already allowlisted.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 fast_llm/engine/checkpoint/huggingface.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fast_llm/engine/checkpoint/huggingface.py b/fast_llm/engine/checkpoint/huggingface.py
index 7fbc42afd..bbc3a0a91 100644
--- a/fast_llm/engine/checkpoint/huggingface.py
+++ b/fast_llm/engine/checkpoint/huggingface.py
@@ -139,6 +139,7 @@ def _export_config(cls, config: FastLLMModelConfig) -> dict[str, typing.Any]:
             "bos_token_id",
             "decoder_start_token_id",
             "eos_token_id",
+            "mask_token_id",
             "pad_token_id",
             "sep_token_id",
             # Initialization / pretraining metadata Fast-LLM does not consume.

From e5d584b20229db4bcd5b9baaa83cfd29ec61aaad Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier <joel.lamy-poirier@servicenow.com>
Date: Mon, 1 Jun 2026 12:43:47 -0400
Subject: [PATCH 3/4] State the real remaining diffusion blocker in the
 fixtures

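Conversion (config + weights) now works for diffusion_llama and dream, so the misleading
"Conversion is broken" TODO is replaced with the actual reason the convert group stays `broken`:
test_huggingface_model fails. For dream, a bidirectional diffusion LM, the HF forward diverges
structurally from Fast-LLM's causal run (confirmed, not a tolerance miss). For diffusion_llama,
loading fails first because the custom modeling code requires a generation_config.json that
Fast-LLM does not export; the same forward divergence is likely there too, but unverified.
Both are modeling/model-load concerns, not converter bugs. The "Add back generate" TODO is
likewise replaced by a note that `generate` remains broken for the diffusion-decoding reason.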

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/utils/model_configs.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py
index 2d4e2f5d4..0ce2b01c7 100644
--- a/tests/utils/model_configs.py
+++ b/tests/utils/model_configs.py
@@ -454,8 +454,12 @@ def update_and_add_testing_config(
     # Megatron doesn't support Yarn-style Rotary Embeddings
     megatron_args=None,
     checkpoint_format=DiffusionLlamaCheckpointFormat,
-    # TODO: Conversion is broken.
-    # TODO: Add back generate as `normal` when stable.
+    # Config + weight conversion works (test_conversion passes). The convert group stays `broken`
+    # because test_huggingface_model fails: the custom modeling `from_pretrained` requires a
+    # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Behind
+    # that, the forward likely diverges as for `dream` below — DiffusionLlama is a bidirectional
+    # diffusion LM, while Fast-LLM runs it causal — but that is unverified since loading fails first.
+    # Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason.
     groups={
         ModelTestingGroup.basic: ModelTestingGroupAction.unimportant,
         ModelTestingGroup.checkpoint: ModelTestingGroupAction.normal,
@@ -530,8 +534,11 @@ def update_and_add_testing_config(
     # Megatron doesn't support per sub layer biases.
     megatron_args=None,
     checkpoint_format=DiffusionDreamCheckpointFormat,
-    # TODO: Conversion is broken.
-    # TODO: Add back generate as `normal` when stable.
+    # Config + weight conversion works (test_conversion passes). The convert group stays `broken`
+    # because test_huggingface_model fails: Dream is a bidirectional diffusion LM, so the HF forward
+    # diverges from Fast-LLM's causal run (structurally different logits/hidden states, confirmed — not
+    # a tolerance miss). Matching it needs bidirectional-attention modeling, not a converter change.
+    # `generate` is broken for the same diffusion-decoding reason.
     groups={
         ModelTestingGroup.basic: ModelTestingGroupAction.unimportant,
         ModelTestingGroup.checkpoint: ModelTestingGroupAction.broken,

From 66551d206347225e281a808469fc526e793934bd Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier <joel.lamy-poirier@servicenow.com>
Date: Tue, 2 Jun 2026 11:32:52 -0400
Subject: [PATCH 4/4] Drop fragile positional 'below' from diffusion_llama
 fixture comment

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/utils/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py
index 0ce2b01c7..3a54be088 100644
--- a/tests/utils/model_configs.py
+++ b/tests/utils/model_configs.py
@@ -457,7 +457,7 @@ def update_and_add_testing_config(
     # Config + weight conversion works (test_conversion passes). The convert group stays `broken`
     # because test_huggingface_model fails: the custom modeling `from_pretrained` requires a
     # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Behind
-    # that, the forward likely diverges as for `dream` below — DiffusionLlama is a bidirectional
+    # that, the forward likely diverges as for `dream` — DiffusionLlama is a bidirectional
     # diffusion LM, while Fast-LLM runs it causal — but that is unverified since loading fails first.
     # Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason.
     groups={