ServiceNow · jlamypoirier · Jun 3, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/fast_llm/engine/checkpoint/huggingface.py b/fast_llm/engine/checkpoint/huggingface.py
@@ -139,6 +139,7 @@ def _export_config(cls, config: FastLLMModelConfig) -> dict[str, typing.Any]:
             "bos_token_id",
             "decoder_start_token_id",
             "eos_token_id",
+            "mask_token_id",
             "pad_token_id",
             "sep_token_id",
             # Initialization / pretraining metadata Fast-LLM does not consume.

diff --git a/fast_llm/models/gpt/conversion/llama.py b/fast_llm/models/gpt/conversion/llama.py
@@ -91,6 +91,7 @@ def _llama_rotary_export(config: AttentionConfig) -> dict:
         rope_parameters.update(
             {
                 "rope_type": "yarn",
+                "factor": rotary.scale_factor,
                 "attention_factor": rotary.attention_factor,
                 "beta_fast": rotary.beta_fast,
                 "beta_slow": rotary.beta_slow,
@@ -132,6 +133,7 @@ def _llama_rotary_import(hf_dict: dict) -> dict:
     elif rope_type == "yarn":
         rotary_config.update(
             {
+                "scale_factor": rope_params["factor"],
                 "attention_factor": rope_params["attention_factor"],
                 "beta_fast": rope_params["beta_fast"],
                 "beta_slow": rope_params["beta_slow"],

diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py
@@ -454,8 +454,12 @@ def update_and_add_testing_config(
     # Megatron doesn't support Yarn-style Rotary Embeddings
     megatron_args=None,
     checkpoint_format=DiffusionLlamaCheckpointFormat,
-    # TODO: Conversion is broken.
-    # TODO: Add back generate as `normal` when stable.
+    # Config + weight conversion works (test_conversion passes). The convert group stays `broken`
+    # because test_huggingface_model fails: the custom modeling `from_pretrained` requires a
+    # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Once
+    # loading works, the forward will likely diverge as it does for `dream` (DiffusionLlama is a
+    # bidirectional diffusion LM, while Fast-LLM runs it causally), but that is unverified since loading
+    # fails first. Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason.
     groups={
         ModelTestingGroup.basic: ModelTestingGroupAction.unimportant,
         ModelTestingGroup.checkpoint: ModelTestingGroupAction.normal,
@@ -530,8 +534,11 @@ def update_and_add_testing_config(
     # Megatron doesn't support per sub layer biases.
     megatron_args=None,
     checkpoint_format=DiffusionDreamCheckpointFormat,
-    # TODO: Conversion is broken.
-    # TODO: Add back generate as `normal` when stable.
+    # Config + weight conversion works (test_conversion passes). The convert group stays `broken`
+    # because test_huggingface_model fails: Dream is a bidirectional diffusion LM, so the HF forward
+    # diverges from Fast-LLM's causal run (structurally different logits/hidden states, confirmed — not
+    # a tolerance miss). Matching it needs bidirectional-attention modeling, not a converter change.
+    # `generate` is broken for the same diffusion-decoding reason.
     groups={
         ModelTestingGroup.basic: ModelTestingGroupAction.unimportant,
         ModelTestingGroup.checkpoint: ModelTestingGroupAction.broken,