From a1365f22534fb06e84b6c582f111e2aa3ec2f871 Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Mon, 1 Jun 2026 12:10:43 -0400 Subject: [PATCH 1/4] Export/import the yarn rope scaling factor in the Llama converter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The yarn branch of the rope config converter omitted the `factor` key (Fast-LLM's `YarnRotaryConfig.scale_factor`), unlike the llama3 branch right above it. transformers' yarn rope validation requires it, so exporting a yarn config produced an HF config that failed to instantiate (`Missing required keys in rope_parameters for 'rope_type'='yarn': {'factor'}`) — the diffusion_llama conversion failure. Add the symmetric factor <-> scale_factor mapping on both export and import. Co-Authored-By: Claude Opus 4.8 (1M context) --- fast_llm/models/gpt/conversion/llama.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fast_llm/models/gpt/conversion/llama.py b/fast_llm/models/gpt/conversion/llama.py index d57665e6c..59e30d9e1 100644 --- a/fast_llm/models/gpt/conversion/llama.py +++ b/fast_llm/models/gpt/conversion/llama.py @@ -91,6 +91,7 @@ def _llama_rotary_export(config: AttentionConfig) -> dict: rope_parameters.update( { "rope_type": "yarn", + "factor": rotary.scale_factor, "attention_factor": rotary.attention_factor, "beta_fast": rotary.beta_fast, "beta_slow": rotary.beta_slow, @@ -132,6 +133,7 @@ def _llama_rotary_import(hf_dict: dict) -> dict: elif rope_type == "yarn": rotary_config.update( { + "scale_factor": rope_params["factor"], "attention_factor": rope_params["attention_factor"], "beta_fast": rope_params["beta_fast"], "beta_slow": rope_params["beta_slow"], From 5f614604d066227d75d3f6f88e45805b4193580c Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Mon, 1 Jun 2026 12:13:08 -0400 Subject: [PATCH 2/4] Allowlist mask_token_id in HF coverage check Diffusion configs (Dream, DiffusionLlama) carry a mask_token_id default that the inherited Llama/Qwen2 
converters do not consume; it is a generation/inference token id Fast-LLM does not store, in the same category as the bos/eos/pad ids already allowlisted. Co-Authored-By: Claude Opus 4.8 (1M context) --- fast_llm/engine/checkpoint/huggingface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fast_llm/engine/checkpoint/huggingface.py b/fast_llm/engine/checkpoint/huggingface.py index 7fbc42afd..bbc3a0a91 100644 --- a/fast_llm/engine/checkpoint/huggingface.py +++ b/fast_llm/engine/checkpoint/huggingface.py @@ -139,6 +139,7 @@ def _export_config(cls, config: FastLLMModelConfig) -> dict[str, typing.Any]: "bos_token_id", "decoder_start_token_id", "eos_token_id", + "mask_token_id", "pad_token_id", "sep_token_id", # Initialization / pretraining metadata Fast-LLM does not consume. From e5d584b20229db4bcd5b9baaa83cfd29ec61aaad Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Mon, 1 Jun 2026 12:43:47 -0400 Subject: [PATCH 3/4] State the real remaining diffusion blocker in the fixtures Conversion (config + weights) now works for diffusion_llama and dream; the misleading "Conversion is broken" TODO is replaced with the actual reason the convert group stays `broken`: test_huggingface_model fails. For dream, the HF forward diverges from Fast-LLM's causal run because it is a bidirectional diffusion LM; for diffusion_llama, model loading fails first because no generation_config.json is exported, so the same divergence is expected but unverified. Both are modeling/model-load concerns, not converter bugs. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/utils/model_configs.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py index 2d4e2f5d4..0ce2b01c7 100644 --- a/tests/utils/model_configs.py +++ b/tests/utils/model_configs.py @@ -454,8 +454,12 @@ def update_and_add_testing_config( # Megatron doesn't support Yarn-style Rotary Embeddings megatron_args=None, checkpoint_format=DiffusionLlamaCheckpointFormat, - # TODO: Conversion is broken. 
- # TODO: Add back generate as `normal` when stable. + # Config + weight conversion works (test_conversion passes). The convert group stays `broken` + # because test_huggingface_model fails: the custom modeling `from_pretrained` requires a + # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Behind + # that, the forward likely diverges as for `dream` below — DiffusionLlama is a bidirectional + # diffusion LM, while Fast-LLM runs it causal — but that is unverified since loading fails first. + # Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason. groups={ ModelTestingGroup.basic: ModelTestingGroupAction.unimportant, ModelTestingGroup.checkpoint: ModelTestingGroupAction.normal, @@ -530,8 +534,11 @@ def update_and_add_testing_config( # Megatron doesn't support per sub layer biases. megatron_args=None, checkpoint_format=DiffusionDreamCheckpointFormat, - # TODO: Conversion is broken. - # TODO: Add back generate as `normal` when stable. + # Config + weight conversion works (test_conversion passes). The convert group stays `broken` + # because test_huggingface_model fails: Dream is a bidirectional diffusion LM, so the HF forward + # diverges from Fast-LLM's causal run (structurally different logits/hidden states, confirmed — not + # a tolerance miss). Matching it needs bidirectional-attention modeling, not a converter change. + # `generate` is broken for the same diffusion-decoding reason. 
groups={ ModelTestingGroup.basic: ModelTestingGroupAction.unimportant, ModelTestingGroup.checkpoint: ModelTestingGroupAction.broken, From 66551d206347225e281a808469fc526e793934bd Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Tue, 2 Jun 2026 11:32:52 -0400 Subject: [PATCH 4/4] Drop stale positional 'below' from diffusion_llama fixture comment Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/utils/model_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py index 0ce2b01c7..3a54be088 100644 --- a/tests/utils/model_configs.py +++ b/tests/utils/model_configs.py @@ -457,7 +457,7 @@ def update_and_add_testing_config( # Config + weight conversion works (test_conversion passes). The convert group stays `broken` # because test_huggingface_model fails: the custom modeling `from_pretrained` requires a # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Behind - # that, the forward likely diverges as for `dream` below — DiffusionLlama is a bidirectional + # that, the forward likely diverges as for `dream` — DiffusionLlama is a bidirectional # diffusion LM, while Fast-LLM runs it causal — but that is unverified since loading fails first. # Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason. groups={