diff --git a/fast_llm/engine/checkpoint/huggingface.py b/fast_llm/engine/checkpoint/huggingface.py index 7fbc42afd..bbc3a0a91 100644 --- a/fast_llm/engine/checkpoint/huggingface.py +++ b/fast_llm/engine/checkpoint/huggingface.py @@ -139,6 +139,7 @@ def _export_config(cls, config: FastLLMModelConfig) -> dict[str, typing.Any]: "bos_token_id", "decoder_start_token_id", "eos_token_id", + "mask_token_id", "pad_token_id", "sep_token_id", # Initialization / pretraining metadata Fast-LLM does not consume. diff --git a/fast_llm/models/gpt/conversion/llama.py b/fast_llm/models/gpt/conversion/llama.py index d57665e6c..59e30d9e1 100644 --- a/fast_llm/models/gpt/conversion/llama.py +++ b/fast_llm/models/gpt/conversion/llama.py @@ -91,6 +91,7 @@ def _llama_rotary_export(config: AttentionConfig) -> dict: rope_parameters.update( { "rope_type": "yarn", + "factor": rotary.scale_factor, "attention_factor": rotary.attention_factor, "beta_fast": rotary.beta_fast, "beta_slow": rotary.beta_slow, @@ -132,6 +133,7 @@ def _llama_rotary_import(hf_dict: dict) -> dict: elif rope_type == "yarn": rotary_config.update( { + "scale_factor": rope_params["factor"], "attention_factor": rope_params["attention_factor"], "beta_fast": rope_params["beta_fast"], "beta_slow": rope_params["beta_slow"], diff --git a/tests/utils/model_configs.py b/tests/utils/model_configs.py index 2d4e2f5d4..3a54be088 100644 --- a/tests/utils/model_configs.py +++ b/tests/utils/model_configs.py @@ -454,8 +454,12 @@ def update_and_add_testing_config( # Megatron doesn't support Yarn-style Rotary Embeddings megatron_args=None, checkpoint_format=DiffusionLlamaCheckpointFormat, - # TODO: Conversion is broken. - # TODO: Add back generate as `normal` when stable. + # Config + weight conversion works (test_conversion passes). 
The convert group stays `broken` + # because test_huggingface_model fails: the custom modeling code's `from_pretrained` requires a + # generation_config.json that Fast-LLM does not export (unlike `dream`, which ships one). Behind + # that, the forward pass likely diverges, as it does for `dream` — DiffusionLlama is a bidirectional + # diffusion LM, while Fast-LLM runs it causally — but that is unverified since loading fails first. + # Neither is a converter bug. `generate` is broken for the same diffusion-decoding reason. groups={ ModelTestingGroup.basic: ModelTestingGroupAction.unimportant, ModelTestingGroup.checkpoint: ModelTestingGroupAction.normal, @@ -530,8 +534,11 @@ def update_and_add_testing_config( # Megatron doesn't support per sub layer biases. megatron_args=None, checkpoint_format=DiffusionDreamCheckpointFormat, - # TODO: Conversion is broken. - # TODO: Add back generate as `normal` when stable. + # Config + weight conversion works (test_conversion passes). The convert group stays `broken` + # because test_huggingface_model fails: Dream is a bidirectional diffusion LM, so the HF forward pass + # diverges from Fast-LLM's causal run (structurally different logits/hidden states, confirmed — not + # a tolerance miss). Matching it needs bidirectional-attention modeling, not a converter change. + # `generate` is broken for the same diffusion-decoding reason. groups={ ModelTestingGroup.basic: ModelTestingGroupAction.unimportant, ModelTestingGroup.checkpoint: ModelTestingGroupAction.broken,