From 2cfe5cfdb7e814d1c38615070d7d5d1330e25166 Mon Sep 17 00:00:00 2001
From: yurekami
Date: Fri, 26 Dec 2025 01:52:16 +0900
Subject: [PATCH] fix: correct outdated function reference in docstring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changed `flash_mla_with_kvcache_sm90` to `flash_mla_with_kvcache` in
get_mla_metadata docstring to match the actual function name.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 flash_mla/flash_mla_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flash_mla/flash_mla_interface.py b/flash_mla/flash_mla_interface.py
index 4d276214..747dbb0f 100644
--- a/flash_mla/flash_mla_interface.py
+++ b/flash_mla/flash_mla_interface.py
@@ -19,7 +19,7 @@ def get_mla_metadata(
         num_heads_k: The number of k heads.
         num_heads_q: The number of q heads. This argument is optional when sparse attention is not enabled
         is_fp8_kvcache: Whether the k_cache and v_cache are in fp8 format.
-        topk: If not None, sparse attention will be enabled, and only tokens in the `indices` array passed to `flash_mla_with_kvcache_sm90` will be attended to.
+        topk: If not None, sparse attention will be enabled, and only tokens in the `indices` array passed to `flash_mla_with_kvcache` will be attended to.
 
     Returns:
         tile_scheduler_metadata: (num_sm_parts, TileSchedulerMetaDataSize), dtype torch.int32.