diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 665fda0f6c6a..221822d09838 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -3376,7 +3376,7 @@ def launchTestJobs(pipeline, testFilter) "GB200-12_GPUs-3_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE2-GPU8-Post-Merge", "auto:gb200-flex", "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8", - 6, + 7, 12, 3 ) @@ -3394,7 +3394,7 @@ def launchTestJobs(pipeline, testFilter) "GB200-20_GPUs-5_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE4-GPU16-Post-Merge", "auto:gb200-flex", "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16", - 1, + 2, 20, 5 ) @@ -3420,7 +3420,7 @@ def launchTestJobs(pipeline, testFilter) "GB200-36_GPUs-9_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE8-GPU32-Post-Merge", "auto:gb200-flex", "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32", - 7, + 8, 36, 9 ) diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus_perf_sanity.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus_perf_sanity.yml index 292d8633f2ab..39db52739d90 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus_perf_sanity.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus_perf_sanity.yml @@ -85,12 +85,12 @@ l0_gb200_multi_gpus_perf_sanity: - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX] TIMEOUT (120) # kimi-k25-thinking-fp4 - # - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX] TIMEOUT (120) - - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) + - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX] TIMEOUT (120) + - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # qwen3-235b-fp4 - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_qwen3-235b-fp4_8k1k_con1024_ctx1_tp1_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-UCX] TIMEOUT (120) diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8.yml index 32fdf1be3c61..30d6500886c3 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8.yml @@ -20,7 +20,7 @@ l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8: - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120) # Failed requests + - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120) # Failed requests # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120) diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16.yml index ac69996882b3..0e4e68522b1e 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16.yml @@ -15,6 +15,6 @@ l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node4_gpu16: backend: pytorch tests: - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX] TIMEOUT (120) + - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX] TIMEOUT (120) diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32.yml index b7b569f698cb..4a102ab0164d 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32.yml @@ -21,8 +21,8 @@ l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32: - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-UCX] TIMEOUT (120) - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX] TIMEOUT (120) + - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-UCX] TIMEOUT (120) @@ -30,5 +30,5 @@ l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node8_gpu32: # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-UCX] TIMEOUT (120) # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX] TIMEOUT (120) - # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX] TIMEOUT (120) + # - perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX] TIMEOUT (120) diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml index 03983b003bdb..6d65a89bf440 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 384 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml index 7d5a3e3ae85b..a9292d239bf5 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 416 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml index ea656cde3dcb..d0d612be80ab 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k2-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 384 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml index 93e7ba0535ce..4e3eb972d195 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 384 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml index 5394bbd77324..d4ab81988a60 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb416_mtp3_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 416 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml similarity index 97% rename from tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml rename to tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml index dc849424ba01..d84d38d742b7 100644 --- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb384_mtp0_ccb-UCX.yaml +++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml @@ -65,9 +65,6 @@ worker_config: moe_config: backend: CUTEDSL use_low_precision_moe_combine: true - load_balancer: - num_slots: 384 - layer_updates_per_iter: 1 cache_transceiver_config: max_tokens_in_buffer: 16384 backend: UCX