diff --git a/common/reasoning-budget.cpp b/common/reasoning-budget.cpp index 8c1f72fc2a1..c6e1f86c91e 100644 --- a/common/reasoning-budget.cpp +++ b/common/reasoning-budget.cpp @@ -158,8 +158,6 @@ static void common_reasoning_budget_apply(struct llama_sampler * smpl, llama_tok for (size_t i = 0; i < cur_p->size; i++) { if (cur_p->data[i].id != forced) { cur_p->data[i].logit = -INFINITY; - } else { - cur_p->data[i].logit = +INFINITY; // force the token } } } diff --git a/tests/test-reasoning-budget.cpp b/tests/test-reasoning-budget.cpp index 747d246448f..f7a60178996 100644 --- a/tests/test-reasoning-budget.cpp +++ b/tests/test-reasoning-budget.cpp @@ -70,20 +70,20 @@ static void test_reasoning_budget( llama_sampler_apply(sampler, &cur_p); // Check if forcing is active (all logits except one should be -INFINITY) - size_t not_neg_inf = 0; - llama_token not_neg_inf_token = -1; + size_t finite_count = 0; + llama_token finite_token = -1; for (size_t j = 0; j < cur.size(); j++) { - if (std::isfinite(cur[j].logit) || cur[j].logit > 0) { // +INFINITY - not_neg_inf++; - not_neg_inf_token = cur[j].id; + if (std::isfinite(cur[j].logit)) { + finite_count++; + finite_token = cur[j].id; } } llama_sampler_accept(sampler, sequence[i]); - fprintf(stderr, " i=%zu: token=%d, not_neg_inf_count=%zu, not_neg_inf_token=%d\n", i, (int)sequence[i], not_neg_inf, (int)not_neg_inf_token); + fprintf(stderr, " i=%zu: token=%d, finite_count=%zu, finite_token=%d\n", i, (int)sequence[i], finite_count, (int)finite_token); - if (not_neg_inf == 1) { + if (finite_count == 1) { if (actual_force_start == SIZE_MAX) { actual_force_start = i; }