paiml · noahgift · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/crates/aprender-serve/src/api/cuda_chat_backend.rs b/crates/aprender-serve/src/api/cuda_chat_backend.rs
@@ -719,7 +719,17 @@ fn try_qwen3_moe_backend(
     // thread top_k/top_p/repeat_penalty/repeat_last_n/seed from the HTTP
     // request through to QuantizedGenerateConfig. Defaults match the dense
     // path's chat-completion behavior (greedy when unspecified).
+    //
+    // EOS stop-token: mirror the dense path (cuda_chat_backend.rs:113), which
+    // populates stop_tokens with the model's EOS so generation halts at the
+    // natural end of a turn. Without this, qwen3_moe burns the full max_tokens
+    // budget per turn, allowing self-prompted "Human:" runaway text — the
+    // root cause of paiml/claude-code-parity-apr M287's verbosity pattern.
     let defaults = QuantizedGenerateConfig::default();
+    let stop_tokens: Vec<u32> = state
+        .model_eos_token_id()
+        .into_iter()
+        .collect();
     let gen_config = QuantizedGenerateConfig {
         max_tokens,
         temperature: request.temperature.unwrap_or(defaults.temperature),
@@ -728,6 +738,7 @@ fn try_qwen3_moe_backend(
         repeat_penalty: request.repeat_penalty.unwrap_or(defaults.repeat_penalty),
         repeat_last_n: request.repeat_last_n.unwrap_or(defaults.repeat_last_n),
         seed: request.seed.unwrap_or(defaults.seed),
+        stop_tokens,
         ..defaults
     };
 

diff --git a/docs/specifications/aprender-train/distillation-epic-spec.md b/docs/specifications/aprender-train/distillation-epic-spec.md
@@ -16,6 +16,22 @@ The distillation epic ships **MODEL-2 v2** — `paiml/albor-370m-v2` (or v1.1.0)
 
 The epic's strategic value: it converts the stack from "runs end-to-end" → "produces models worth using". That's the difference between an existence proof and a product.
 
+## Status (2026-05-20) — Phases 1-3 CLOSED, Phase 4 RUNNING
+
+| Phase | Status | Evidence |
+|-------|--------|----------|
+| 1 — Teacher provider | ✅ MERGED | PMAT-691/693 (#1786, #1787) |
+| 2 — Student fwd/bwd + KD | ✅ MERGED | PMAT-692/695/696/697 (#1788–#1797) |
+| 3 — E2E smoke on Blackwell GB10 | ✅ DISCHARGED | #1828 (F-DISTILL-SMOKE-001: 7.67 → 7.20) |
+| 3b — seq_len=256 scale verify | ✅ DISCHARGED | #1833 PMAT-698o |
+| **4 — 50K training (Stage D)** | 🟡 **RUNNING** | PID 196378 on gx10, ETA ~22h from 2026-05-20 13:43 UTC (≈ 2026-05-21 11:43 UTC) |
+| 5 — HumanEval pass@1 | ⏳ ready | #1847 `dispatch-phase5-humaneval-gx10.sh` |
+| 6 — Publish v2 | ⏳ ready | #1848 `dispatch-phase6-publish.sh` |
+
+**Blackwell cascade** (closed 2026-05-19; 11 PRs to get the first-ever aprender training run working on sm_121): see `blackwell-cascade-postmortem.md` for the full timeline and lessons. The root cause was a one-character macro bug; 7 distinct defects had to be unwound to surface it.
+
+**Stage C real-corpus dispatch** (2026-05-20): first end-to-end Phase 4 trial — `initial_loss=15.61 → final_loss=6.01` (Δ=-9.60 over 124 steps in 232.4s) with the codeparrot Python corpus via `ShardBatchSource` on GB10. Evidence: `evidence/distill-stage-c-trial/`.
+
 ## Current state (2026-05-18)
 
 The distillation infrastructure is **scaffolded but stubbed**: