livekit · chenghao-mou · May 7, 2026 · May 7, 2026 · May 8, 2026 · May 8, 2026
diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py
@@ -219,6 +219,9 @@ def __init__(self, agent: Agent, sess: AgentSession) -> None:
         # speeches that audio playout finished but not done because of tool calls
         self._background_speeches: set[SpeechHandle] = set()
 
+        # True between the "stt audio discarding started" and "stopped" logs, so each is emitted once per window
+        self._skip_stt_warning_started: bool = False
+
     def _validate_turn_detection(
         self, turn_detection: TurnDetectionMode | None
     ) -> TurnDetectionMode | None:
@@ -1003,20 +1006,40 @@ def push_audio(self, frame: rtc.AudioFrame) -> None:
         if not self._started:
             return
 
-        should_discard = (
-            self._current_speech
-            and not self._current_speech.allow_interruptions
-            and self._session.options.interruption["discard_audio_if_uninterruptible"]
-        ) or (
+        aec_warmup_active: bool = (
             self._session.agent_state == "speaking"
             and self._session._aec_warmup_remaining > 0
             and self._session._aec_warmup_timer is not None
         )
 
+        speech_active: bool = (
+            self._current_speech is not None
+            and not self._current_speech.done()
+            and not self._current_speech.interrupted
+            and not self._current_speech.allow_interruptions
+            and self._session.options.interruption["discard_audio_if_uninterruptible"]
+        )
+
+        should_discard: bool = aec_warmup_active or speech_active
+
         if not should_discard:
             if self._rt_session is not None:
                 self._rt_session.push_audio(frame)
 
+        # log (info) once when a continuous discard window starts and once when it ends
+        if should_discard and not self._skip_stt_warning_started:
+            self._skip_stt_warning_started = True
+            logger.info(
+                "stt audio discarding started",
+                extra={
+                    "aec_warmup_active": aec_warmup_active,
+                    "speech_active": speech_active,
+                },
+            )
+        elif not should_discard and self._skip_stt_warning_started:
+            self._skip_stt_warning_started = False
+            logger.info("stt audio discarding stopped")
+
         # Always forward to _audio_recognition for VAD, even when discarding STT/LLM
         # VAD needs frames to detect speech end and update user state correctly
         if self._audio_recognition is not None: