livekit · chenghao-mou · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/livekit-agents/livekit/agents/voice/audio_recognition.py b/livekit-agents/livekit/agents/voice/audio_recognition.py
@@ -204,7 +204,7 @@ def __init__(
         self._stt_request_ids: list[str] = []
         self._closing = asyncio.Event()
 
-        self._vad_speech_started: bool = False
+        self._turn_speech_started: bool = False
 
     def update_options(
         self,
@@ -653,6 +653,7 @@ def clear_user_turn(self) -> None:
         self._audio_preflight_transcript = ""
         self._final_transcript_confidence = []
         self._user_turn_committed = False
+        self._turn_speech_started = False
 
         # reset stt to clear the buffer from previous user turn
         stt = self._stt
@@ -966,9 +967,9 @@ async def _on_stt_event(self, ev: stt.SpeechEvent) -> None:
     async def _on_vad_event(self, ev: vad.VADEvent) -> None:
         if ev.type == vad.VADEventType.START_OF_SPEECH:
             speech_start_time = time.time() - ev.speech_duration - ev.inference_duration
-            if not self._vad_speech_started:
+            if not self._turn_speech_started:
                 self._speech_start_time = speech_start_time
-                self._vad_speech_started = True
+                self._turn_speech_started = True
 
             with trace.use_span(self._ensure_user_turn_span(start_time=speech_start_time)):
                 self._hooks.on_start_of_speech(ev, speech_start_time=speech_start_time)
@@ -995,7 +996,6 @@ async def _on_vad_event(self, ev: vad.VADEvent) -> None:
             with trace.use_span(self._ensure_user_turn_span()):
                 self._hooks.on_end_of_speech(ev)
 
-            self._vad_speech_started = False
             self._speaking = False
 
             if self._vad_base_turn_detection or (
@@ -1156,7 +1156,7 @@ async def _bounce_eou_task(
                 # only reset if there is no new speech
                 if self._last_speaking_time == last_speaking_time:
                     self._speech_start_time = None
-                    self._vad_speech_started = False
+                    self._turn_speech_started = False
                     self._last_speaking_time = None
 
             self._user_turn_committed = False
@@ -1223,7 +1223,7 @@ async def _forward() -> None:
                 with trace.use_span(self._ensure_user_turn_span()):
                     self._hooks.on_end_of_speech(None)
                 self._speaking = False
-                self._vad_speech_started = False
+                self._turn_speech_started = False
 
     @utils.log_exceptions(logger=logger)
     async def _interruption_task(