opentiny · SonyLeo · Mar 17, 2026 · Mar 18, 2026 · Mar 18, 2026 · Apr 16, 2026
diff --git a/docs/demos/sender/voice-input.vue b/docs/demos/sender/voice-input.vue
@@ -2,36 +2,44 @@
 import { ref } from 'vue'
 import { TrSender, VoiceButton } from '@opentiny/tiny-robot'
 
-const voiceMode = ref<'mixed' | 'continuous'>('mixed')
+const voiceMode = ref<'append' | 'replace'>('append')
 </script>
 
 <template>
   <div style="display: flex; flex-direction: column; gap: 16px">
     <div style="display: flex; align-items: center; gap: 12px">
       <span style="font-weight: 500">模式：</span>
       <label style="display: flex; align-items: center; gap: 4px; cursor: pointer">
-        <input type="radio" value="mixed" v-model="voiceMode" style="cursor: pointer" />
-        <span>混合输入</span>
+        <input type="radio" value="append" v-model="voiceMode" style="cursor: pointer" />
+        <span>追加模式</span>
       </label>
       <label style="display: flex; align-items: center; gap: 4px; cursor: pointer">
-        <input type="radio" value="continuous" v-model="voiceMode" style="cursor: pointer" />
-        <span>连续识别</span>
+        <input type="radio" value="replace" v-model="voiceMode" style="cursor: pointer" />
+        <span>替换模式</span>
       </label>
     </div>
     <div style="padding: 8px 12px; background: #f5f7fa; border-radius: 4px; font-size: 13px; color: #666">
-      {{ voiceMode === 'mixed' ? '语音识别结果追加到输入框，可继续编辑' : '持续识别语音并自动替换内容' }}
+      {{
+        voiceMode === 'append'
+          ? '追加模式：每次语音识别结果会追加到输入框末尾，适合混合输入'
+          : '替换模式：在同一次录音会话内持续识别，并用最新结果更新本次语音输入内容'
+      }}
     </div>
     <tr-sender
       :key="voiceMode"
       mode="multiple"
-      :placeholder="voiceMode === 'mixed' ? '点击麦克风说话，识别结果会追加到此处...' : '点击麦克风开始连续识别...'"
+      :placeholder="
+        voiceMode === 'append'
+          ? '可以打字或点击麦克风说话，语音内容会追加...'
+          : '点击麦克风连续说话，本次语音内容会持续更新...'
+      "
     >
       <template #footer-right>
         <VoiceButton
           :speech-config="
-            voiceMode === 'mixed'
+            voiceMode === 'append'
               ? { autoReplace: false, interimResults: true }
-              : { autoReplace: true, continuous: true }
+              : { autoReplace: true, continuous: true, interimResults: true }
           "
         />
       </template>

diff --git a/docs/src/components/sender.md b/docs/src/components/sender.md
@@ -1,4 +1,4 @@
----
+---
 outline: [1, 3]
 ---
 
@@ -155,9 +155,22 @@ TrSender.Suggestion.configure({ items: suggestions, filterFn: customFilter })
 
 #### 基础语音识别
 
-使用浏览器内置的语音识别功能，支持混合输入和连续识别两种模式。
+使用浏览器内置的语音识别功能，支持追加写入和替换写入两种体验。可通过 `speechConfig.lang` 显式指定识别语言，并结合 `speechConfig.continuous` 控制是否持续识别。
+
+<demo vue="../../demos/sender/voice-input.vue" title="基础语音输入" description="使用浏览器内置语音识别，展示追加写入和替换写入两种体验。" />
+
+:::tip lang 语言说明
+`lang` 用于指定语音识别语言，建议显式传入，并与页面的 `html lang` 保持一致，避免页面语言和浏览器环境语言不一致时出现识别偏差。
 
-<demo vue="../../demos/sender/voice-input.vue" title="基础语音输入" description="使用浏览器内置语音识别，支持混合输入和连续识别。" />
+常见取值示例：
+
+| 值 | 说明 |
+| --- | --- |
+| `en` | 英语 |
+| `zh` | 中文 |
+| `zh-CN` | 简体中文 |
+| `en-US` | 美式英语 |
+:::
 
 #### 自定义语音服务
 
@@ -429,7 +442,7 @@ onSelect: (item) => {
 | tooltipPlacement | Tooltip 位置                 | `TooltipPlacement`    | `'top'`     |
 | speechConfig     | 语音配置                     | `SpeechConfig`        | -           |
 | autoInsert       | 是否自动插入识别结果到编辑器 | `boolean`             | `true`      |
-| onButtonClick    | 按钮点击拦截器               | `Function`            | -           |
+| onButtonClick    | 按钮点击拦截器               | `(isRecording: boolean, preventDefault: () => void) => void \| Promise<void>` | - |
 
 ## Slots
 
@@ -597,11 +610,10 @@ type TooltipPlacement =
 // SpeechConfig 语音配置
 interface SpeechConfig {
   customHandler?: SpeechHandler // 自定义语音处理器
-  lang?: string // 识别语言，默认浏览器语言
-  continuous?: boolean // 是否持续识别
-  interimResults?: boolean // 是否返回中间结果
-  autoReplace?: boolean // 是否自动替换内容
-  onVoiceButtonClick?: (isRecording, preventDefault) => void // 按钮点击拦截器
+  lang?: string // 内置 Web Speech 的识别语言；未传入时使用 navigator.language
+  continuous?: boolean // 内置 Web Speech 是否持续识别
+  interimResults?: boolean // 内置 Web Speech 是否返回中间结果
+  autoReplace?: boolean // 是否在本次录音期间仅用最新识别结果替换语音写入的内容区间
 }
 
 // 模板项（联合类型）

diff --git a/packages/components/src/sender-actions/voice-button/index.vue b/packages/components/src/sender-actions/voice-button/index.vue
@@ -1,5 +1,5 @@
 <script setup lang="ts">
-import { computed } from 'vue'
+import { computed, ref } from 'vue'
 import { useSenderContext } from '../../sender/context'
 import { useSpeechHandler } from './useSpeechHandler'
 import ActionButton from '../action-button/index.vue'
@@ -16,34 +16,123 @@ const emit = defineEmits<VoiceButtonEmits>()
 // 从 Context 获取最小依赖：只需要 editor 和 disabled
 const { editor, disabled: contextDisabled } = useSenderContext()
 const isDisabled = computed(() => props.disabled || contextDisabled.value)
+const isAutoReplace = computed(() => props.speechConfig?.autoReplace ?? false) // Replace-mode flag; false when speechConfig.autoReplace is unset.
+const speechRange = ref<{ from: number; to: number } | null>(null) // Editor range holding this session's speech text; null when none is tracked.
+const committedTranscript = ref('') // Final transcript merged so far in the current session.
+const speechPrefix = ref('') // '' or ' ', prepended to the transcript when it is written in replace mode.
+
+const resetSpeechSession = () => { // Clear all per-recording state: tracked range, merged final transcript, and prefix.
+  speechRange.value = null
+  committedTranscript.value = ''
+  speechPrefix.value = ''
+}
+
+const ensureSpeechRange = () => { // Lazily capture the current selection as the speech range and return it.
+  if (speechRange.value || !editor.value) { // Already tracked, or no editor: return the current value (may be null).
+    return speechRange.value
+  }
+
+  const { from, to } = editor.value.state.selection
+  const previousText = from === to ? (editor.value.state.doc.resolve(from).nodeBefore?.textContent ?? '') : '' // Preceding text is inspected only for a collapsed selection.
+
+  speechPrefix.value = previousText && /\S$/.test(previousText) ? ' ' : '' // Use a space prefix when the text before the cursor ends in non-whitespace.
+  speechRange.value = {
+    from,
+    to,
+  }
+
+  return speechRange.value
+}
+
+const focusEditor = () => { // Focus the editor; the target position depends on the mode.
+  if (!editor.value) return
+
+  if (isAutoReplace.value && speechRange.value) { // Replace mode with a tracked range: focus at the end of the speech text.
+    editor.value.commands.focus(speechRange.value.to)
+    return
+  }
+
+  editor.value.commands.focus('end') // Otherwise fall back to focus('end').
+}
+
+const appendTranscript = (transcript: string) => { // Append mode: insert the transcript followed by a space, then refocus.
+  if (!props.autoInsert || !editor.value || !transcript) return // No-op when auto-insert is off, the editor is missing, or the transcript is empty.
+
+  editor.value.commands.insertContent(transcript + ' ')
+  focusEditor()
+}
+
+const replaceTranscript = (transcript: string) => { // Replace mode: overwrite the tracked speech range with prefix + transcript.
+  if (!props.autoInsert || !editor.value || !transcript) return // No-op when auto-insert is off, the editor is missing, or the transcript is empty.
+
+  const range = ensureSpeechRange()
+  const nextTranscript = `${speechPrefix.value}${transcript}`
+
+  if (!range) {
+    return
+  }
+
+  const tr = editor.value.state.tr.insertText(nextTranscript, range.from, range.to) // NOTE(review): range is not remapped if the document changes mid-recording; confirm stale positions cannot occur.
+  editor.value.view.dispatch(tr)
+
+  speechRange.value = { // Track the span now occupied by the inserted text so the next update replaces it.
+    from: range.from,
+    to: range.from + nextTranscript.length,
+  }
+
+  focusEditor()
+}
+
+const mergeCommittedTranscript = (transcript: string) => { // Fold a final transcript into committedTranscript and return the merged text.
+  if (!transcript) { // Empty input leaves the committed text unchanged.
+    return committedTranscript.value
+  }
+
+  if (!committedTranscript.value || transcript.startsWith(committedTranscript.value)) { // First result, or a transcript that already begins with the committed text: take it as-is.
+    committedTranscript.value = transcript
+    return committedTranscript.value
+  }
+
+  if (committedTranscript.value !== transcript && !committedTranscript.value.endsWith(transcript)) { // Append unless identical to, or already the tail of, the committed text. NOTE(review): this also drops a genuinely repeated phrase; confirm intended.
+    committedTranscript.value += transcript
+  }
+
+  return committedTranscript.value
+}
 
 // 语音配置 - 使用普通对象而不是 computed，避免每次都创建新对象
 const speechOptions = {
   ...props.speechConfig,
   onStart: () => {
+    resetSpeechSession() // Begin every recording with clean session state.
+    if (isAutoReplace.value) { // Replace mode: capture the insertion range up front.
+      ensureSpeechRange()
+    }
     emit('speech-start')
   },
   onInterim: (transcript: string) => {
+    if (isAutoReplace.value) { // Only replace mode writes interim results into the editor.
+      replaceTranscript(transcript)
+    }
     emit('speech-interim', transcript)
   },
   onFinal: (transcript: string) => {
-    // 自动插入到编辑器(可配置)
-    if (props.autoInsert && editor.value) {
-      // 插入内容
-      editor.value.commands.insertContent(transcript + ' ')
-      // 确保光标在内容末尾
-      editor.value.commands.focus('end')
+    if (isAutoReplace.value) { // Replace mode rewrites the tracked range with the merged final text.
+      replaceTranscript(mergeCommittedTranscript(transcript))
+    } else { // Append mode inserts the final text as new content.
+      appendTranscript(transcript)
     }
     emit('speech-final', transcript)
   },
   onEnd: (transcript?: string) => {
-    // 结束后聚焦编辑器，确保光标可见
     if (editor.value) {
-      editor.value.commands.focus('end')
+      focusEditor() // Must run before the reset below because focusEditor reads speechRange.
     }
+    resetSpeechSession()
     emit('speech-end', transcript)
   },
   onError: (error: Error) => {
+    resetSpeechSession() // Drop session state when recognition fails.
     emit('speech-error', error)
   },
 }

diff --git a/packages/components/src/sender-actions/voice-button/speech.types.ts b/packages/components/src/sender-actions/voice-button/speech.types.ts
@@ -27,8 +27,7 @@ export interface SpeechConfig {
   lang?: string // 识别语言，默认浏览器语言
   continuous?: boolean // 是否持续识别
   interimResults?: boolean // 是否返回中间结果
-  autoReplace?: boolean // 是否自动替换当前输入内容
-  onVoiceButtonClick?: (isRecording: boolean, preventDefault: () => void) => void | Promise<void> // 录音按钮点击拦截器
+  autoReplace?: boolean // 是否在本次录音期间仅替换语音写入的内容区间
 }
 
 // 语音识别状态

diff --git a/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts b/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts
@@ -1,12 +1,47 @@
 import type { SpeechCallbacks, SpeechHandler, SpeechConfig } from './speech.types'
 
+interface ParsedSpeechResult { // Transcripts extracted from one SpeechRecognitionEvent, split by finality.
+  finalTranscript: string // Concatenated text of results flagged isFinal.
+  interimTranscript: string // Concatenated text of results not flagged isFinal.
+}
+
+export function parseSpeechRecognitionResult(event: SpeechRecognitionEvent): ParsedSpeechResult { // Split the event's results into final and interim text.
+  let finalTranscript = ''
+  let interimTranscript = ''
+
+  for (let index = event.resultIndex; index < event.results.length; index++) { // Visit only results from event.resultIndex onward.
+    const result = event.results[index]
+    const transcript = result[0]?.transcript ?? '' // Use the first alternative; a missing one counts as empty.
+
+    if (!transcript) { // Skip results that carry no text.
+      continue
+    }
+
+    if (result.isFinal) {
+      finalTranscript += transcript
+    } else {
+      interimTranscript += transcript
+    }
+  }
+
+  return {
+    finalTranscript,
+    interimTranscript,
+  }
+}
+
 /**
  * 内置 Web Speech API 处理器
  * 基于浏览器原生 Web Speech API 实现的语音识别
  */
 export class WebSpeechHandler implements SpeechHandler {
   private recognition?: SpeechRecognition
   private options: SpeechConfig
+  private finalizedTranscript: string = '' // Final text accumulated during the current recognition session.
+
+  private resetSessionTranscript(): void { // Clear the accumulated final text.
+    this.finalizedTranscript = ''
+  }
 
   /**
    * 初始化语音识别实例
@@ -45,25 +80,33 @@ export class WebSpeechHandler implements SpeechHandler {
    */
   private setupEventHandlers(callbacks: SpeechCallbacks): void {
     if (!this.recognition || !callbacks) return
+
     this.recognition.onstart = () => {
+      this.resetSessionTranscript() // New session: discard text left from a previous one.
       callbacks.onStart()
     }
+
     this.recognition.onend = () => {
-      callbacks.onEnd()
+      callbacks.onEnd(this.finalizedTranscript || undefined) // Report the accumulated final text, or undefined when it is empty.
+      this.resetSessionTranscript()
     }
+
     this.recognition.onresult = (event: SpeechRecognitionEvent) => {
-      const transcript = Array.from(event.results)
-        .map((result) => result[0].transcript)
-        .join('')
-      const current = event.results[event.resultIndex]
-      if (current?.isFinal) {
-        callbacks.onFinal(transcript)
-      } else {
-        callbacks.onInterim(transcript)
+      const { finalTranscript, interimTranscript } = parseSpeechRecognitionResult(event)
+
+      if (finalTranscript) {
+        this.finalizedTranscript += finalTranscript
+        callbacks.onFinal(finalTranscript) // onFinal receives only the newly finalized text, not the running total.
+      }
+
+      if (interimTranscript) {
+        callbacks.onInterim(this.finalizedTranscript + interimTranscript) // onInterim receives the running final text plus the pending interim text.
       }
     }
+
     this.recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
       callbacks.onError(new Error(event.error))
+      this.resetSessionTranscript() // Drop accumulated text after the error has been reported.
       this.cleanup()
     }
   }
@@ -88,8 +131,10 @@ export class WebSpeechHandler implements SpeechHandler {
       callbacks.onError(new Error('浏览器不支持语音识别'))
       return
     }
-    // 绑定事件处理器
+
+    this.resetSessionTranscript()
     this.setupEventHandlers(callbacks)
+
     try {
       this.recognition.start()
     } catch (error) {
@@ -102,7 +147,10 @@ export class WebSpeechHandler implements SpeechHandler {
    */
   stop(): void {
     if (!this.recognition) return
+
     this.cleanup()
+    this.resetSessionTranscript()
+
     try {
       this.recognition.stop()
     } catch (error) {