Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions docs/demos/sender/voice-input.vue
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,44 @@
import { ref } from 'vue'
import { TrSender, VoiceButton } from '@opentiny/tiny-robot'

const voiceMode = ref<'mixed' | 'continuous'>('mixed')
const voiceMode = ref<'append' | 'replace'>('append')
</script>

<template>
<div style="display: flex; flex-direction: column; gap: 16px">
<div style="display: flex; align-items: center; gap: 12px">
<span style="font-weight: 500">模式:</span>
<label style="display: flex; align-items: center; gap: 4px; cursor: pointer">
<input type="radio" value="mixed" v-model="voiceMode" style="cursor: pointer" />
<span>混合输入</span>
<input type="radio" value="append" v-model="voiceMode" style="cursor: pointer" />
<span>追加模式</span>
</label>
<label style="display: flex; align-items: center; gap: 4px; cursor: pointer">
<input type="radio" value="continuous" v-model="voiceMode" style="cursor: pointer" />
<span>连续识别</span>
<input type="radio" value="replace" v-model="voiceMode" style="cursor: pointer" />
<span>替换模式</span>
</label>
</div>
<div style="padding: 8px 12px; background: #f5f7fa; border-radius: 4px; font-size: 13px; color: #666">
{{ voiceMode === 'mixed' ? '语音识别结果追加到输入框,可继续编辑' : '持续识别语音并自动替换内容' }}
{{
voiceMode === 'append'
? '追加模式:每次语音识别结果会追加到输入框末尾,适合混合输入'
: '替换模式:在同一次录音会话内持续识别,并用最新结果更新本次语音输入内容'
}}
</div>
<tr-sender
:key="voiceMode"
mode="multiple"
:placeholder="voiceMode === 'mixed' ? '点击麦克风说话,识别结果会追加到此处...' : '点击麦克风开始连续识别...'"
:placeholder="
voiceMode === 'append'
? '可以打字或点击麦克风说话,语音内容会追加...'
: '点击麦克风连续说话,本次语音内容会持续更新...'
"
>
<template #footer-right>
<VoiceButton
:speech-config="
voiceMode === 'mixed'
voiceMode === 'append'
? { autoReplace: false, interimResults: true }
: { autoReplace: true, continuous: true }
: { autoReplace: true, continuous: true, interimResults: true }
"
/>
</template>
Expand Down
30 changes: 21 additions & 9 deletions docs/src/components/sender.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
---
---
Comment thread
coderabbitai[bot] marked this conversation as resolved.
outline: [1, 3]
---

Expand Down Expand Up @@ -155,9 +155,22 @@ TrSender.Suggestion.configure({ items: suggestions, filterFn: customFilter })

#### 基础语音识别

使用浏览器内置的语音识别功能,支持混合输入和连续识别两种模式。
使用浏览器内置的语音识别功能,支持追加写入和替换写入两种体验。可通过 `speechConfig.lang` 显式指定识别语言,并结合 `speechConfig.continuous` 控制是否持续识别。

<demo vue="../../demos/sender/voice-input.vue" title="基础语音输入" description="使用浏览器内置语音识别,展示追加写入和连续替换两种体验。" />

:::tip lang 语言说明
`lang` 用于指定语音识别语言,建议显式传入,并与页面的 `html lang` 保持一致,避免页面语言和浏览器环境语言不一致时出现识别偏差。

<demo vue="../../demos/sender/voice-input.vue" title="基础语音输入" description="使用浏览器内置语音识别,支持混合输入和连续识别。" />
常见取值示例:

| 值 | 说明 |
| --- | --- |
| `en` | 英语 |
| `zh` | 中文 |
| `zh-CN` | 简体中文 |
| `en-US` | 美式英语 |
:::
Comment thread
SonyLeo marked this conversation as resolved.
Outdated

#### 自定义语音服务

Expand Down Expand Up @@ -429,7 +442,7 @@ onSelect: (item) => {
| tooltipPlacement | Tooltip 位置 | `TooltipPlacement` | `'top'` |
| speechConfig | 语音配置 | `SpeechConfig` | - |
| autoInsert | 是否自动插入识别结果到编辑器 | `boolean` | `true` |
| onButtonClick | 按钮点击拦截器 | `Function` | - |
| onButtonClick | 按钮点击拦截器 | `(isRecording: boolean, preventDefault: () => void) => void \| Promise<void>` | - |

## Slots

Expand Down Expand Up @@ -597,11 +610,10 @@ type TooltipPlacement =
// SpeechConfig 语音配置
interface SpeechConfig {
customHandler?: SpeechHandler // 自定义语音处理器
lang?: string // 识别语言,默认浏览器语言
continuous?: boolean // 是否持续识别
interimResults?: boolean // 是否返回中间结果
autoReplace?: boolean // 是否自动替换内容
onVoiceButtonClick?: (isRecording, preventDefault) => void // 按钮点击拦截器
lang?: string // 内置 Web Speech 的识别语言;未传入时使用 navigator.language
continuous?: boolean // 内置 Web Speech 是否持续识别
interimResults?: boolean // 内置 Web Speech 是否返回中间结果
autoReplace?: boolean // 是否在本次录音期间仅用最新识别结果替换语音写入的内容区间
}

// 模板项(联合类型)
Expand Down
107 changes: 98 additions & 9 deletions packages/components/src/sender-actions/voice-button/index.vue
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<script setup lang="ts">
import { computed } from 'vue'
import { computed, ref } from 'vue'
import { useSenderContext } from '../../sender/context'
import { useSpeechHandler } from './useSpeechHandler'
import ActionButton from '../action-button/index.vue'
Expand All @@ -16,34 +16,123 @@ const emit = defineEmits<VoiceButtonEmits>()
// Pull the minimal dependencies from the Sender context: only `editor` and `disabled` are needed.
const { editor, disabled: contextDisabled } = useSenderContext()
// The button is disabled when either its own prop or the surrounding Sender context says so.
const isDisabled = computed(() => props.disabled || contextDisabled.value)
// True when `speechConfig.autoReplace` is set; defaults to false when the config or flag is absent.
const isAutoReplace = computed(() => props.speechConfig?.autoReplace ?? false)
// Document range ({ from, to }) tracked for the current recording session; null until one is captured.
const speechRange = ref<{ from: number; to: number } | null>(null)
// Final transcript text accumulated so far during the current recording session.
const committedTranscript = ref('')
// Separator placed in front of replaced speech text: either a single space or the empty string.
const speechPrefix = ref('')

/**
 * Drops every piece of per-recording state: the separator prefix, the
 * accumulated final transcript, and the tracked document range.
 */
const resetSpeechSession = () => {
  speechPrefix.value = ''
  committedTranscript.value = ''
  speechRange.value = null
}

/**
 * Returns the range tracked for the current recording session, capturing it
 * from the editor selection the first time it is needed.
 *
 * When no range is tracked yet and no editor is available, the current
 * (empty) value is returned unchanged.
 */
const ensureSpeechRange = () => {
  const instance = editor.value

  if (speechRange.value || !instance) {
    return speechRange.value
  }

  const { from, to } = instance.state.selection

  // Only a collapsed selection looks at the text just before the caret.
  let previousText = ''
  if (from === to) {
    previousText = instance.state.doc.resolve(from).nodeBefore?.textContent ?? ''
  }

  // Separate the speech text from preceding text that does not already end in whitespace.
  const needsSeparator = previousText !== '' && /\S$/.test(previousText)
  speechPrefix.value = needsSeparator ? ' ' : ''

  speechRange.value = { from, to }
  return speechRange.value
}

/**
 * Focuses the editor. In auto-replace mode with a tracked range the caret goes
 * to the end of that range; otherwise it goes to the end of the document.
 */
const focusEditor = () => {
  const instance = editor.value
  if (!instance) return

  const range = speechRange.value
  const target = isAutoReplace.value && range ? range.to : 'end'

  instance.commands.focus(target)
}

/**
 * Append mode: inserts the transcript followed by a single space at the
 * current editor position, then restores focus.
 *
 * Does nothing when `autoInsert` is off, no editor exists, or the transcript
 * is empty.
 */
const appendTranscript = (transcript: string) => {
  const instance = editor.value
  if (!(props.autoInsert && instance && transcript)) return

  instance.commands.insertContent(`${transcript} `)
  focusEditor()
}

/**
 * Replace mode: overwrites the range tracked for this recording session with
 * the separator prefix plus the given transcript, then updates the tracked
 * range to cover the newly written text.
 *
 * Does nothing when `autoInsert` is off, no editor exists, or the transcript
 * is empty.
 */
const replaceTranscript = (transcript: string) => {
  const instance = editor.value
  if (!(props.autoInsert && instance && transcript)) return

  const target = ensureSpeechRange()
  // The prefix must be read after ensureSpeechRange(), which may have just set it.
  const text = speechPrefix.value + transcript

  if (!target) return

  const { from, to } = target
  instance.view.dispatch(instance.state.tr.insertText(text, from, to))

  // The next replacement must overwrite exactly what was written here.
  speechRange.value = { from, to: from + text.length }

  focusEditor()
}

/**
 * Folds a newly finalized transcript into the session's committed text and
 * returns the committed text.
 *
 * - An empty transcript leaves the committed text untouched.
 * - A transcript that starts with the committed text (or arrives when nothing
 *   is committed yet) replaces it.
 * - Otherwise the transcript is appended, unless the committed text already
 *   ends with it.
 */
const mergeCommittedTranscript = (transcript: string) => {
  const committed = committedTranscript.value

  if (!transcript) return committed

  const supersedes = !committed || transcript.startsWith(committed)

  if (supersedes) {
    committedTranscript.value = transcript
  } else if (!committed.endsWith(transcript)) {
    // Reaching here means the transcript differs from the committed text.
    committedTranscript.value = committed + transcript
  }

  return committedTranscript.value
}

// Speech options: a plain object rather than a computed, so a new object is not created on every access.
// NOTE(review): `props.speechConfig` is spread once when this object is built, so later changes to its
// fields are not reflected here; only `autoReplace` is re-read on each event, via `isAutoReplace`.
const speechOptions = {
  ...props.speechConfig,
  // A new recording starts: clear leftover session state and, in replace mode, capture the range up front.
  onStart: () => {
    resetSpeechSession()
    if (isAutoReplace.value) {
      ensureSpeechRange()
    }
    emit('speech-start')
  },
  // Interim text is written to the editor only in replace mode; it is always forwarded to listeners.
  onInterim: (transcript: string) => {
    if (isAutoReplace.value) {
      replaceTranscript(transcript)
    }
    emit('speech-interim', transcript)
  },
  // Final text: replace mode rewrites the session range with the merged committed text,
  // append mode inserts the new chunk. Both helpers check `autoInsert` themselves.
  onFinal: (transcript: string) => {
    if (isAutoReplace.value) {
      replaceTranscript(mergeCommittedTranscript(transcript))
    } else {
      appendTranscript(transcript)
    }
    emit('speech-final', transcript)
  },
  // Recording ended: focus the editor so the caret stays visible, then drop session state.
  onEnd: (transcript?: string) => {
    if (editor.value) {
      focusEditor()
    }
    resetSpeechSession()
    emit('speech-end', transcript)
  },
  // On failure drop session state and forward the error.
  onError: (error: Error) => {
    resetSpeechSession()
    emit('speech-error', error)
  },
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ export interface SpeechConfig {
lang?: string // 识别语言,默认浏览器语言
continuous?: boolean // 是否持续识别
interimResults?: boolean // 是否返回中间结果
autoReplace?: boolean // 是否自动替换当前输入内容
onVoiceButtonClick?: (isRecording: boolean, preventDefault: () => void) => void | Promise<void> // 录音按钮点击拦截器
autoReplace?: boolean // 是否在本次录音期间仅替换语音写入的内容区间
}

// 语音识别状态
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,47 @@
import type { SpeechCallbacks, SpeechHandler, SpeechConfig } from './speech.types'

/** Transcript text extracted from a single recognition result event, split by finality. */
interface ParsedSpeechResult {
// Concatenated text of the results flagged as final.
finalTranscript: string
// Concatenated text of the results that are not final yet.
interimTranscript: string
}

/**
 * Splits the results carried by a recognition event into final and interim text.
 *
 * Only results from `event.resultIndex` onward are read. For each result the
 * first alternative's transcript is used; results without a transcript are
 * skipped.
 *
 * @param event - Recognition result event to read.
 * @returns The concatenated final text and the concatenated interim text.
 */
export function parseSpeechRecognitionResult(event: SpeechRecognitionEvent): ParsedSpeechResult {
  const finalParts: string[] = []
  const interimParts: string[] = []

  let cursor = event.resultIndex
  while (cursor < event.results.length) {
    const entry = event.results[cursor]
    const text = entry[0]?.transcript ?? ''

    if (text) {
      const bucket = entry.isFinal ? finalParts : interimParts
      bucket.push(text)
    }

    cursor += 1
  }

  return {
    finalTranscript: finalParts.join(''),
    interimTranscript: interimParts.join(''),
  }
}

/**
* 内置 Web Speech API 处理器
* 基于浏览器原生 Web Speech API 实现的语音识别
*/
export class WebSpeechHandler implements SpeechHandler {
private recognition?: SpeechRecognition
private options: SpeechConfig
private finalizedTranscript: string = ''

/** Clears the final transcript text accumulated for the current recognition session. */
private resetSessionTranscript(): void {
this.finalizedTranscript = ''
}

/**
* 初始化语音识别实例
Expand Down Expand Up @@ -45,25 +80,33 @@ export class WebSpeechHandler implements SpeechHandler {
*/
private setupEventHandlers(callbacks: SpeechCallbacks): void {
  // Wires the recognition instance's start/end/result/error events to the caller's callbacks
  // and keeps `finalizedTranscript` in step with the session.
  if (!this.recognition || !callbacks) return

  // Each session starts from an empty accumulated transcript.
  this.recognition.onstart = () => {
    this.resetSessionTranscript()
    callbacks.onStart()
  }

  // Report the accumulated final text once (undefined when nothing was finalized), then clear it.
  this.recognition.onend = () => {
    callbacks.onEnd(this.finalizedTranscript || undefined)
    this.resetSessionTranscript()
  }

  this.recognition.onresult = (event: SpeechRecognitionEvent) => {
    const { finalTranscript, interimTranscript } = parseSpeechRecognitionResult(event)

    // onFinal receives only the newly finalized chunk; the running total is kept on the handler.
    if (finalTranscript) {
      this.finalizedTranscript += finalTranscript
      callbacks.onFinal(finalTranscript)
    }

    // onInterim receives the whole session text so far: finalized text plus the pending interim text.
    if (interimTranscript) {
      callbacks.onInterim(this.finalizedTranscript + interimTranscript)
    }
  }

  // Surface the error first, then drop session state and release the handler's resources.
  this.recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
    callbacks.onError(new Error(event.error))
    this.resetSessionTranscript()
    this.cleanup()
  }
}
Expand All @@ -88,8 +131,10 @@ export class WebSpeechHandler implements SpeechHandler {
callbacks.onError(new Error('浏览器不支持语音识别'))
return
}
// 绑定事件处理器

this.resetSessionTranscript()
this.setupEventHandlers(callbacks)

try {
this.recognition.start()
} catch (error) {
Expand All @@ -102,7 +147,10 @@ export class WebSpeechHandler implements SpeechHandler {
*/
stop(): void {
if (!this.recognition) return

this.cleanup()
this.resetSessionTranscript()

try {
this.recognition.stop()
} catch (error) {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Expand Down
Loading