Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/chainlit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
on_app_shutdown,
on_app_startup,
on_audio_chunk,
on_audio_discard,
on_audio_end,
on_audio_start,
on_chat_end,
Expand Down Expand Up @@ -193,6 +194,7 @@ def acall(self):
"on_app_shutdown",
"on_app_startup",
"on_audio_chunk",
"on_audio_discard",
"on_audio_end",
"on_audio_start",
"on_chat_end",
Expand Down
7 changes: 7 additions & 0 deletions backend/chainlit/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,13 @@ def on_audio_end(func: Callable) -> Callable:
return func


def on_audio_discard(func: Callable) -> Callable:
    """
    Register a hook that reacts to the user discarding their audio.

    Args:
        func (Callable): The user-defined hook to run on discard.

    Returns:
        Callable: The function that was passed in, returned as-is.
    """
    wrapped_hook = wrap_user_function(func, with_task=False)
    config.code.on_audio_discard = wrapped_hook
    return func


def author_rename(
func: Callable[[str], Awaitable[str]],
) -> Callable[[str], Awaitable[str]]:
Expand Down
1 change: 1 addition & 0 deletions backend/chainlit/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ class CodeSettings(BaseModel):
on_audio_start: Optional[Callable[[], Any]] = None
on_audio_chunk: Optional[Callable[["InputAudioChunk"], Any]] = None
on_audio_end: Optional[Callable[[], Any]] = None
on_audio_discard: Optional[Callable[[], Any]] = None
on_mcp_connect: Optional[Callable] = None
on_mcp_disconnect: Optional[Callable] = None
on_settings_edit: Optional[Callable[[Dict[str, Any]], Any]] = None
Expand Down
24 changes: 24 additions & 0 deletions backend/chainlit/socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,30 @@ async def audio_chunk(sid, payload: InputAudioChunkPayload):
asyncio.create_task(config.code.on_audio_chunk(InputAudioChunk(**payload)))


@sio.on("audio_discard")
async def audio_discard(sid):
    """Handle the user discarding the audio stream.

    Awaits the registered ``on_audio_discard`` hook when the audio feature
    is configured, enabled, and a hook is set, then asks the emitter to
    update the audio connection to ``"off"``.

    Args:
        sid: Socket id used to look up the websocket session.
    """
    session = WebsocketSession.require(sid)

    try:
        context = init_ws_context(session)
        config: ChainlitConfig = session.get_config()  # type: ignore

        if (
            config.features.audio
            and config.features.audio.enabled
            and config.code.on_audio_discard
        ):
            await config.code.on_audio_discard()

        # NOTE(review): placed after the hook block (not inside it), following
        # the blank-line separation in the source -- confirm intended nesting.
        await context.emitter.update_audio_connection("off")

    except asyncio.CancelledError:
        # Cancellation is deliberately ignored here.
        pass
    except Exception as e:
        logger.exception(e)


@sio.on("audio_end")
async def audio_end(sid):
"""Handle the end of the audio stream."""
Expand Down
4 changes: 3 additions & 1 deletion backend/chainlit/translations/en-US.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@
"speech": {
"start": "Start recording",
"stop": "Stop recording",
"connecting": "Connecting"
"connecting": "Connecting",
"accept": "Send recording",
"discard": "Discard recording"
},
"fileUpload": {
"dragDrop": "Drag and drop files here",
Expand Down
154 changes: 100 additions & 54 deletions frontend/src/components/chat/MessageComposer/VoiceButton.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { X } from 'lucide-react';
import { Check, LucideIcon, X } from 'lucide-react';
import { ReactNode } from 'react';
import { useHotkeys } from 'react-hotkeys-hook';

import { useAudio, useConfig } from '@chainlit/react-client';
Expand All @@ -20,17 +21,52 @@ interface Props {
disabled?: boolean;
}

/**
 * Ghost, icon-sized button wrapped in a tooltip.
 *
 * Renders the given Lucide icon inside the button and shows `tooltip`
 * as the tooltip content. Caller-supplied classes are appended after the
 * shared hover style.
 */
const IconButton = (props: {
  icon: LucideIcon;
  className?: string;
  disabled?: boolean;
  onClick: () => void;
  tooltip: ReactNode;
}) => {
  const { icon: Glyph, className, disabled, onClick, tooltip } = props;
  const buttonClasses = `hover:bg-muted ${className ?? ''}`;

  return (
    <Tooltip>
      <TooltipTrigger asChild>
        <Button
          disabled={disabled}
          variant="ghost"
          size="icon"
          className={buttonClasses}
          onClick={onClick}
        >
          <Glyph className="!size-5" />
        </Button>
      </TooltipTrigger>
      <TooltipContent>
        <p>{tooltip}</p>
      </TooltipContent>
    </Tooltip>
  );
};

const VoiceButton = ({ disabled }: Props) => {
const { config } = useConfig();
const { startConversation, endConversation, audioConnection } = useAudio();
const {
startConversation,
endConversation,
discardConversation,
audioConnection
} = useAudio();
const isEnabled = !!config?.features.audio.enabled;

useHotkeys(
'p',
() => {
if (!isEnabled) return;

// Double-check at execution time that we're not in a form field
const getDeepActiveElement = (): Element | null => {
let activeElement = document.activeElement;
while (
Expand All @@ -51,7 +87,7 @@ const VoiceButton = ({ disabled }: Props) => {
activeElement.getAttribute('contenteditable') === 'true';

if (isFormField || isContentEditable) {
return; // Don't execute the hotkey
return;
}
}

Expand All @@ -60,66 +96,76 @@ const VoiceButton = ({ disabled }: Props) => {
},
{
enableOnFormTags: false,
preventDefault: false // Don't prevent default - let letters be typed
preventDefault: false
},
[isEnabled, audioConnection, startConversation, endConversation]
);

if (!isEnabled) return null;

return (
<div className="flex items-center gap-1">
{audioConnection === 'on' ? (
<AudioPresence
type="client"
height={18}
width={36}
barCount={4}
barSpacing={2}
/>
) : null}
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<Button
<TooltipProvider>
<div className="flex items-center gap-1">
{audioConnection === 'on' ? (
<>
<AudioPresence
type="client"
height={18}
width={36}
barCount={4}
barSpacing={2}
/>
<IconButton
icon={X}
className="text-destructive"
disabled={disabled}
onClick={discardConversation}
tooltip={<Translator path="chat.speech.discard" />}
/>
<IconButton
icon={Check}
className="text-primary"
disabled={disabled}
variant="ghost"
size="icon"
className="hover:bg-muted"
onClick={
audioConnection === 'on'
? endConversation
: audioConnection === 'off'
? startConversation
: undefined
}
>
{audioConnection === 'on' ? <X className="!size-5" /> : null}
{audioConnection === 'off' ? (
<VoiceLines className="!size-6" />
) : null}
{audioConnection === 'connecting' ? (
<Loader className="!size-5" />
) : null}
</Button>
</TooltipTrigger>
<TooltipContent>
<p>
<Translator
path={
audioConnection === 'on'
? 'chat.speech.stop'
: audioConnection === 'off'
onClick={endConversation}
tooltip={<Translator path="chat.speech.accept" />}
/>
</>
) : (
<Tooltip>
<TooltipTrigger asChild>
<Button
disabled={disabled}
variant="ghost"
size="icon"
className="hover:bg-muted"
onClick={
audioConnection === 'off' ? startConversation : undefined
}
>
{audioConnection === 'off' ? (
<VoiceLines className="!size-6" />
) : null}
{audioConnection === 'connecting' ? (
<Loader className="!size-5" />
) : null}
</Button>
</TooltipTrigger>
<TooltipContent>
<p>
<Translator
path={
audioConnection === 'off'
? 'chat.speech.start'
: 'chat.speech.connecting'
}
suffix=" (P)"
/>
</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
}
suffix=" (P)"
/>
</p>
</TooltipContent>
</Tooltip>
)}
</div>
</TooltipProvider>
);
};
export default VoiceButton;
20 changes: 15 additions & 5 deletions libs/react-client/src/useAudio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,33 @@ const useAudio = () => {
const wavStreamPlayer = useRecoilValue(wavStreamPlayerState);
const isAiSpeaking = useRecoilValue(isAiSpeakingState);

const { startAudioStream, endAudioStream } = useChatInteract();
const { startAudioStream, endAudioStream, discardAudioStream } =
useChatInteract();

// Marks the audio connection as 'connecting', then awaits startAudioStream().
const startConversation = useCallback(async () => {
setAudioConnection('connecting');
await startAudioStream();
}, [startAudioStream]);

const endConversation = useCallback(async () => {
const stopRecording = useCallback(async () => {
setAudioConnection('off');
await wavRecorder.end();
await wavStreamPlayer.interrupt();
await Promise.all([wavRecorder.end(), wavStreamPlayer.interrupt()]);
}, [wavRecorder, wavStreamPlayer]);

const endConversation = useCallback(async () => {
await stopRecording();
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
await endAudioStream();
}, [endAudioStream, wavRecorder, wavStreamPlayer]);
}, [stopRecording, endAudioStream]);

// Discards the current recording: awaits stopRecording() first, then awaits
// discardAudioStream().
const discardConversation = useCallback(async () => {
await stopRecording();
await discardAudioStream();
}, [stopRecording, discardAudioStream]);

return {
startConversation,
endConversation,
discardConversation,
audioConnection,
isAiSpeaking,
wavRecorder,
Expand Down
5 changes: 5 additions & 0 deletions libs/react-client/src/useChatInteract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ const useChatInteract = () => {
session?.socket.emit('audio_end');
}, [session?.socket]);

// Emits the 'audio_discard' socket event; does nothing when there is no session.
const discardAudioStream = useCallback(() => {
session?.socket.emit('audio_discard');
}, [session?.socket]);

const replyMessage = useCallback(
(message: IStep) => {
if (askUser) {
Expand Down Expand Up @@ -213,6 +217,7 @@ const useChatInteract = () => {
startAudioStream,
sendAudioChunk,
endAudioStream,
discardAudioStream,
stopTask,
setIdToResume,
updateChatSettings,
Expand Down