useSpeechRecognition
A React hook for speech-to-text functionality using the Web Speech Recognition API. Convert voice to text in real-time with support for multiple languages, continuous listening, and interim results.
Installation
npx shadcn@latest add https://r.fiberui.com/r/hooks/use-speech-recognition.json
A React hook that provides speech-to-text functionality using the Web Speech Recognition API. Perfect for voice commands, dictation, accessibility features, and hands-free interfaces.
Source Code
View the full hook implementation in the Hook Source Code section below.
Permission Required
Important: The browser will prompt the user for microphone permission
before starting speech recognition. If the user denies permission, the
error will be set to not-allowed. Always handle this gracefully.
Related Hook
Need text-to-speech instead? See useSpeechSynthesis.
Features
- Real-time Transcription - Get interim results as the user speaks
- Continuous Mode - Keep listening after pauses in speech
- Multi-language Support - 50+ languages with BCP 47 language codes
- Error Handling - Typed errors with human-readable messages
- Control Functions - start, stop, abort, and resetTranscript
- Callbacks - onResult, onError, onEnd for event-driven workflows
- SSR Safe - Gracefully handles server-side rendering
Learn More
Basic Transcription
The simplest usage - click the microphone to start listening and see your speech transcribed in real-time. Interim results appear in gray while the final transcript is confirmed.
Speech Recognition is not supported in your browser.
Try using Chrome, Edge, or Safari.
"use client";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, RotateCcw } from "lucide-react";
/* BASIC USAGE - Voice Transcription */
export const Example1 = () => {
  // Transcription state and controls from the speech-recognition hook.
  const {
    transcript,
    interimTranscript,
    isListening,
    isSupported,
    error,
    errorMessage,
    start,
    stop,
    resetTranscript,
  } = useSpeechRecognition();

  // Browsers without the Web Speech API get a short notice instead of the UI.
  if (!isSupported) {
    return (
      <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
        <p className="font-medium">
          Speech Recognition is not supported in your browser.
        </p>
        <p className="mt-1 text-sm opacity-80">
          Try using Chrome, Edge, or Safari.
        </p>
      </div>
    );
  }

  // Derive presentation pieces up front so the JSX below stays flat.
  const MicIcon = isListening ? MicOff : Mic;
  const micClasses = `flex h-16 w-16 items-center justify-center rounded-full transition-all ${
    isListening
      ? "animate-pulse bg-red-500 text-white"
      : "bg-primary text-primary-foreground hover:bg-primary/90"
  }`;
  const statusText = isListening
    ? "Listening... speak now"
    : "Click the microphone to start";
  const hasText = Boolean(transcript) || Boolean(interimTranscript);

  return (
    <div className="flex w-full max-w-md flex-col gap-4">
      {/* Microphone Button */}
      <div className="flex items-center justify-center gap-3">
        <button onClick={isListening ? stop : start} className={micClasses}>
          <MicIcon className="h-6 w-6" />
        </button>
        <button
          onClick={resetTranscript}
          className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
          title="Reset transcript"
        >
          <RotateCcw className="h-4 w-4" />
        </button>
      </div>
      {/* Status */}
      <p className="text-muted-foreground text-center text-sm">{statusText}</p>
      {/* Transcript Display */}
      <div className="bg-muted/50 min-h-32 rounded-lg p-4">
        <p className="text-muted-foreground mb-2 text-xs font-medium uppercase tracking-wide">
          Transcript
        </p>
        <p className="text-sm leading-relaxed">
          {transcript}
          {interimTranscript && (
            <span className="text-muted-foreground italic">
              {interimTranscript}
            </span>
          )}
          {!hasText && (
            <span className="text-muted-foreground italic">
              Your speech will appear here...
            </span>
          )}
        </p>
      </div>
      {/* Error Display */}
      {error && (
        <div className="bg-destructive/10 text-destructive rounded-lg p-3 text-sm">
          {errorMessage}
        </div>
      )}
    </div>
  );
};
Voice Commands
Use speech recognition to execute actions based on voice commands. This example demonstrates a voice-controlled task manager where you can add, remove, and clear tasks by speaking.
"use client";
import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, ListChecks, Trash2 } from "lucide-react";
/* VOICE COMMANDS - Execute Actions from Speech */
export const Example2 = () => {
  // Seed tasks so the demo has something to remove.
  const [tasks, setTasks] = useState<string[]>([
    "Review PR #42",
    "Update documentation",
  ]);
  const [lastCommand, setLastCommand] = useState<string | null>(null);

  const { isListening, isSupported, start, stop, resetTranscript } =
    useSpeechRecognition({
      continuous: true,
      onResult: (text, isFinal) => {
        // Only act on finalized results; interim text is too noisy for commands.
        if (!isFinal) return;
        const command = text.toLowerCase().trim();
        setLastCommand(command);
        // Clear-all commands must be checked BEFORE the "delete ..." prefix:
        // previously "delete all" matched the remove branch (it starts with
        // "delete ") and filtered tasks containing "all" instead of clearing.
        if (command === "clear all" || command === "delete all") {
          setTasks([]);
        }
        // Add task command ("add task X" or "add X")
        else if (
          command.startsWith("add task ") ||
          command.startsWith("add ")
        ) {
          const taskName = command.replace(/^add( task)?\s+/i, "");
          if (taskName) {
            setTasks((prev) => [...prev, taskName]);
          }
        }
        // Remove task command — removes every task whose name contains the
        // spoken fragment (substring match, case-insensitive).
        else if (
          command.startsWith("remove task ") ||
          command.startsWith("delete ")
        ) {
          const taskName = command.replace(
            /^(remove task|delete)\s+/i,
            "",
          );
          setTasks((prev) =>
            prev.filter((t) => !t.toLowerCase().includes(taskName)),
          );
        }
      },
    });

  // Fallback UI for browsers without the Web Speech API.
  if (!isSupported) {
    return (
      <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
        Speech Recognition is not supported in your browser.
      </div>
    );
  }

  return (
    <div className="flex w-full max-w-md flex-col gap-4">
      {/* Header with mic button */}
      <div className="flex items-center justify-between">
        <div className="flex items-center gap-2">
          <ListChecks className="text-primary h-5 w-5" />
          <h3 className="font-semibold">Voice Task Manager</h3>
        </div>
        <button
          onClick={() => {
            if (isListening) {
              stop();
            } else {
              // Start each session with a clean transcript.
              resetTranscript();
              start();
            }
          }}
          className={`flex h-10 w-10 items-center justify-center rounded-full transition-all ${
            isListening
              ? "animate-pulse bg-red-500 text-white"
              : "bg-primary text-primary-foreground"
          }`}
        >
          {isListening ? (
            <MicOff className="h-4 w-4" />
          ) : (
            <Mic className="h-4 w-4" />
          )}
        </button>
      </div>
      {/* Commands hint */}
      <div className="bg-muted/50 rounded-lg p-3 text-xs">
        <p className="text-muted-foreground mb-1 font-medium">
          Try saying:
        </p>
        <ul className="text-muted-foreground space-y-0.5">
          <li>"Add task buy groceries"</li>
          <li>"Delete groceries"</li>
          <li>"Clear all"</li>
        </ul>
      </div>
      {/* Last command */}
      {lastCommand && (
        <p className="text-muted-foreground text-xs">
          Last command:{" "}
          <span className="text-foreground font-mono">
            "{lastCommand}"
          </span>
        </p>
      )}
      {/* Task list */}
      <div className="bg-background rounded-lg border">
        {tasks.length === 0 ? (
          <p className="text-muted-foreground p-4 text-center text-sm italic">
            No tasks. Say "Add task..." to create one.
          </p>
        ) : (
          <ul className="divide-y">
            {tasks.map((task, index) => (
              <li
                key={index}
                className="flex items-center justify-between p-3"
              >
                <span className="text-sm">{task}</span>
                <button
                  onClick={() =>
                    setTasks((prev) =>
                      prev.filter((_, i) => i !== index),
                    )
                  }
                  className="text-muted-foreground hover:text-destructive"
                >
                  <Trash2 className="h-4 w-4" />
                </button>
              </li>
            ))}
          </ul>
        )}
      </div>
    </div>
  );
};
Multi-language Support
Speech recognition supports 50+ languages. Select a language and speak in that language to see accurate transcription. The recognition engine automatically adjusts to the selected language's phonetics and vocabulary.
"use client";
import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, Globe, RotateCcw } from "lucide-react";
// Languages offered in the demo's <select>; `code` is a BCP 47 tag passed
// straight to the hook's `lang` option, `name` is the human-readable label.
const LANGUAGES = [
{ code: "en-US", name: "English (US)" },
{ code: "en-GB", name: "English (UK)" },
{ code: "es-ES", name: "Spanish (Spain)" },
{ code: "es-MX", name: "Spanish (Mexico)" },
{ code: "fr-FR", name: "French" },
{ code: "de-DE", name: "German" },
{ code: "it-IT", name: "Italian" },
{ code: "pt-BR", name: "Portuguese (Brazil)" },
{ code: "ja-JP", name: "Japanese" },
{ code: "ko-KR", name: "Korean" },
{ code: "zh-CN", name: "Chinese (Simplified)" },
{ code: "hi-IN", name: "Hindi" },
{ code: "ar-SA", name: "Arabic" },
{ code: "ru-RU", name: "Russian" },
];
/* LANGUAGE SELECTOR - Multi-language Support */
export const Example3 = () => {
  // Currently selected BCP 47 language code for recognition.
  const [selectedLang, setSelectedLang] = useState("en-US");

  const {
    transcript,
    interimTranscript,
    isListening,
    isSupported,
    start,
    stop,
    resetTranscript,
  } = useSpeechRecognition({
    lang: selectedLang,
    continuous: true,
    interimResults: true,
  });

  // Browsers without the Web Speech API get a short notice instead of the UI.
  if (!isSupported) {
    return (
      <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
        Speech Recognition is not supported in your browser.
      </div>
    );
  }

  const selectedLanguage = LANGUAGES.find((l) => l.code === selectedLang);

  // Derive presentation pieces up front so the JSX below stays flat.
  const MicIcon = isListening ? MicOff : Mic;
  const micClasses = `flex h-14 w-14 items-center justify-center rounded-full transition-all ${
    isListening
      ? "animate-pulse bg-red-500 text-white"
      : "bg-primary text-primary-foreground hover:bg-primary/90"
  }`;
  const statusText = isListening
    ? `Listening in ${selectedLanguage?.name}...`
    : `Ready to listen in ${selectedLanguage?.name}`;
  const hasText = Boolean(transcript) || Boolean(interimTranscript);

  return (
    <div className="flex w-full max-w-md flex-col gap-4">
      {/* Language Selector */}
      <div className="flex items-center gap-2">
        <Globe className="text-muted-foreground h-4 w-4" />
        <select
          value={selectedLang}
          onChange={(e) => {
            // Switching languages mid-session: halt recognition and clear
            // the old-language transcript before reconfiguring.
            if (isListening) stop();
            setSelectedLang(e.target.value);
            resetTranscript();
          }}
          className="border-input bg-background flex-1 rounded-md border px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
        >
          {LANGUAGES.map((lang) => (
            <option key={lang.code} value={lang.code}>
              {lang.name}
            </option>
          ))}
        </select>
      </div>
      {/* Controls */}
      <div className="flex items-center justify-center gap-3">
        <button onClick={isListening ? stop : start} className={micClasses}>
          <MicIcon className="h-5 w-5" />
        </button>
        <button
          onClick={resetTranscript}
          className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
        >
          <RotateCcw className="h-4 w-4" />
        </button>
      </div>
      {/* Status */}
      <p className="text-muted-foreground text-center text-sm">{statusText}</p>
      {/* Transcript */}
      <div className="bg-muted/50 min-h-32 rounded-lg p-4">
        <div className="mb-2 flex items-center justify-between">
          <p className="text-muted-foreground text-xs font-medium uppercase tracking-wide">
            Transcript
          </p>
          <span className="text-muted-foreground rounded bg-gray-200 px-2 py-0.5 text-xs dark:bg-gray-700">
            {selectedLang}
          </span>
        </div>
        <p className="text-sm leading-relaxed">
          {transcript}
          {interimTranscript && (
            <span className="text-muted-foreground italic">
              {interimTranscript}
            </span>
          )}
          {!hasText && (
            <span className="text-muted-foreground italic">
              Speak in {selectedLanguage?.name}...
            </span>
          )}
        </p>
      </div>
    </div>
  );
};
API Reference
Hook Signature
function useSpeechRecognition(
options?: UseSpeechRecognitionOptions,
): UseSpeechRecognitionReturn;
Options
| Property | Type | Default | Description |
|---|---|---|---|
lang | string | "en-US" | BCP 47 language code (e.g., "es-ES", "ja-JP") |
continuous | boolean | false | Keep listening after user stops speaking |
interimResults | boolean | true | Return interim results before final |
maxAlternatives | number | 1 | Number of alternative transcriptions |
onResult | (transcript: string, isFinal: boolean) => void | - | Callback when a result is received |
onError | (error: string) => void | - | Callback when an error occurs |
onEnd | () => void | - | Callback when recognition ends |
Return Value
| Property | Type | Description |
|---|---|---|
transcript | string | The final transcribed text |
interimTranscript | string | In-progress transcription (not finalized) |
isListening | boolean | Whether recognition is active |
isSupported | boolean | Whether the API is supported |
error | SpeechRecognitionErrorCode | null | Error code if recognition failed |
errorMessage | string | null | Human-readable error message |
start | () => void | Start listening |
stop | () => void | Stop listening (waits for final result) |
abort | () => void | Abort immediately (discards results) |
resetTranscript | () => void | Clear the transcript |
Error Codes
| Code | Description |
|---|---|
no-speech | No speech was detected |
aborted | Recognition was aborted |
audio-capture | Microphone not found or access failed |
network | Network error during recognition |
not-allowed | Microphone permission denied |
service-not-allowed | Speech recognition service not allowed |
bad-grammar | Speech grammar error occurred |
language-not-supported | The specified language is not supported |
Browser Support
| Browser | Support |
|---|---|
| Chrome | ✅ Full support |
| Edge | ✅ Full support |
| Safari | ✅ Full support (iOS 14.5+) |
| Firefox | ❌ Not supported |
Hook Source Code
import { useState, useEffect, useCallback, useRef } from "react";
/**
 * Machine-readable error codes emitted by the Web Speech Recognition API's
 * `error` event (mirrors SpeechRecognitionErrorEvent.error).
 */
export type SpeechRecognitionErrorCode =
  | "no-speech"
  | "aborted"
  | "audio-capture"
  | "network"
  | "not-allowed"
  | "service-not-allowed"
  | "bad-grammar"
  | "language-not-supported";

/**
 * Configuration accepted by {@link useSpeechRecognition}.
 */
export interface UseSpeechRecognitionOptions {
  /** Language for recognition (BCP 47 format, e.g., 'en-US', 'es-ES') */
  lang?: string;
  /** Keep listening after user stops speaking (default: false) */
  continuous?: boolean;
  /** Return interim results before final transcription (default: true) */
  interimResults?: boolean;
  /** Maximum number of alternative transcriptions to return (default: 1) */
  maxAlternatives?: number;
  /** Callback invoked for each result; `isFinal` is false for interim text. */
  onResult?: (transcript: string, isFinal: boolean) => void;
  /** Callback when an error occurs */
  onError?: (error: SpeechRecognitionErrorCode) => void;
  /** Callback when recognition ends */
  onEnd?: () => void;
}

/**
 * State and control functions returned by {@link useSpeechRecognition}.
 */
export interface UseSpeechRecognitionReturn {
  /** The final transcribed text */
  transcript: string;
  /** The interim (in-progress) transcribed text */
  interimTranscript: string;
  /** Whether speech recognition is currently active */
  isListening: boolean;
  /** Whether the Speech Recognition API is supported */
  isSupported: boolean;
  /** The error code if recognition failed */
  error: SpeechRecognitionErrorCode | null;
  /** Human-readable error message */
  errorMessage: string | null;
  /** Start listening for speech */
  start: () => void;
  /** Stop listening gracefully (waits for final result) */
  stop: () => void;
  /** Abort listening immediately (discards results) */
  abort: () => void;
  /** Reset the transcript to empty */
  resetTranscript: () => void;
}

/** User-facing explanation for every recognition error code. */
const ERROR_MESSAGES: Record<SpeechRecognitionErrorCode, string> = {
  "no-speech": "No speech was detected. Please try again.",
  aborted: "Speech recognition was aborted.",
  "audio-capture": "No microphone was found or microphone access failed.",
  network: "Network error occurred during recognition.",
  "not-allowed":
    "Microphone permission denied. Please allow access in your browser settings.",
  "service-not-allowed": "Speech recognition service is not allowed.",
  "bad-grammar": "Speech grammar error occurred.",
  "language-not-supported": "The specified language is not supported.",
};

/**
 * Translate a recognition error code into a human-readable message.
 * Falls back to a generic message for codes outside the known set.
 */
function getErrorMessage(error: SpeechRecognitionErrorCode): string {
  return (
    ERROR_MESSAGES[error] ??
    "An unknown error occurred during speech recognition."
  );
}
// Type declarations for the Web Speech API (not fully typed in TypeScript)

// Fired by the engine whenever new results are available; `resultIndex` marks
// the first result added or changed since the previous event.
interface SpeechRecognitionEvent extends Event {
resultIndex: number;
results: SpeechRecognitionResultList;
}
// Fired when recognition fails; `error` carries the machine-readable code
// that the hook maps to a user-facing message.
interface SpeechRecognitionErrorEvent extends Event {
error: SpeechRecognitionErrorCode;
message: string;
}
// Array-like collection of results; supports both .item(i) and [i] access.
interface SpeechRecognitionResultList {
length: number;
item(index: number): SpeechRecognitionResult;
[index: number]: SpeechRecognitionResult;
}
// One recognized segment holding candidate transcriptions; `isFinal` is false
// while the segment is still an interim guess.
interface SpeechRecognitionResult {
length: number;
item(index: number): SpeechRecognitionAlternative;
[index: number]: SpeechRecognitionAlternative;
isFinal: boolean;
}
// A single candidate transcription; `confidence` is presumably 0..1 per the
// Web Speech spec — TODO confirm against target browsers.
interface SpeechRecognitionAlternative {
transcript: string;
confidence: number;
}
// Minimal surface of a SpeechRecognition instance as used by this hook.
interface SpeechRecognitionInstance extends EventTarget {
continuous: boolean;
interimResults: boolean;
lang: string;
maxAlternatives: number;
start(): void;
stop(): void;
abort(): void;
onresult: ((event: SpeechRecognitionEvent) => void) | null;
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
onend: (() => void) | null;
onstart: (() => void) | null;
onspeechend: (() => void) | null;
}
// Expose both the standard and the webkit-prefixed constructors on window.
// NOTE(review): declared as required properties, but browsers typically expose
// only one of the two — the hook guards with `in window` checks before use.
declare global {
interface Window {
SpeechRecognition: new () => SpeechRecognitionInstance;
webkitSpeechRecognition: new () => SpeechRecognitionInstance;
}
}
/**
* A React hook that provides speech-to-text functionality using the
* Web Speech Recognition API.
*
* @param options - Configuration options for the hook
* @returns UseSpeechRecognitionReturn object with transcript and control functions
*
* @example
* ```tsx
* // Basic usage
* const { transcript, isListening, start, stop } = useSpeechRecognition();
*
* // With options
* const { transcript, interimTranscript } = useSpeechRecognition({
* lang: 'es-ES',
* continuous: true,
* interimResults: true
* });
* ```
*/
export function useSpeechRecognition(
  options: UseSpeechRecognitionOptions = {},
): UseSpeechRecognitionReturn {
  const {
    lang = "en-US",
    continuous = false,
    interimResults = true,
    maxAlternatives = 1,
    onResult,
    onError,
    onEnd,
  } = options;

  const [transcript, setTranscript] = useState("");
  const [interimTranscript, setInterimTranscript] = useState("");
  const [isListening, setIsListening] = useState(false);
  const [error, setError] = useState<SpeechRecognitionErrorCode | null>(null);
  const [errorMessage, setErrorMessage] = useState<string | null>(null);

  // Use refs for callbacks to avoid re-creating the recognition instance
  // when callbacks change (which happens on every render if not memoized)
  const onResultRef = useRef(onResult);
  const onErrorRef = useRef(onError);
  const onEndRef = useRef(onEnd);
  const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
  const isManualStopRef = useRef(false);
  // BUGFIX: mirror the latest error code in a ref. The `onend` handler is
  // created once per effect run, so reading the `error` state there saw a
  // stale value (always the null captured at effect creation) and continuous
  // mode kept auto-restarting even after fatal errors such as "not-allowed".
  const errorRef = useRef<SpeechRecognitionErrorCode | null>(null);

  // Keep the callback refs pointing at the latest props on every render.
  useEffect(() => {
    onResultRef.current = onResult;
    onErrorRef.current = onError;
    onEndRef.current = onEnd;
  }, [onResult, onError, onEnd]);

  // SSR-safe feature detection (`window` is absent on the server).
  const isSupported =
    typeof window !== "undefined" &&
    ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);

  // Create and wire up the recognition instance; rebuilt whenever a
  // recognition setting changes.
  useEffect(() => {
    if (!isSupported) return;
    const SpeechRecognitionAPI =
      window.SpeechRecognition || window.webkitSpeechRecognition;
    const recognition = new SpeechRecognitionAPI();
    recognition.continuous = continuous;
    recognition.interimResults = interimResults;
    recognition.lang = lang;
    recognition.maxAlternatives = maxAlternatives;

    recognition.onstart = () => {
      setIsListening(true);
      errorRef.current = null;
      setError(null);
      setErrorMessage(null);
    };

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      let finalTranscript = "";
      let currentInterim = "";
      // Only walk results added since the last event (resultIndex onward).
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        if (!result || !result[0]) continue;
        if (result.isFinal) {
          finalTranscript += result[0].transcript;
        } else {
          currentInterim += result[0].transcript;
        }
      }
      if (finalTranscript) {
        // Final text accumulates; interim text is replaced wholesale.
        setTranscript((prev) => prev + finalTranscript);
        onResultRef.current?.(finalTranscript, true);
      }
      setInterimTranscript(currentInterim);
      if (currentInterim) {
        onResultRef.current?.(currentInterim, false);
      }
    };

    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
      const errorCode = event.error;
      errorRef.current = errorCode;
      setError(errorCode);
      setErrorMessage(getErrorMessage(errorCode));
      setIsListening(false);
      onErrorRef.current?.(errorCode);
    };

    recognition.onend = () => {
      setIsListening(false);
      setInterimTranscript("");
      // Auto-restart if continuous mode, not manually stopped, and no error
      // occurred — checked via the ref so the value is never stale.
      if (continuous && !isManualStopRef.current && !errorRef.current) {
        try {
          recognition.start();
        } catch {
          // Ignore if already started
        }
      }
      onEndRef.current?.();
    };

    recognitionRef.current = recognition;
    return () => {
      recognition.abort();
    };
  }, [isSupported, lang, continuous, interimResults, maxAlternatives]); // Callbacks intentionally omitted (handled via refs)

  // Defensive sync of settings onto the current instance. The effect above
  // already rebuilds on these deps; this keeps the ref consistent regardless
  // of effect ordering.
  useEffect(() => {
    if (recognitionRef.current) {
      recognitionRef.current.lang = lang;
      recognitionRef.current.continuous = continuous;
      recognitionRef.current.interimResults = interimResults;
      recognitionRef.current.maxAlternatives = maxAlternatives;
    }
  }, [lang, continuous, interimResults, maxAlternatives]);

  /** Start listening; clears any previous error state first. */
  const start = useCallback(() => {
    if (!isSupported || !recognitionRef.current) return;
    isManualStopRef.current = false;
    errorRef.current = null;
    setError(null);
    setErrorMessage(null);
    try {
      recognitionRef.current.start();
    } catch {
      // Ignore if already started - this can happen in continuous mode
    }
  }, [isSupported]);

  /** Stop gracefully; pending audio is still finalized into the transcript. */
  const stop = useCallback(() => {
    if (!recognitionRef.current) return;
    isManualStopRef.current = true;
    recognitionRef.current.stop();
  }, []);

  /** Abort immediately; in-flight interim results are discarded. */
  const abort = useCallback(() => {
    if (!recognitionRef.current) return;
    isManualStopRef.current = true;
    recognitionRef.current.abort();
    setInterimTranscript("");
  }, []);

  /** Clear both the final and interim transcripts. */
  const resetTranscript = useCallback(() => {
    setTranscript("");
    setInterimTranscript("");
  }, []);

  return {
    transcript,
    interimTranscript,
    isListening,
    isSupported,
    error,
    errorMessage,
    start,
    stop,
    abort,
    resetTranscript,
  };
}
export default useSpeechRecognition;
useFileUpload
A comprehensive React hook for handling file uploads with drag-and-drop support, file validation, preview generation, and multiple file management.
useSpeechSynthesis
A React hook for text-to-speech functionality using the Web Speech Synthesis API. Read text aloud with customizable voices, rate, pitch, and volume controls.