Fiber UI

useSpeechRecognition

A React hook for speech-to-text functionality using the Web Speech Recognition API. Convert voice to text in real-time with support for multiple languages, continuous listening, and interim results.

Installation

npx shadcn@latest add https://r.fiberui.com/r/hooks/use-speech-recognition.json

A React hook that provides speech-to-text functionality using the Web Speech Recognition API. Perfect for voice commands, dictation, accessibility features, and hands-free interfaces.

Source Code

View the full hook implementation in the Hook Source Code section below.

Permission Required

Important: The browser will prompt the user for microphone permission before starting speech recognition. If the user denies permission, the error will be set to not-allowed. Always handle this gracefully.

Related Hook

Need text-to-speech instead? See useSpeechSynthesis.

Features

  • Real-time Transcription - Get interim results as the user speaks
  • Continuous Mode - Keep listening after pauses in speech
  • Multi-language Support - 50+ languages with BCP 47 language codes
  • Error Handling - Typed errors with human-readable messages
  • Control Functions - start, stop, abort, and resetTranscript
  • Callbacks - onResult, onError, onEnd for event-driven workflows
  • SSR Safe - Gracefully handles server-side rendering

Learn More


Basic Transcription

The simplest usage - click the microphone to start listening and see your speech transcribed in real-time. Interim results appear in gray while the final transcript is confirmed.

Speech Recognition is not supported in your browser.

Try using Chrome, Edge, or Safari.

"use client";

import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, RotateCcw } from "lucide-react";

/* BASIC USAGE - Voice Transcription */
export const Example1 = () => {
    const {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        error,
        errorMessage,
        start,
        stop,
        resetTranscript,
    } = useSpeechRecognition();

    // Browsers without the Web Speech API (e.g. Firefox) get a notice
    // instead of the recorder UI.
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                <p className="font-medium">
                    Speech Recognition is not supported in your browser.
                </p>
                <p className="mt-1 text-sm opacity-80">
                    Try using Chrome, Edge, or Safari.
                </p>
            </div>
        );
    }

    // Derive the listening-dependent pieces up front so the JSX below
    // stays flat and easy to scan.
    const MicIcon = isListening ? MicOff : Mic;
    const micButtonClass = isListening
        ? "animate-pulse bg-red-500 text-white"
        : "bg-primary text-primary-foreground hover:bg-primary/90";
    const statusText = isListening
        ? "Listening... speak now"
        : "Click the microphone to start";
    const showPlaceholder = !transcript && !interimTranscript;

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Microphone toggle + reset controls */}
            <div className="flex items-center justify-center gap-3">
                <button
                    onClick={isListening ? stop : start}
                    className={`flex h-16 w-16 items-center justify-center rounded-full transition-all ${micButtonClass}`}
                >
                    <MicIcon className="h-6 w-6" />
                </button>

                <button
                    onClick={resetTranscript}
                    className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
                    title="Reset transcript"
                >
                    <RotateCcw className="h-4 w-4" />
                </button>
            </div>

            {/* Listening status line */}
            <p className="text-muted-foreground text-center text-sm">
                {statusText}
            </p>

            {/* Final transcript, with interim text shown in italics */}
            <div className="bg-muted/50 min-h-32 rounded-lg p-4">
                <p className="text-muted-foreground mb-2 text-xs font-medium uppercase tracking-wide">
                    Transcript
                </p>
                <p className="text-sm leading-relaxed">
                    {transcript}
                    {interimTranscript && (
                        <span className="text-muted-foreground italic">
                            {interimTranscript}
                        </span>
                    )}
                    {showPlaceholder && (
                        <span className="text-muted-foreground italic">
                            Your speech will appear here...
                        </span>
                    )}
                </p>
            </div>

            {/* Human-readable error message, when any */}
            {error && (
                <div className="bg-destructive/10 text-destructive rounded-lg p-3 text-sm">
                    {errorMessage}
                </div>
            )}
        </div>
    );
};

Voice Commands

Use speech recognition to execute actions based on voice commands. This example demonstrates a voice-controlled task manager where you can add, remove, and clear tasks by speaking.

Speech Recognition is not supported in your browser.
"use client";

import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, ListChecks, Trash2 } from "lucide-react";

/* VOICE COMMANDS - Execute Actions from Speech */
export const Example2 = () => {
    // Seed tasks so the list isn't empty on first render.
    const [tasks, setTasks] = useState<string[]>([
        "Review PR #42",
        "Update documentation",
    ]);
    const [lastCommand, setLastCommand] = useState<string | null>(null);

    const { isListening, isSupported, start, stop, resetTranscript } =
        useSpeechRecognition({
            continuous: true,
            onResult: (text, isFinal) => {
                // Only act on finalized results; interim text is unstable.
                if (!isFinal) return;

                const command = text.toLowerCase().trim();
                setLastCommand(command);

                // FIX: match the exact "clear" phrases BEFORE the prefix
                // commands. Previously "delete all" was captured by the
                // startsWith("delete ") remove branch below, which filtered
                // out tasks containing "all" instead of clearing the list.
                if (command === "clear all" || command === "delete all") {
                    setTasks([]);
                }
                // Add task command ("add task X" or "add X")
                else if (
                    command.startsWith("add task ") ||
                    command.startsWith("add ")
                ) {
                    const taskName = command.replace(/^add( task)?\s+/i, "");
                    if (taskName) {
                        setTasks((prev) => [...prev, taskName]);
                    }
                }
                // Remove task command ("remove task X" or "delete X") —
                // removes every task whose name contains the spoken text.
                else if (
                    command.startsWith("remove task ") ||
                    command.startsWith("delete ")
                ) {
                    const taskName = command.replace(
                        /^(remove task|delete)\s+/i,
                        "",
                    );
                    setTasks((prev) =>
                        prev.filter((t) => !t.toLowerCase().includes(taskName)),
                    );
                }
            },
        });

    // Fallback for browsers without the Web Speech API (e.g. Firefox).
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                Speech Recognition is not supported in your browser.
            </div>
        );
    }

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Header with mic button */}
            <div className="flex items-center justify-between">
                <div className="flex items-center gap-2">
                    <ListChecks className="text-primary h-5 w-5" />
                    <h3 className="font-semibold">Voice Task Manager</h3>
                </div>
                <button
                    onClick={() => {
                        if (isListening) {
                            stop();
                        } else {
                            // Start each session with a clean transcript.
                            resetTranscript();
                            start();
                        }
                    }}
                    className={`flex h-10 w-10 items-center justify-center rounded-full transition-all ${
                        isListening
                            ? "animate-pulse bg-red-500 text-white"
                            : "bg-primary text-primary-foreground"
                    }`}
                >
                    {isListening ? (
                        <MicOff className="h-4 w-4" />
                    ) : (
                        <Mic className="h-4 w-4" />
                    )}
                </button>
            </div>

            {/* Commands hint */}
            <div className="bg-muted/50 rounded-lg p-3 text-xs">
                <p className="text-muted-foreground mb-1 font-medium">
                    Try saying:
                </p>
                <ul className="text-muted-foreground space-y-0.5">
                    <li>&quot;Add task buy groceries&quot;</li>
                    <li>&quot;Delete groceries&quot;</li>
                    <li>&quot;Clear all&quot;</li>
                </ul>
            </div>

            {/* Last recognized command, for feedback/debugging */}
            {lastCommand && (
                <p className="text-muted-foreground text-xs">
                    Last command:{" "}
                    <span className="text-foreground font-mono">
                        &quot;{lastCommand}&quot;
                    </span>
                </p>
            )}

            {/* Task list */}
            <div className="bg-background rounded-lg border">
                {tasks.length === 0 ? (
                    <p className="text-muted-foreground p-4 text-center text-sm italic">
                        No tasks. Say &quot;Add task...&quot; to create one.
                    </p>
                ) : (
                    <ul className="divide-y">
                        {tasks.map((task, index) => (
                            <li
                                key={index}
                                className="flex items-center justify-between p-3"
                            >
                                <span className="text-sm">{task}</span>
                                <button
                                    onClick={() =>
                                        setTasks((prev) =>
                                            prev.filter((_, i) => i !== index),
                                        )
                                    }
                                    className="text-muted-foreground hover:text-destructive"
                                >
                                    <Trash2 className="h-4 w-4" />
                                </button>
                            </li>
                        ))}
                    </ul>
                )}
            </div>
        </div>
    );
};

Multi-language Support

Speech recognition supports 50+ languages. Select a language and speak in that language to see accurate transcription. The recognition engine automatically adjusts to the selected language's phonetics and vocabulary.

Speech Recognition is not supported in your browser.
"use client";

import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, Globe, RotateCcw } from "lucide-react";

// Subset of BCP 47 language codes offered in the demo selector; the hook
// accepts any code the browser's recognition engine supports.
const LANGUAGES = [
    { code: "en-US", name: "English (US)" },
    { code: "en-GB", name: "English (UK)" },
    { code: "es-ES", name: "Spanish (Spain)" },
    { code: "es-MX", name: "Spanish (Mexico)" },
    { code: "fr-FR", name: "French" },
    { code: "de-DE", name: "German" },
    { code: "it-IT", name: "Italian" },
    { code: "pt-BR", name: "Portuguese (Brazil)" },
    { code: "ja-JP", name: "Japanese" },
    { code: "ko-KR", name: "Korean" },
    { code: "zh-CN", name: "Chinese (Simplified)" },
    { code: "hi-IN", name: "Hindi" },
    { code: "ar-SA", name: "Arabic" },
    { code: "ru-RU", name: "Russian" },
];

/* LANGUAGE SELECTOR - Multi-language Support */
export const Example3 = () => {
    // Currently selected BCP 47 language code for recognition.
    const [selectedLang, setSelectedLang] = useState("en-US");

    const {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        start,
        stop,
        resetTranscript,
    } = useSpeechRecognition({
        lang: selectedLang,
        continuous: true,
        interimResults: true,
    });

    // Fallback for browsers without the Web Speech API (e.g. Firefox).
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                Speech Recognition is not supported in your browser.
            </div>
        );
    }

    // Look up the display name for the selected code (used in status text).
    const selectedLanguage = LANGUAGES.find((l) => l.code === selectedLang);

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Language Selector */}
            <div className="flex items-center gap-2">
                <Globe className="text-muted-foreground h-4 w-4" />
                <select
                    value={selectedLang}
                    // Stop any active session before switching languages,
                    // then clear the transcript from the previous language.
                    onChange={(e) => {
                        if (isListening) stop();
                        setSelectedLang(e.target.value);
                        resetTranscript();
                    }}
                    className="border-input bg-background flex-1 rounded-md border px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
                >
                    {LANGUAGES.map((lang) => (
                        <option key={lang.code} value={lang.code}>
                            {lang.name}
                        </option>
                    ))}
                </select>
            </div>

            {/* Controls: mic toggle + transcript reset */}
            <div className="flex items-center justify-center gap-3">
                <button
                    onClick={isListening ? stop : start}
                    className={`flex h-14 w-14 items-center justify-center rounded-full transition-all ${
                        isListening
                            ? "animate-pulse bg-red-500 text-white"
                            : "bg-primary text-primary-foreground hover:bg-primary/90"
                    }`}
                >
                    {isListening ? (
                        <MicOff className="h-5 w-5" />
                    ) : (
                        <Mic className="h-5 w-5" />
                    )}
                </button>

                <button
                    onClick={resetTranscript}
                    className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
                >
                    <RotateCcw className="h-4 w-4" />
                </button>
            </div>

            {/* Status line showing the active language */}
            <p className="text-muted-foreground text-center text-sm">
                {isListening
                    ? `Listening in ${selectedLanguage?.name}...`
                    : `Ready to listen in ${selectedLanguage?.name}`}
            </p>

            {/* Transcript with language-code badge */}
            <div className="bg-muted/50 min-h-32 rounded-lg p-4">
                <div className="mb-2 flex items-center justify-between">
                    <p className="text-muted-foreground text-xs font-medium uppercase tracking-wide">
                        Transcript
                    </p>
                    <span className="text-muted-foreground rounded bg-gray-200 px-2 py-0.5 text-xs dark:bg-gray-700">
                        {selectedLang}
                    </span>
                </div>
                <p className="text-sm leading-relaxed">
                    {transcript}
                    {interimTranscript && (
                        <span className="text-muted-foreground italic">
                            {interimTranscript}
                        </span>
                    )}
                    {!transcript && !interimTranscript && (
                        <span className="text-muted-foreground italic">
                            Speak in {selectedLanguage?.name}...
                        </span>
                    )}
                </p>
            </div>
        </div>
    );
};

API Reference

Hook Signature

function useSpeechRecognition(
    options?: UseSpeechRecognitionOptions,
): UseSpeechRecognitionReturn;

Options

| Property          | Type                                             | Default   | Description                                        |
| ----------------- | ------------------------------------------------ | --------- | -------------------------------------------------- |
| `lang`            | `string`                                         | `"en-US"` | BCP 47 language code (e.g., `"es-ES"`, `"ja-JP"`)  |
| `continuous`      | `boolean`                                        | `false`   | Keep listening after user stops speaking           |
| `interimResults`  | `boolean`                                        | `true`    | Return interim results before final                |
| `maxAlternatives` | `number`                                         | `1`       | Number of alternative transcriptions               |
| `onResult`        | `(transcript: string, isFinal: boolean) => void` | -         | Callback when a result is received                 |
| `onError`         | `(error: SpeechRecognitionErrorCode) => void`    | -         | Callback when an error occurs                      |
| `onEnd`           | `() => void`                                     | -         | Callback when recognition ends                     |

Return Value

| Property            | Type                                 | Description                               |
| ------------------- | ------------------------------------ | ----------------------------------------- |
| `transcript`        | `string`                             | The final transcribed text                |
| `interimTranscript` | `string`                             | In-progress transcription (not finalized) |
| `isListening`       | `boolean`                            | Whether recognition is active             |
| `isSupported`       | `boolean`                            | Whether the API is supported              |
| `error`             | `SpeechRecognitionErrorCode \| null` | Error code if recognition failed          |
| `errorMessage`      | `string \| null`                     | Human-readable error message              |
| `start`             | `() => void`                         | Start listening                           |
| `stop`              | `() => void`                         | Stop listening (waits for final result)   |
| `abort`             | `() => void`                         | Abort immediately (discards results)      |
| `resetTranscript`   | `() => void`                         | Clear the transcript                      |

Error Codes

| Code                     | Description                               |
| ------------------------ | ----------------------------------------- |
| `no-speech`              | No speech was detected                    |
| `aborted`                | Recognition was aborted                   |
| `audio-capture`          | Microphone not found or access failed     |
| `network`                | Network error during recognition          |
| `not-allowed`            | Microphone permission denied              |
| `service-not-allowed`    | Speech recognition service not allowed    |
| `bad-grammar`            | Speech grammar error occurred             |
| `language-not-supported` | The specified language is not supported   |

Browser Support

| Browser | Support                      |
| ------- | ---------------------------- |
| Chrome  | ✅ Full support              |
| Edge    | ✅ Full support              |
| Safari  | ✅ Full support (iOS 14.5+)  |
| Firefox | ❌ Not supported             |

Hook Source Code

import { useState, useEffect, useCallback, useRef } from "react";

/**
 * Error codes emitted by the Web Speech Recognition API's error event.
 * Each code has a human-readable counterpart in `getErrorMessage`.
 */
export type SpeechRecognitionErrorCode =
    | "no-speech"
    | "aborted"
    | "audio-capture"
    | "network"
    | "not-allowed"
    | "service-not-allowed"
    // NOTE(review): "bad-grammar" appears in older browser implementations;
    // verify against current spec drafts before removing.
    | "bad-grammar"
    | "language-not-supported";

/**
 * Options for the useSpeechRecognition hook
 */
export interface UseSpeechRecognitionOptions {
    /** Language for recognition (BCP 47 format, e.g., 'en-US', 'es-ES') */
    lang?: string;
    /** Keep listening after user stops speaking (default: false) */
    continuous?: boolean;
    /** Return interim results before final transcription (default: true) */
    interimResults?: boolean;
    /** Maximum number of alternative transcriptions to return (default: 1) */
    maxAlternatives?: number;
    /** Callback invoked for every result; `isFinal` distinguishes interim text */
    onResult?: (transcript: string, isFinal: boolean) => void;
    /** Callback when an error occurs */
    onError?: (error: SpeechRecognitionErrorCode) => void;
    /** Callback when recognition ends */
    onEnd?: () => void;
}

/**
 * Return type for useSpeechRecognition hook
 */
export interface UseSpeechRecognitionReturn {
    /** The final transcribed text (accumulates across results) */
    transcript: string;
    /** The interim (in-progress) transcribed text */
    interimTranscript: string;
    /** Whether speech recognition is currently active */
    isListening: boolean;
    /** Whether the Speech Recognition API is supported (false during SSR) */
    isSupported: boolean;
    /** The error code if recognition failed */
    error: SpeechRecognitionErrorCode | null;
    /** Human-readable error message */
    errorMessage: string | null;
    /** Start listening for speech */
    start: () => void;
    /** Stop listening gracefully (waits for final result) */
    stop: () => void;
    /** Abort listening immediately (discards results) */
    abort: () => void;
    /** Reset the transcript to empty */
    resetTranscript: () => void;
}

/**
 * Human-readable message for each speech recognition error code,
 * suitable for direct display in the UI.
 */
const ERROR_MESSAGES: Record<SpeechRecognitionErrorCode, string> = {
    "no-speech": "No speech was detected. Please try again.",
    aborted: "Speech recognition was aborted.",
    "audio-capture": "No microphone was found or microphone access failed.",
    network: "Network error occurred during recognition.",
    "not-allowed":
        "Microphone permission denied. Please allow access in your browser settings.",
    "service-not-allowed": "Speech recognition service is not allowed.",
    "bad-grammar": "Speech grammar error occurred.",
    "language-not-supported": "The specified language is not supported.",
};

/**
 * Resolves a speech recognition error code to its human-readable message.
 * Falls back to a generic message for codes outside the known set
 * (the browser may emit values not covered by our type at runtime).
 */
function getErrorMessage(error: SpeechRecognitionErrorCode): string {
    return (
        ERROR_MESSAGES[error] ??
        "An unknown error occurred during speech recognition."
    );
}

// Type declarations for the Web Speech API (not fully typed in TypeScript)

// Event delivered to `onresult`; carries the cumulative list of results.
interface SpeechRecognitionEvent extends Event {
    // Index of the first result that changed in this event; earlier
    // entries were already delivered in previous events.
    resultIndex: number;
    results: SpeechRecognitionResultList;
}

// Event delivered to `onerror`; `error` is the machine-readable code.
interface SpeechRecognitionErrorEvent extends Event {
    error: SpeechRecognitionErrorCode;
    message: string;
}

// Array-like collection of results (indexable, with length and item()).
interface SpeechRecognitionResultList {
    length: number;
    item(index: number): SpeechRecognitionResult;
    [index: number]: SpeechRecognitionResult;
}

// A single result: an array-like list of alternatives, plus a flag
// indicating whether the engine has finalized this segment.
interface SpeechRecognitionResult {
    length: number;
    item(index: number): SpeechRecognitionAlternative;
    [index: number]: SpeechRecognitionAlternative;
    isFinal: boolean;
}

// One candidate transcription with the engine's confidence score.
interface SpeechRecognitionAlternative {
    transcript: string;
    confidence: number;
}

// Minimal surface of the browser's SpeechRecognition object used by the hook.
interface SpeechRecognitionInstance extends EventTarget {
    continuous: boolean;
    interimResults: boolean;
    lang: string;
    maxAlternatives: number;
    start(): void;
    stop(): void;
    abort(): void;
    onresult: ((event: SpeechRecognitionEvent) => void) | null;
    onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
    onend: (() => void) | null;
    onstart: (() => void) | null;
    onspeechend: (() => void) | null;
}

declare global {
    interface Window {
        SpeechRecognition: new () => SpeechRecognitionInstance;
        // Chrome and Safari expose the constructor under the webkit prefix.
        webkitSpeechRecognition: new () => SpeechRecognitionInstance;
    }
}

/**
 * A React hook that provides speech-to-text functionality using the
 * Web Speech Recognition API.
 *
 * SSR-safe: on the server `isSupported` is false and all control
 * functions are no-ops.
 *
 * @param options - Configuration options for the hook
 * @returns UseSpeechRecognitionReturn object with transcript and control functions
 *
 * @example
 * ```tsx
 * // Basic usage
 * const { transcript, isListening, start, stop } = useSpeechRecognition();
 *
 * // With options
 * const { transcript, interimTranscript } = useSpeechRecognition({
 *     lang: 'es-ES',
 *     continuous: true,
 *     interimResults: true
 * });
 * ```
 */
export function useSpeechRecognition(
    options: UseSpeechRecognitionOptions = {},
): UseSpeechRecognitionReturn {
    const {
        lang = "en-US",
        continuous = false,
        interimResults = true,
        maxAlternatives = 1,
        onResult,
        onError,
        onEnd,
    } = options;

    const [transcript, setTranscript] = useState("");
    const [interimTranscript, setInterimTranscript] = useState("");
    const [isListening, setIsListening] = useState(false);
    const [error, setError] = useState<SpeechRecognitionErrorCode | null>(null);
    const [errorMessage, setErrorMessage] = useState<string | null>(null);

    // Use refs for callbacks to avoid re-creating the recognition instance
    // when callbacks change (which happens on every render if not memoized)
    const onResultRef = useRef(onResult);
    const onErrorRef = useRef(onError);
    const onEndRef = useRef(onEnd);

    const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
    const isManualStopRef = useRef(false);

    // FIX: the `onend` handler previously read the `error` STATE variable,
    // which it captured once when the init effect ran (always null, since
    // `error` is not in that effect's dependency array). Continuous mode
    // therefore auto-restarted even after fatal errors such as
    // "not-allowed", re-triggering the permission prompt in a loop.
    // A ref always exposes the current value to the stable closure.
    const errorRef = useRef<SpeechRecognitionErrorCode | null>(null);

    // Keep the callback refs in sync with the latest props.
    useEffect(() => {
        onResultRef.current = onResult;
        onErrorRef.current = onError;
        onEndRef.current = onEnd;
    }, [onResult, onError, onEnd]);

    // Check if API is supported (guards against SSR, where window is absent)
    const isSupported =
        typeof window !== "undefined" &&
        ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);

    // Initialize recognition instance. Re-created whenever a recognition
    // setting changes, so no separate settings-sync effect is needed.
    useEffect(() => {
        if (!isSupported) return;

        const SpeechRecognitionAPI =
            window.SpeechRecognition || window.webkitSpeechRecognition;
        const recognition = new SpeechRecognitionAPI();

        recognition.continuous = continuous;
        recognition.interimResults = interimResults;
        recognition.lang = lang;
        recognition.maxAlternatives = maxAlternatives;

        recognition.onstart = () => {
            setIsListening(true);
            errorRef.current = null;
            setError(null);
            setErrorMessage(null);
        };

        recognition.onresult = (event: SpeechRecognitionEvent) => {
            let finalTranscript = "";
            let currentInterim = "";

            // Only entries from resultIndex onward are new in this event;
            // earlier entries were already delivered previously.
            for (let i = event.resultIndex; i < event.results.length; i++) {
                const result = event.results[i];
                if (!result || !result[0]) continue;

                if (result.isFinal) {
                    finalTranscript += result[0].transcript;
                } else {
                    currentInterim += result[0].transcript;
                }
            }

            if (finalTranscript) {
                setTranscript((prev) => prev + finalTranscript);
                onResultRef.current?.(finalTranscript, true);
            }

            setInterimTranscript(currentInterim);
            if (currentInterim) {
                onResultRef.current?.(currentInterim, false);
            }
        };

        recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
            const errorCode = event.error;
            errorRef.current = errorCode;
            setError(errorCode);
            setErrorMessage(getErrorMessage(errorCode));
            setIsListening(false);
            onErrorRef.current?.(errorCode);
        };

        recognition.onend = () => {
            setIsListening(false);
            setInterimTranscript("");

            // Auto-restart if continuous mode and not manually stopped.
            // Read the error through the ref so a session that ended with
            // an error (e.g. "not-allowed") is never restarted.
            if (continuous && !isManualStopRef.current && !errorRef.current) {
                try {
                    recognition.start();
                } catch {
                    // Ignore if already started
                }
            }

            onEndRef.current?.();
        };

        recognitionRef.current = recognition;

        return () => {
            recognition.abort();
        };
        // Callbacks are intentionally omitted: they are reached via refs.
    }, [isSupported, lang, continuous, interimResults, maxAlternatives]);

    /** Start listening for speech. Clears any previous error state. */
    const start = useCallback(() => {
        if (!isSupported || !recognitionRef.current) return;

        isManualStopRef.current = false;
        errorRef.current = null;
        setError(null);
        setErrorMessage(null);

        try {
            recognitionRef.current.start();
        } catch {
            // Ignore if already started - this can happen in continuous mode
        }
    }, [isSupported]);

    /** Stop gracefully; pending audio is still finalized before onend. */
    const stop = useCallback(() => {
        if (!recognitionRef.current) return;

        isManualStopRef.current = true;
        recognitionRef.current.stop();
    }, []);

    /** Abort immediately, discarding any in-progress results. */
    const abort = useCallback(() => {
        if (!recognitionRef.current) return;

        isManualStopRef.current = true;
        recognitionRef.current.abort();
        setInterimTranscript("");
    }, []);

    /** Clear both the final and interim transcripts. */
    const resetTranscript = useCallback(() => {
        setTranscript("");
        setInterimTranscript("");
    }, []);

    return {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        error,
        errorMessage,
        start,
        stop,
        abort,
        resetTranscript,
    };
}

export default useSpeechRecognition;