Fiber UI

useSpeechRecognition

A React hook for speech-to-text functionality using the Web Speech Recognition API. Convert voice to text in real-time with support for multiple languages, continuous listening, and interim results.

Installation

npx shadcn@latest add https://r.fiberui.com/r/hooks/use-speech-recognition.json

A React hook that provides speech-to-text functionality using the Web Speech Recognition API. Perfect for voice commands, dictation, accessibility features, and hands-free interfaces.

Source Code

View the full hook implementation in the Hook Source Code section below.

Permission Required

Important: The browser will prompt the user for microphone permission before starting speech recognition. If the user denies permission, the error will be set to not-allowed. Always handle this gracefully.

Related Hook

Need text-to-speech instead? See useSpeechSynthesis.

Features

  • Real-time Transcription - Get interim results as the user speaks
  • Continuous Mode - Keep listening after pauses in speech
  • Multi-language Support - 50+ languages with BCP 47 language codes
  • Error Handling - Typed errors with human-readable messages
  • Control Functions - start, stop, abort, and resetTranscript
  • Callbacks - onResult, onError, onEnd for event-driven workflows
  • SSR Safe - Gracefully handles server-side rendering

Learn More


Basic Transcription

The simplest usage - click the microphone to start listening and see your speech transcribed in real-time. Interim results appear in gray while the final transcript is confirmed.

Speech Recognition is not supported in your browser.

Try using Chrome, Edge, or Safari.

"use client";

import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, RotateCcw } from "lucide-react";

/* BASIC USAGE - Voice Transcription */
export const Example1 = () => {
    const {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        error,
        errorMessage,
        start,
        stop,
        resetTranscript,
    } = useSpeechRecognition();

    // Browsers without the Web Speech API (e.g. Firefox) get a notice
    // instead of the recorder UI.
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                <p className="font-medium">
                    Speech Recognition is not supported in your browser.
                </p>
                <p className="mt-1 text-sm opacity-80">
                    Try using Chrome, Edge, or Safari.
                </p>
            </div>
        );
    }

    // Derive the listening-dependent pieces up front so the JSX below
    // stays flat and easy to scan.
    const MicIcon = isListening ? MicOff : Mic;
    const micButtonClass = isListening
        ? "animate-pulse bg-red-500 text-white"
        : "bg-primary text-primary-foreground hover:bg-primary/90";
    const statusText = isListening
        ? "Listening... speak now"
        : "Click the microphone to start";
    const showPlaceholder = !transcript && !interimTranscript;

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Microphone toggle + reset controls */}
            <div className="flex items-center justify-center gap-3">
                <button
                    onClick={isListening ? stop : start}
                    className={`flex h-16 w-16 items-center justify-center rounded-full transition-all ${micButtonClass}`}
                >
                    <MicIcon className="h-6 w-6" />
                </button>

                <button
                    onClick={resetTranscript}
                    className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
                    title="Reset transcript"
                >
                    <RotateCcw className="h-4 w-4" />
                </button>
            </div>

            {/* Listening status line */}
            <p className="text-muted-foreground text-center text-sm">
                {statusText}
            </p>

            {/* Final transcript, with interim text shown in italics */}
            <div className="bg-muted/50 min-h-32 rounded-lg p-4">
                <p className="text-muted-foreground mb-2 text-xs font-medium uppercase tracking-wide">
                    Transcript
                </p>
                <p className="text-sm leading-relaxed">
                    {transcript}
                    {interimTranscript && (
                        <span className="text-muted-foreground italic">
                            {interimTranscript}
                        </span>
                    )}
                    {showPlaceholder && (
                        <span className="text-muted-foreground italic">
                            Your speech will appear here...
                        </span>
                    )}
                </p>
            </div>

            {/* Human-readable error message, when any */}
            {error && (
                <div className="bg-destructive/10 text-destructive rounded-lg p-3 text-sm">
                    {errorMessage}
                </div>
            )}
        </div>
    );
};

Voice Commands

Use speech recognition to execute actions based on voice commands. This example demonstrates a voice-controlled task manager where you can add, remove, and clear tasks by speaking.

Speech Recognition is not supported in your browser.
"use client";

import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, ListChecks, Trash2 } from "lucide-react";

/* VOICE COMMANDS - Execute Actions from Speech */
export const Example2 = () => {
    // Seed tasks so the list isn't empty on first render.
    const [tasks, setTasks] = useState<string[]>([
        "Review PR #42",
        "Update documentation",
    ]);
    const [lastCommand, setLastCommand] = useState<string | null>(null);

    const { isListening, isSupported, start, stop, resetTranscript } =
        useSpeechRecognition({
            continuous: true,
            onResult: (text, isFinal) => {
                // Only act on finalized results; interim text is unstable.
                if (!isFinal) return;

                const command = text.toLowerCase().trim();
                setLastCommand(command);

                // FIX: match the exact "clear" phrases BEFORE the prefix
                // commands. Previously "delete all" was captured by the
                // startsWith("delete ") remove branch below, which filtered
                // out tasks containing "all" instead of clearing the list.
                if (command === "clear all" || command === "delete all") {
                    setTasks([]);
                }
                // Add task command ("add task X" or "add X")
                else if (
                    command.startsWith("add task ") ||
                    command.startsWith("add ")
                ) {
                    const taskName = command.replace(/^add( task)?\s+/i, "");
                    if (taskName) {
                        setTasks((prev) => [...prev, taskName]);
                    }
                }
                // Remove task command ("remove task X" or "delete X") —
                // removes every task whose name contains the spoken text.
                else if (
                    command.startsWith("remove task ") ||
                    command.startsWith("delete ")
                ) {
                    const taskName = command.replace(
                        /^(remove task|delete)\s+/i,
                        "",
                    );
                    setTasks((prev) =>
                        prev.filter((t) => !t.toLowerCase().includes(taskName)),
                    );
                }
            },
        });

    // Fallback for browsers without the Web Speech API (e.g. Firefox).
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                Speech Recognition is not supported in your browser.
            </div>
        );
    }

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Header with mic button */}
            <div className="flex items-center justify-between">
                <div className="flex items-center gap-2">
                    <ListChecks className="text-primary h-5 w-5" />
                    <h3 className="font-semibold">Voice Task Manager</h3>
                </div>
                <button
                    onClick={() => {
                        if (isListening) {
                            stop();
                        } else {
                            // Start each session with a clean transcript.
                            resetTranscript();
                            start();
                        }
                    }}
                    className={`flex h-10 w-10 items-center justify-center rounded-full transition-all ${
                        isListening
                            ? "animate-pulse bg-red-500 text-white"
                            : "bg-primary text-primary-foreground"
                    }`}
                >
                    {isListening ? (
                        <MicOff className="h-4 w-4" />
                    ) : (
                        <Mic className="h-4 w-4" />
                    )}
                </button>
            </div>

            {/* Commands hint */}
            <div className="bg-muted/50 rounded-lg p-3 text-xs">
                <p className="text-muted-foreground mb-1 font-medium">
                    Try saying:
                </p>
                <ul className="text-muted-foreground space-y-0.5">
                    <li>&quot;Add task buy groceries&quot;</li>
                    <li>&quot;Delete groceries&quot;</li>
                    <li>&quot;Clear all&quot;</li>
                </ul>
            </div>

            {/* Last recognized command, for feedback/debugging */}
            {lastCommand && (
                <p className="text-muted-foreground text-xs">
                    Last command:{" "}
                    <span className="text-foreground font-mono">
                        &quot;{lastCommand}&quot;
                    </span>
                </p>
            )}

            {/* Task list */}
            <div className="bg-background rounded-lg border">
                {tasks.length === 0 ? (
                    <p className="text-muted-foreground p-4 text-center text-sm italic">
                        No tasks. Say &quot;Add task...&quot; to create one.
                    </p>
                ) : (
                    <ul className="divide-y">
                        {tasks.map((task, index) => (
                            <li
                                key={index}
                                className="flex items-center justify-between p-3"
                            >
                                <span className="text-sm">{task}</span>
                                <button
                                    onClick={() =>
                                        setTasks((prev) =>
                                            prev.filter((_, i) => i !== index),
                                        )
                                    }
                                    className="text-muted-foreground hover:text-destructive"
                                >
                                    <Trash2 className="h-4 w-4" />
                                </button>
                            </li>
                        ))}
                    </ul>
                )}
            </div>
        </div>
    );
};

Multi-language Support

Speech recognition supports 50+ languages. Select a language and speak in that language to see accurate transcription. The recognition engine automatically adjusts to the selected language's phonetics and vocabulary.

Speech Recognition is not supported in your browser.
"use client";

import { useState } from "react";
import { useSpeechRecognition } from "@repo/hooks/speech/use-speech-recognition";
import { Mic, MicOff, Globe, RotateCcw } from "lucide-react";

// Subset of BCP 47 language codes offered in the demo selector; the hook
// accepts any code the browser's recognition engine supports.
const LANGUAGES = [
    { code: "en-US", name: "English (US)" },
    { code: "en-GB", name: "English (UK)" },
    { code: "es-ES", name: "Spanish (Spain)" },
    { code: "es-MX", name: "Spanish (Mexico)" },
    { code: "fr-FR", name: "French" },
    { code: "de-DE", name: "German" },
    { code: "it-IT", name: "Italian" },
    { code: "pt-BR", name: "Portuguese (Brazil)" },
    { code: "ja-JP", name: "Japanese" },
    { code: "ko-KR", name: "Korean" },
    { code: "zh-CN", name: "Chinese (Simplified)" },
    { code: "hi-IN", name: "Hindi" },
    { code: "ar-SA", name: "Arabic" },
    { code: "ru-RU", name: "Russian" },
];

/* LANGUAGE SELECTOR - Multi-language Support */
export const Example3 = () => {
    // Currently selected BCP 47 language code for recognition.
    const [selectedLang, setSelectedLang] = useState("en-US");

    const {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        start,
        stop,
        resetTranscript,
    } = useSpeechRecognition({
        lang: selectedLang,
        continuous: true,
        interimResults: true,
    });

    // Fallback for browsers without the Web Speech API (e.g. Firefox).
    if (!isSupported) {
        return (
            <div className="bg-destructive/10 text-destructive rounded-lg p-4 text-center">
                Speech Recognition is not supported in your browser.
            </div>
        );
    }

    // Look up the display name for the selected code (used in status text).
    const selectedLanguage = LANGUAGES.find((l) => l.code === selectedLang);

    return (
        <div className="flex w-full max-w-md flex-col gap-4">
            {/* Language Selector */}
            <div className="flex items-center gap-2">
                <Globe className="text-muted-foreground h-4 w-4" />
                <select
                    value={selectedLang}
                    // Stop any active session before switching languages,
                    // then clear the transcript from the previous language.
                    onChange={(e) => {
                        if (isListening) stop();
                        setSelectedLang(e.target.value);
                        resetTranscript();
                    }}
                    className="border-input bg-background flex-1 rounded-md border px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
                >
                    {LANGUAGES.map((lang) => (
                        <option key={lang.code} value={lang.code}>
                            {lang.name}
                        </option>
                    ))}
                </select>
            </div>

            {/* Controls: mic toggle + transcript reset */}
            <div className="flex items-center justify-center gap-3">
                <button
                    onClick={isListening ? stop : start}
                    className={`flex h-14 w-14 items-center justify-center rounded-full transition-all ${
                        isListening
                            ? "animate-pulse bg-red-500 text-white"
                            : "bg-primary text-primary-foreground hover:bg-primary/90"
                    }`}
                >
                    {isListening ? (
                        <MicOff className="h-5 w-5" />
                    ) : (
                        <Mic className="h-5 w-5" />
                    )}
                </button>

                <button
                    onClick={resetTranscript}
                    className="bg-muted hover:bg-muted/80 flex h-10 w-10 items-center justify-center rounded-full transition-colors"
                >
                    <RotateCcw className="h-4 w-4" />
                </button>
            </div>

            {/* Status line showing the active language */}
            <p className="text-muted-foreground text-center text-sm">
                {isListening
                    ? `Listening in ${selectedLanguage?.name}...`
                    : `Ready to listen in ${selectedLanguage?.name}`}
            </p>

            {/* Transcript with language-code badge */}
            <div className="bg-muted/50 min-h-32 rounded-lg p-4">
                <div className="mb-2 flex items-center justify-between">
                    <p className="text-muted-foreground text-xs font-medium uppercase tracking-wide">
                        Transcript
                    </p>
                    <span className="text-muted-foreground rounded bg-gray-200 px-2 py-0.5 text-xs dark:bg-gray-700">
                        {selectedLang}
                    </span>
                </div>
                <p className="text-sm leading-relaxed">
                    {transcript}
                    {interimTranscript && (
                        <span className="text-muted-foreground italic">
                            {interimTranscript}
                        </span>
                    )}
                    {!transcript && !interimTranscript && (
                        <span className="text-muted-foreground italic">
                            Speak in {selectedLanguage?.name}...
                        </span>
                    )}
                </p>
            </div>
        </div>
    );
};

API Reference

Hook Signature

function useSpeechRecognition(
    options?: UseSpeechRecognitionOptions,
): UseSpeechRecognitionReturn;

Options

| Property          | Type                                             | Default   | Description                                        |
| ----------------- | ------------------------------------------------ | --------- | -------------------------------------------------- |
| `lang`            | `string`                                         | `"en-US"` | BCP 47 language code (e.g., `"es-ES"`, `"ja-JP"`)  |
| `continuous`      | `boolean`                                        | `false`   | Keep listening after user stops speaking           |
| `interimResults`  | `boolean`                                        | `true`    | Return interim results before final                |
| `maxAlternatives` | `number`                                         | `1`       | Number of alternative transcriptions               |
| `onResult`        | `(transcript: string, isFinal: boolean) => void` | -         | Callback when a result is received                 |
| `onError`         | `(error: SpeechRecognitionErrorCode) => void`    | -         | Callback when an error occurs                      |
| `onEnd`           | `() => void`                                     | -         | Callback when recognition ends                     |

Return Value

| Property            | Type                                 | Description                               |
| ------------------- | ------------------------------------ | ----------------------------------------- |
| `transcript`        | `string`                             | The final transcribed text                |
| `interimTranscript` | `string`                             | In-progress transcription (not finalized) |
| `isListening`       | `boolean`                            | Whether recognition is active             |
| `isSupported`       | `boolean`                            | Whether the API is supported              |
| `error`             | `SpeechRecognitionErrorCode \| null` | Error code if recognition failed          |
| `errorMessage`      | `string \| null`                     | Human-readable error message              |
| `start`             | `() => void`                         | Start listening                           |
| `stop`              | `() => void`                         | Stop listening (waits for final result)   |
| `abort`             | `() => void`                         | Abort immediately (discards results)      |
| `resetTranscript`   | `() => void`                         | Clear the transcript                      |

Error Codes

| Code                     | Description                               |
| ------------------------ | ----------------------------------------- |
| `no-speech`              | No speech was detected                    |
| `aborted`                | Recognition was aborted                   |
| `audio-capture`          | Microphone not found or access failed     |
| `network`                | Network error during recognition          |
| `not-allowed`            | Microphone permission denied              |
| `service-not-allowed`    | Speech recognition service not allowed    |
| `bad-grammar`            | Speech grammar error occurred             |
| `language-not-supported` | The specified language is not supported   |

Browser Support

| Browser | Support                      |
| ------- | ---------------------------- |
| Chrome  | ✅ Full support              |
| Edge    | ✅ Full support              |
| Safari  | ✅ Full support (iOS 14.5+)  |
| Firefox | ❌ Not supported             |

Hook Source Code

import { useState, useEffect, useCallback, useRef } from "react";

/**
 * Error codes emitted by the Web Speech Recognition API's error event.
 * Each code has a human-readable counterpart in `getErrorMessage`.
 */
export type SpeechRecognitionErrorCode =
    | "no-speech"
    | "aborted"
    | "audio-capture"
    | "network"
    | "not-allowed"
    | "service-not-allowed"
    // NOTE(review): "bad-grammar" appears in older browser implementations;
    // verify against current spec drafts before removing.
    | "bad-grammar"
    | "language-not-supported";

/**
 * Options for the useSpeechRecognition hook
 */
export interface UseSpeechRecognitionOptions {
    /** Language for recognition (BCP 47 format, e.g., 'en-US', 'es-ES') */
    lang?: string;
    /** Keep listening after user stops speaking (default: false) */
    continuous?: boolean;
    /** Return interim results before final transcription (default: true) */
    interimResults?: boolean;
    /** Maximum number of alternative transcriptions to return (default: 1) */
    maxAlternatives?: number;
    /** Callback invoked for every result; `isFinal` distinguishes interim text */
    onResult?: (transcript: string, isFinal: boolean) => void;
    /** Callback when an error occurs */
    onError?: (error: SpeechRecognitionErrorCode) => void;
    /** Callback when recognition ends */
    onEnd?: () => void;
}

/**
 * Return type for useSpeechRecognition hook
 */
export interface UseSpeechRecognitionReturn {
    /** The final transcribed text (accumulates across results) */
    transcript: string;
    /** The interim (in-progress) transcribed text */
    interimTranscript: string;
    /** Whether speech recognition is currently active */
    isListening: boolean;
    /** Whether the Speech Recognition API is supported (false during SSR) */
    isSupported: boolean;
    /** The error code if recognition failed */
    error: SpeechRecognitionErrorCode | null;
    /** Human-readable error message */
    errorMessage: string | null;
    /** Start listening for speech */
    start: () => void;
    /** Stop listening gracefully (waits for final result) */
    stop: () => void;
    /** Abort listening immediately (discards results) */
    abort: () => void;
    /** Reset the transcript to empty */
    resetTranscript: () => void;
}

/**
 * Human-readable message for each speech recognition error code,
 * suitable for direct display in the UI.
 */
const ERROR_MESSAGES: Record<SpeechRecognitionErrorCode, string> = {
    "no-speech": "No speech was detected. Please try again.",
    aborted: "Speech recognition was aborted.",
    "audio-capture": "No microphone was found or microphone access failed.",
    network: "Network error occurred during recognition.",
    "not-allowed":
        "Microphone permission denied. Please allow access in your browser settings.",
    "service-not-allowed": "Speech recognition service is not allowed.",
    "bad-grammar": "Speech grammar error occurred.",
    "language-not-supported": "The specified language is not supported.",
};

/**
 * Resolves a speech recognition error code to its human-readable message.
 * Falls back to a generic message for codes outside the known set
 * (the browser may emit values not covered by our type at runtime).
 */
function getErrorMessage(error: SpeechRecognitionErrorCode): string {
    return (
        ERROR_MESSAGES[error] ??
        "An unknown error occurred during speech recognition."
    );
}

// Type declarations for the Web Speech API (not fully typed in TypeScript)

// Event delivered to `onresult`; carries the cumulative list of results.
interface SpeechRecognitionEvent extends Event {
    // Index of the first result that changed in this event; earlier
    // entries were already delivered in previous events.
    resultIndex: number;
    results: SpeechRecognitionResultList;
}

// Event delivered to `onerror`; `error` is the machine-readable code.
interface SpeechRecognitionErrorEvent extends Event {
    error: SpeechRecognitionErrorCode;
    message: string;
}

// Array-like collection of results (indexable, with length and item()).
interface SpeechRecognitionResultList {
    length: number;
    item(index: number): SpeechRecognitionResult;
    [index: number]: SpeechRecognitionResult;
}

// A single result: an array-like list of alternatives, plus a flag
// indicating whether the engine has finalized this segment.
interface SpeechRecognitionResult {
    length: number;
    item(index: number): SpeechRecognitionAlternative;
    [index: number]: SpeechRecognitionAlternative;
    isFinal: boolean;
}

// One candidate transcription with the engine's confidence score.
interface SpeechRecognitionAlternative {
    transcript: string;
    confidence: number;
}

// Minimal surface of the browser's SpeechRecognition object used by the hook.
interface SpeechRecognitionInstance extends EventTarget {
    continuous: boolean;
    interimResults: boolean;
    lang: string;
    maxAlternatives: number;
    start(): void;
    stop(): void;
    abort(): void;
    onresult: ((event: SpeechRecognitionEvent) => void) | null;
    onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
    onend: (() => void) | null;
    onstart: (() => void) | null;
    onspeechend: (() => void) | null;
}

declare global {
    interface Window {
        SpeechRecognition: new () => SpeechRecognitionInstance;
        // Chrome and Safari expose the constructor under the webkit prefix.
        webkitSpeechRecognition: new () => SpeechRecognitionInstance;
    }
}

/**
 * A React hook that provides speech-to-text functionality using the
 * Web Speech Recognition API.
 *
 * SSR-safe: on the server `isSupported` is false and all control
 * functions are no-ops.
 *
 * @param options - Configuration options for the hook
 * @returns UseSpeechRecognitionReturn object with transcript and control functions
 *
 * @example
 * ```tsx
 * // Basic usage
 * const { transcript, isListening, start, stop } = useSpeechRecognition();
 *
 * // With options
 * const { transcript, interimTranscript } = useSpeechRecognition({
 *     lang: 'es-ES',
 *     continuous: true,
 *     interimResults: true
 * });
 * ```
 */
export function useSpeechRecognition(
    options: UseSpeechRecognitionOptions = {},
): UseSpeechRecognitionReturn {
    const {
        lang = "en-US",
        continuous = false,
        interimResults = true,
        maxAlternatives = 1,
        onResult,
        onError,
        onEnd,
    } = options;

    const [transcript, setTranscript] = useState("");
    const [interimTranscript, setInterimTranscript] = useState("");
    const [isListening, setIsListening] = useState(false);
    const [error, setError] = useState<SpeechRecognitionErrorCode | null>(null);
    const [errorMessage, setErrorMessage] = useState<string | null>(null);

    // Use refs for callbacks to avoid re-creating the recognition instance
    // when callbacks change (which happens on every render if not memoized)
    const onResultRef = useRef(onResult);
    const onErrorRef = useRef(onError);
    const onEndRef = useRef(onEnd);

    const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
    const isManualStopRef = useRef(false);

    // FIX: the `onend` handler previously read the `error` STATE variable,
    // which it captured once when the init effect ran (always null, since
    // `error` is not in that effect's dependency array). Continuous mode
    // therefore auto-restarted even after fatal errors such as
    // "not-allowed", re-triggering the permission prompt in a loop.
    // A ref always exposes the current value to the stable closure.
    const errorRef = useRef<SpeechRecognitionErrorCode | null>(null);

    // Keep the callback refs in sync with the latest props.
    useEffect(() => {
        onResultRef.current = onResult;
        onErrorRef.current = onError;
        onEndRef.current = onEnd;
    }, [onResult, onError, onEnd]);

    // Check if API is supported (guards against SSR, where window is absent)
    const isSupported =
        typeof window !== "undefined" &&
        ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);

    // Initialize recognition instance. Re-created whenever a recognition
    // setting changes, so no separate settings-sync effect is needed.
    useEffect(() => {
        if (!isSupported) return;

        const SpeechRecognitionAPI =
            window.SpeechRecognition || window.webkitSpeechRecognition;
        const recognition = new SpeechRecognitionAPI();

        recognition.continuous = continuous;
        recognition.interimResults = interimResults;
        recognition.lang = lang;
        recognition.maxAlternatives = maxAlternatives;

        recognition.onstart = () => {
            setIsListening(true);
            errorRef.current = null;
            setError(null);
            setErrorMessage(null);
        };

        recognition.onresult = (event: SpeechRecognitionEvent) => {
            let finalTranscript = "";
            let currentInterim = "";

            // Only entries from resultIndex onward are new in this event;
            // earlier entries were already delivered previously.
            for (let i = event.resultIndex; i < event.results.length; i++) {
                const result = event.results[i];
                if (!result || !result[0]) continue;

                if (result.isFinal) {
                    finalTranscript += result[0].transcript;
                } else {
                    currentInterim += result[0].transcript;
                }
            }

            if (finalTranscript) {
                setTranscript((prev) => prev + finalTranscript);
                onResultRef.current?.(finalTranscript, true);
            }

            setInterimTranscript(currentInterim);
            if (currentInterim) {
                onResultRef.current?.(currentInterim, false);
            }
        };

        recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
            const errorCode = event.error;
            errorRef.current = errorCode;
            setError(errorCode);
            setErrorMessage(getErrorMessage(errorCode));
            setIsListening(false);
            onErrorRef.current?.(errorCode);
        };

        recognition.onend = () => {
            setIsListening(false);
            setInterimTranscript("");

            // Auto-restart if continuous mode and not manually stopped.
            // Read the error through the ref so a session that ended with
            // an error (e.g. "not-allowed") is never restarted.
            if (continuous && !isManualStopRef.current && !errorRef.current) {
                try {
                    recognition.start();
                } catch {
                    // Ignore if already started
                }
            }

            onEndRef.current?.();
        };

        recognitionRef.current = recognition;

        return () => {
            recognition.abort();
        };
        // Callbacks are intentionally omitted: they are reached via refs.
    }, [isSupported, lang, continuous, interimResults, maxAlternatives]);

    /** Start listening for speech. Clears any previous error state. */
    const start = useCallback(() => {
        if (!isSupported || !recognitionRef.current) return;

        isManualStopRef.current = false;
        errorRef.current = null;
        setError(null);
        setErrorMessage(null);

        try {
            recognitionRef.current.start();
        } catch {
            // Ignore if already started - this can happen in continuous mode
        }
    }, [isSupported]);

    /** Stop gracefully; pending audio is still finalized before onend. */
    const stop = useCallback(() => {
        if (!recognitionRef.current) return;

        isManualStopRef.current = true;
        recognitionRef.current.stop();
    }, []);

    /** Abort immediately, discarding any in-progress results. */
    const abort = useCallback(() => {
        if (!recognitionRef.current) return;

        isManualStopRef.current = true;
        recognitionRef.current.abort();
        setInterimTranscript("");
    }, []);

    /** Clear both the final and interim transcripts. */
    const resetTranscript = useCallback(() => {
        setTranscript("");
        setInterimTranscript("");
    }, []);

    return {
        transcript,
        interimTranscript,
        isListening,
        isSupported,
        error,
        errorMessage,
        start,
        stop,
        abort,
        resetTranscript,
    };
}

export default useSpeechRecognition;