// CI: Deploy FluentGerman.ai / deploy (push) — all checks successful in 53s.
/* FluentGerman.ai — Voice module (Web Speech API + API mode) */

/**
 * VoiceManager — speech input (STT) and output (TTS) for the chat UI.
 *
 * Two modes, chosen by server config and auto-corrected per browser support:
 *   'browser' — Web Speech API (SpeechRecognition + speechSynthesis)
 *   'api'     — MediaRecorder capture uploaded to /voice/transcribe (Whisper),
 *               TTS audio fetched from /voice/synthesize
 *
 * Consumers assign the `onResult(text)` and `onStateChange(isRecording)`
 * callbacks. Relies on app-level globals `api()` (fetch wrapper) and
 * `showToast()` — assumed present; confirm against app.js.
 */
class VoiceManager {
  constructor() {
    this.mode = 'browser'; // will be set from server config in init()
    this.recognition = null; // SpeechRecognition instance (browser mode only)
    this.synthesis = window.speechSynthesis; // may be undefined in some browsers/webviews
    this.isRecording = false;
    this.lastInputWasVoice = false; // tracks if last message was spoken
    this.mediaRecorder = null; // MediaRecorder instance (api mode only)
    this.audioChunks = []; // audio data accumulated for the current recording
    this.onResult = null; // callback(text): a transcription is ready
    this.onStateChange = null; // callback(isRecording): UI state hook
    this.browserSTTSupported = false;
  }

  /**
   * Initialize: probe browser STT support, then fetch the configured voice
   * mode from the server. Falls back to 'browser' when the config endpoint
   * is unreachable, and to 'api' when the browser lacks SpeechRecognition.
   */
  async init() {
    // Check browser STT support
    this._initBrowserSTT();

    // Fetch voice mode from server
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        this.mode = config.voice_mode;
        console.log('[Voice] Server mode:', this.mode);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }

    // Auto-fallback: if server says "browser" but browser doesn't support STT, use API
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      console.warn('[Voice] Browser STT not supported, falling back to API mode');
      this.mode = 'api';
      showToast('Using cloud voice recognition — your browser doesn\'t support built-in speech recognition.', 'info');
    }

    console.log('[Voice] Active mode:', this.mode);
  }

  /**
   * Set up the Web Speech API recognizer (German, single utterance,
   * final results only) and record whether the browser supports it.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      console.warn('[Voice] SpeechRecognition API not available in this browser');
      this.browserSTTSupported = false;
      return;
    }

    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false; // one utterance per start()
    this.recognition.interimResults = false; // only deliver the final transcript
    this.recognition.lang = 'de-DE';

    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] Browser STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };

    // Fires after both success and error — the single place recording state is cleared.
    this.recognition.onend = () => {
      console.log('[Voice] Browser STT ended');
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };

    this.recognition.onerror = (event) => {
      console.error('[Voice] Browser STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);

      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Please allow microphone in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };

    console.log('[Voice] Browser STT initialized');
  }

  /**
   * Begin capturing speech. In 'api' mode records the microphone with
   * MediaRecorder (transcribed on stop); in 'browser' mode starts the
   * SpeechRecognition session. Notifies `onStateChange` and reverts the
   * recording state on any failure.
   */
  async startRecording() {
    this.isRecording = true;
    this.lastInputWasVoice = true;
    if (this.onStateChange) this.onStateChange(true);

    if (this.mode === 'api') {
      // API mode — record audio via MediaRecorder, send to Whisper
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        this.mediaRecorder = new MediaRecorder(stream);

        this.mediaRecorder.ondataavailable = (event) => {
          this.audioChunks.push(event.data);
        };

        this.mediaRecorder.onstop = async () => {
          // Release the mic before transcribing so the browser indicator clears.
          stream.getTracks().forEach(track => track.stop());
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };

        this.mediaRecorder.start();
        console.log('[Voice] API recording started');
      } catch (e) {
        console.error('[Voice] Microphone access error:', e);
        showToast('Microphone access denied', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      // Browser mode — use Web Speech API
      if (this.recognition) {
        try {
          this.recognition.start();
          console.log('[Voice] Browser STT started');
        } catch (e) {
          // start() throws if a session is already active.
          console.error('[Voice] Failed to start recognition:', e);
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed to start. Try again.', 'error');
        }
      } else {
        // Shouldn't happen after init() fallback, but safety net
        console.warn('[Voice] No speech recognition available, switching to API');
        this.mode = 'api';
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
        showToast('Switched to cloud voice recognition. Please try again.', 'info');
      }
    }
  }

  /**
   * Stop an in-progress recording. In 'api' mode the state flags are
   * cleared later by _transcribeAPI's finally block (via the recorder's
   * onstop handler); in 'browser' mode they are cleared here immediately.
   */
  stopRecording() {
    console.log('[Voice] Stopping recording...');
    if (this.mode === 'api') {
      if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
        this.mediaRecorder.stop();
      } else {
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.stop();
        } catch (e) {
          // Already stopped
        }
      }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Upload recorded audio to the server for transcription and deliver the
   * text through `onResult`. Always clears recording state when done.
   * @param {Blob} blob - webm audio captured by MediaRecorder
   */
  async _transcribeAPI(blob) {
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');

      console.log('[Voice] Sending audio to API for transcription...');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });

      if (response?.ok) {
        const data = await response.json();
        console.log('[Voice] API transcription result:', data.text);
        if (data.text) {
          this.lastInputWasVoice = true;
          if (this.onResult) this.onResult(data.text);
        } else {
          // Fix: silence used to be forwarded as an empty chat message.
          showToast('No speech detected. Try again.', 'error');
        }
      } else {
        showToast('Transcription failed. Please try again.', 'error');
      }
    } catch (e) {
      console.error('[Voice] API transcription error:', e);
      showToast('Transcription error', 'error');
    } finally {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Speak `text` aloud using whichever TTS backend matches the active mode.
   * @param {string} text - German text to synthesize
   * @returns {Promise<void>} resolves when playback finishes (or fails)
   */
  async speak(text) {
    if (this.mode === 'api') {
      return this._speakAPI(text);
    } else {
      return this._speakBrowser(text);
    }
  }

  /**
   * Browser TTS via speechSynthesis. Never rejects — errors resolve
   * immediately so callers can't hang.
   */
  _speakBrowser(text) {
    return new Promise((resolve) => {
      // Fix: guard against browsers where speechSynthesis is undefined —
      // previously this threw on .cancel() instead of resolving.
      if (!this.synthesis) {
        console.warn('[Voice] Browser TTS error');
        resolve();
        return;
      }

      // Cancel any ongoing speech
      this.synthesis.cancel();
      const utterance = new SpeechSynthesisUtterance(text);
      utterance.lang = 'de-DE';
      utterance.rate = 0.95; // slightly slower for learner comprehension
      utterance.onend = resolve;
      utterance.onerror = () => {
        console.warn('[Voice] Browser TTS error');
        resolve();
      };
      this.synthesis.speak(utterance);
    });
  }

  /**
   * Server TTS: fetch synthesized audio and play it. Falls back to browser
   * TTS on any failure (network error, non-OK response, autoplay block).
   */
  async _speakAPI(text) {
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(text)}`, {
        method: 'POST',
      });

      if (response?.ok) {
        const audioBlob = await response.blob();
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);

        // Fix: attach handlers BEFORE play() so a very short clip cannot
        // end before the listener exists (promise would never resolve),
        // and revoke the object URL (it was previously leaked on every call).
        const finished = new Promise((resolve) => {
          const cleanup = () => {
            URL.revokeObjectURL(audioUrl);
            resolve();
          };
          audio.onended = cleanup;
          audio.onerror = cleanup;
        });

        await audio.play();
        return finished;
      }
    } catch (e) {
      console.warn('[Voice] API TTS failed, falling back to browser');
    }
    // Fallback to browser TTS
    return this._speakBrowser(text);
  }

  /** Toggle between start and stop, driven by the mic button. */
  toggleRecording() {
    if (this.isRecording) {
      this.stopRecording();
    } else {
      // Fire-and-forget: all failures are handled inside startRecording.
      void this.startRecording();
    }
  }
}