/* FluentGerman.ai — Voice module (Web Speech API + API mode) */

/**
 * Handles speech input (STT) and output (TTS) in two modes:
 *  - 'browser': Web Speech API (SpeechRecognition + speechSynthesis)
 *  - 'api':     MediaRecorder capture sent to the server (Whisper) and
 *               server-synthesized audio played back via <audio>
 *
 * Depends on app-level globals defined elsewhere: `api()` (fetch wrapper)
 * and `showToast()` (UI notifications).
 */
class VoiceManager {
  constructor() {
    this.mode = 'browser'; // overwritten from server config in init()
    this.recognition = null; // SpeechRecognition instance (browser mode)
    this.synthesis = window.speechSynthesis;
    this.isRecording = false;
    this.lastInputWasVoice = false; // tracks if last message was spoken
    this.mediaRecorder = null; // MediaRecorder instance (api mode)
    this.audioChunks = [];
    this.onResult = null; // callback(text): receives the transcript
    this.onStateChange = null; // callback(isRecording): UI state hook
  }

  /**
   * Initialize browser STT (always, as the fallback) and fetch the
   * configured voice mode from the server. Any failure — network error or
   * missing config field — leaves the mode at 'browser'.
   */
  async init() {
    // Always init browser STT as fallback
    this._initBrowserSTT();

    // Fetch voice mode from server
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        // Guard: a missing voice_mode must not leave mode undefined.
        this.mode = config.voice_mode ?? 'browser';
        console.log('[Voice] Mode:', this.mode);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }
  }

  /**
   * Set up the Web Speech API recognizer for German, if supported.
   * No-op (with a warning) in browsers without SpeechRecognition.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      console.warn('[Voice] Speech recognition not supported in this browser');
      return;
    }

    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;
    this.recognition.interimResults = false;
    this.recognition.lang = 'de-DE';

    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] Browser STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };

    this.recognition.onend = () => {
      console.log('[Voice] Browser STT ended');
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };

    this.recognition.onerror = (event) => {
      console.error('[Voice] Browser STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Please allow microphone in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };

    console.log('[Voice] Browser STT initialized');
  }

  /**
   * Begin capturing speech. In 'api' mode, records audio via MediaRecorder
   * (transcribed server-side on stop); in 'browser' mode, starts the
   * SpeechRecognition session. Resets state and notifies onStateChange
   * on any failure path.
   */
  async startRecording() {
    this.isRecording = true;
    this.lastInputWasVoice = true;
    if (this.onStateChange) this.onStateChange(true);

    if (this.mode === 'api') {
      // API mode — record audio via MediaRecorder, send to Whisper
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        this.mediaRecorder = new MediaRecorder(stream);

        this.mediaRecorder.ondataavailable = (event) => {
          this.audioChunks.push(event.data);
        };

        this.mediaRecorder.onstop = async () => {
          // Release the mic before transcribing so the indicator turns off.
          stream.getTracks().forEach(track => track.stop());
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };

        this.mediaRecorder.start();
        console.log('[Voice] API recording started');
      } catch (e) {
        console.error('[Voice] Microphone access error:', e);
        showToast('Microphone access denied', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      // Browser mode — use Web Speech API
      if (this.recognition) {
        try {
          this.recognition.start();
          console.log('[Voice] Browser STT started');
        } catch (e) {
          // start() throws if a session is already active.
          console.error('[Voice] Failed to start recognition:', e);
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed to start. Try again.', 'error');
        }
      } else {
        console.warn('[Voice] No speech recognition available');
        showToast('Speech recognition not supported in this browser', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    }
  }

  /**
   * Stop the active capture. In 'api' mode this triggers the MediaRecorder
   * onstop handler (which transcribes); in 'browser' mode it ends the
   * recognition session. Always leaves isRecording false.
   */
  stopRecording() {
    console.log('[Voice] Stopping recording...');

    if (this.mode === 'api') {
      if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
        // isRecording is cleared in _transcribeAPI's finally block.
        this.mediaRecorder.stop();
      } else {
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.stop();
        } catch (e) {
          // Already stopped
        }
      }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Send recorded audio to the server for transcription and deliver the
   * resulting text via onResult. On a non-OK response, switches to
   * browser mode for the rest of the session.
   * @param {Blob} blob - WebM audio captured by MediaRecorder.
   */
  async _transcribeAPI(blob) {
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');

      console.log('[Voice] Sending audio to API for transcription...');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });

      if (response?.ok) {
        const data = await response.json();
        console.log('[Voice] API transcription result:', data.text);
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        showToast('Transcription failed. Falling back to browser voice.', 'error');
        // Fallback: switch to browser mode for this session
        this.mode = 'browser';
      }
    } catch (e) {
      console.error('[Voice] API transcription error:', e);
      showToast('Transcription error', 'error');
    } finally {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Speak text aloud using the configured mode.
   * @param {string} text - Text to synthesize (German).
   * @returns {Promise<void>} resolves when playback finishes.
   */
  async speak(text) {
    if (this.mode === 'api') {
      return this._speakAPI(text);
    } else {
      return this._speakBrowser(text);
    }
  }

  /**
   * Speak via the browser's speechSynthesis. Resolves on end or error so
   * callers never hang.
   * @param {string} text
   * @returns {Promise<void>}
   */
  _speakBrowser(text) {
    return new Promise((resolve) => {
      // Cancel any ongoing speech
      this.synthesis.cancel();

      const utterance = new SpeechSynthesisUtterance(text);
      utterance.lang = 'de-DE';
      utterance.rate = 0.95;
      utterance.onend = resolve;
      utterance.onerror = () => {
        console.warn('[Voice] Browser TTS error');
        resolve();
      };
      this.synthesis.speak(utterance);
    });
  }

  /**
   * Speak via server-side TTS; falls back to browser TTS on any failure.
   * @param {string} text
   * @returns {Promise<void>}
   */
  async _speakAPI(text) {
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(text)}`, {
        method: 'POST',
      });

      if (response?.ok) {
        const audioBlob = await response.blob();
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);
        await audio.play();
        return new Promise((resolve) => {
          // Resolve on end OR error so callers never hang, and revoke the
          // object URL either way to avoid leaking a blob per playback.
          const done = () => {
            URL.revokeObjectURL(audioUrl);
            resolve();
          };
          audio.onended = done;
          audio.onerror = done;
        });
      }
    } catch (e) {
      console.warn('[Voice] API TTS failed, falling back to browser');
    }
    // Fallback to browser TTS
    return this._speakBrowser(text);
  }

  /** Toggle between starting and stopping a recording session. */
  toggleRecording() {
    if (this.isRecording) {
      this.stopRecording();
    } else {
      this.startRecording();
    }
  }
}