/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */

class VoiceManager {
  constructor() {
    this.mode = 'browser';             // 'browser' (Web Speech API STT) or 'api' (server-side STT)
    this.recognition = null;           // SpeechRecognition instance (browser mode only)
    this.isRecording = false;
    this.isDisabled = false;           // true when no STT path is available at all
    this.lastInputWasVoice = false;    // lets callers auto-play TTS replies to voice input
    this.mediaRecorder = null;         // MediaRecorder instance (api mode only)
    this.audioChunks = [];             // accumulated audio data for the current api-mode recording
    this.onResult = null;              // callback(text): final transcript is ready
    this.onStateChange = null;         // callback(isRecording): mic UI state
    this.browserSTTSupported = false;  // set by _initBrowserSTT()
    this.apiAvailable = false;         // server reports API STT/TTS availability
    this.onProcessing = null;          // callback(isProcessing): "Transcribing..." UI state
  }

  /**
   * Initialize voice support: probe browser STT, fetch server voice config,
   * then pick the best available mode — falling back from the preferred mode
   * to the other one, or disabling voice entirely when neither works.
   */
  async init() {
    this._initBrowserSTT();
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        this.mode = config.voice_mode;
        this.apiAvailable = config.voice_api_available || false;
        console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }
    // Determine best STT method.
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      if (this.apiAvailable) {
        this.mode = 'api';
      } else {
        this.isDisabled = true;
      }
    } else if (this.mode === 'api' && !this.apiAvailable) {
      if (this.browserSTTSupported) {
        this.mode = 'browser';
      } else {
        this.isDisabled = true;
      }
    }
    console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode);
  }

  /**
   * Wire up the browser Web Speech API recognizer (German, single utterance,
   * final results only). Sets browserSTTSupported accordingly.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      this.browserSTTSupported = false;
      return;
    }
    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;
    this.recognition.interimResults = false;
    this.recognition.lang = 'de-DE';
    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };
    this.recognition.onend = () => {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };
    this.recognition.onerror = (event) => {
      console.error('[Voice] STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Allow it in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };
  }

  /**
   * Start capturing microphone input using the active mode.
   * UI state is only flipped to "recording" after the hardware actually
   * starts, so a denied/slow mic prompt never shows a fake recording state.
   */
  async startRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    if (this.mode === 'api') {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
        this.mediaRecorder.ondataavailable = (e) => {
          if (e.data.size > 0) this.audioChunks.push(e.data);
        };
        this.mediaRecorder.onstop = async () => {
          // Release the mic before shipping audio off for transcription.
          stream.getTracks().forEach(t => t.stop());
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };
        this.mediaRecorder.start();
        // Now we are truly recording.
        this.isRecording = true;
        this.lastInputWasVoice = true;
        if (this.onStateChange) this.onStateChange(true);
      } catch (e) {
        console.error('[Voice] Mic access error:', e);
        showToast('Microphone access denied or error', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.start();
          // Recognition 'onstart' would be a stronger signal, but this is
          // adequate for browser mode.
          this.isRecording = true;
          this.lastInputWasVoice = true;
          if (this.onStateChange) this.onStateChange(true);
        } catch (e) {
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed. Try again.', 'error');
        }
      }
    }
  }

  /**
   * Stop the current recording. In api mode the transcription is kicked off
   * by the recorder's onstop handler; the processing indicator is shown
   * immediately rather than waiting for that callback.
   */
  stopRecording() {
    if (this.mode === 'api') {
      if (this.mediaRecorder?.state === 'recording') {
        if (this.onProcessing) this.onProcessing(true);
        this.mediaRecorder.stop();
      } else {
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      try {
        this.recognition?.stop();
      } catch (e) { /* already stopped */ }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /** Toggle between recording and idle (mic button handler). */
  toggleRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    this.isRecording ? this.stopRecording() : this.startRecording();
  }

  /**
   * Upload a recorded audio blob to the server STT endpoint and deliver the
   * transcript through onResult. Always clears recording/processing UI state.
   * @param {Blob} blob - audio/webm recording
   */
  async _transcribeAPI(blob) {
    if (this.onProcessing) this.onProcessing(true);
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });
      if (response?.ok) {
        const data = await response.json();
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        // response may be null/undefined if api() failed to produce a Response;
        // guard so we report the real failure instead of a TypeError.
        const err = response ? await response.json().catch(() => ({})) : {};
        showToast(`Transcription failed: ${err.detail || 'Unknown error'}`, 'error');
      }
    } catch (e) {
      showToast('Transcription network error', 'error');
    } finally {
      this.isRecording = false;
      // Stop processing state.
      if (this.onProcessing) this.onProcessing(false);
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Fetch TTS audio blob for text (API only).
   * Returns an object URL for the audio, or null on any failure.
   * @param {string} text - may contain markdown; it is stripped before synthesis
   * @returns {Promise<string|null>}
   */
  async fetchAudio(text) {
    if (!this.apiAvailable) return null;
    const clean = VoiceManager.stripMarkdown(text);
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
        method: 'POST',
      });
      if (response?.ok) {
        const audioBlob = await response.blob();
        return URL.createObjectURL(audioBlob);
      } else {
        // Same guard as _transcribeAPI: response may be missing entirely.
        const err = response ? await response.json().catch(() => ({})) : {};
        console.warn('[Voice] TTS error:', err);
      }
    } catch (e) {
      console.warn('[Voice] TTS network error:', e);
    }
    return null;
  }

  /**
   * Play a pre-fetched audio object URL with avatar "speaking" feedback.
   * Resolves when playback ends or errors; always revokes the object URL.
   * @param {string} audioUrl - URL from fetchAudio()
   */
  async playAudio(audioUrl) {
    if (!audioUrl) return;
    const audio = new Audio(audioUrl);
    // Visual feedback.
    const avatarContainer = document.querySelector('.avatar-container');
    if (avatarContainer) avatarContainer.classList.add('speaking');
    try {
      await audio.play();
      return new Promise(resolve => {
        audio.onended = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          URL.revokeObjectURL(audioUrl);
          resolve();
        };
        // Handle errors during playback (e.g. format issues).
        audio.onerror = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          URL.revokeObjectURL(audioUrl);
          resolve();
        };
      });
    } catch (e) {
      console.error("Playback failed", e);
      if (avatarContainer) avatarContainer.classList.remove('speaking');
      URL.revokeObjectURL(audioUrl);
    }
  }

  /**
   * Legacy convenience method: fetch and immediately play TTS for text.
   */
  async speak(text) {
    const url = await this.fetchAudio(text);
    if (url) await this.playAudio(url);
  }

  /**
   * Strip markdown formatting from text so TTS reads naturally.
   * @param {string} text
   * @returns {string} plain prose with paragraph breaks turned into pauses
   */
  static stripMarkdown(text) {
    return text
      .replace(/```[\s\S]*?```/g, '')          // code blocks
      .replace(/`([^`]+)`/g, '$1')             // inline code
      .replace(/^#{1,6}\s+/gm, '')             // headings (anchored: don't eat '#' mid-sentence)
      .replace(/\*\*([^*]+)\*\*/g, '$1')       // bold
      .replace(/\*([^*]+)\*/g, '$1')           // italic
      .replace(/__([^_]+)__/g, '$1')           // bold alt
      .replace(/_([^_]+)_/g, '$1')             // italic alt
      .replace(/~~([^~]+)~~/g, '$1')           // strikethrough
      .replace(/^\s*[-*+]\s+/gm, '')           // unordered lists
      .replace(/^\s*\d+\.\s+/gm, '')           // ordered lists
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')  // images (before links, else '![a](u)' leaves '!a')
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links
      .replace(/^\s*>\s+/gm, '')               // blockquotes (anchored: keep '5 > 3' in prose)
      .replace(/\n{2,}/g, '. ')                // paragraph breaks → pause
      .replace(/\n/g, ' ')                     // newlines → space
      .trim();
  }
}