/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */

/**
 * Manages speech input (STT) and speech output (TTS) for the app.
 *
 * STT runs in one of two modes:
 *   - 'browser': Web Speech API (SpeechRecognition), Chrome/Edge only.
 *   - 'api':     records audio with MediaRecorder and posts it to the server.
 * TTS is always server-side (`/voice/synthesize`); `apiAvailable` gates it.
 *
 * Relies on globals defined elsewhere in the app: `api()` (fetch wrapper)
 * and `showToast()` — TODO confirm both are loaded before this module.
 */
class VoiceManager {
  constructor() {
    this.mode = 'browser';            // 'browser' | 'api' — resolved in init()
    this.recognition = null;          // SpeechRecognition instance (browser mode)
    this.isRecording = false;
    this.isDisabled = false;          // true when no STT path is available at all
    this.lastInputWasVoice = false;   // set on voice input; presumably drives auto-TTS of replies — verify in caller
    this.mediaRecorder = null;        // MediaRecorder instance (api mode)
    this.audioChunks = [];            // Blob chunks accumulated while recording
    this.onResult = null;             // callback(text): final transcript
    this.onStateChange = null;        // callback(isRecording: boolean)
    this.browserSTTSupported = false;
    this.apiAvailable = false;        // server voice API reachable (TTS + api-STT)
    this.onProcessing = null;         // callback(boolean) for "Transcribing..." state
  }

  /**
   * Probe browser STT support, fetch server voice config, and settle on the
   * best available mode. Falls back between 'browser' and 'api'; disables
   * voice entirely only when neither path works.
   */
  async init() {
    this._initBrowserSTT();
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        this.mode = config.voice_mode;
        this.apiAvailable = config.voice_api_available || false;
        console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }
    // Determine best STT method: honour the server's preference, but swap to
    // whichever side is actually usable in this browser.
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      if (this.apiAvailable) {
        this.mode = 'api';
      } else {
        this.isDisabled = true;
      }
    } else if (this.mode === 'api' && !this.apiAvailable) {
      if (this.browserSTTSupported) {
        this.mode = 'browser';
      } else {
        this.isDisabled = true;
      }
    }
    console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode);
  }

  /**
   * Wire up the Web Speech API recognizer (German, single utterance,
   * final results only). Sets `browserSTTSupported` accordingly.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      this.browserSTTSupported = false;
      return;
    }
    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;
    this.recognition.interimResults = false;
    this.recognition.lang = 'de-DE';

    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };
    this.recognition.onend = () => {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };
    this.recognition.onerror = (event) => {
      console.error('[Voice] STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Allow it in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };
  }

  /**
   * Begin capturing audio. In 'api' mode records via MediaRecorder; in
   * 'browser' mode starts SpeechRecognition. UI state is updated only after
   * the hardware actually starts, to avoid a "fake" recording indicator.
   */
  async startRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    if (this.mode === 'api') {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        // FIX: hard-coding { mimeType: 'audio/webm' } throws NotSupportedError
        // on browsers without webm support (e.g. Safari); probe first and let
        // the browser pick its default container when webm is unavailable.
        const webmOk =
          typeof MediaRecorder !== 'undefined' &&
          typeof MediaRecorder.isTypeSupported === 'function' &&
          MediaRecorder.isTypeSupported('audio/webm');
        this.mediaRecorder = webmOk
          ? new MediaRecorder(stream, { mimeType: 'audio/webm' })
          : new MediaRecorder(stream);
        this.mediaRecorder.ondataavailable = (e) => {
          if (e.data.size > 0) this.audioChunks.push(e.data);
        };
        this.mediaRecorder.onstop = async () => {
          stream.getTracks().forEach(t => t.stop()); // release the mic
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };
        this.mediaRecorder.start();
        // Now we are truly recording.
        this.isRecording = true;
        this.lastInputWasVoice = true;
        if (this.onStateChange) this.onStateChange(true);
      } catch (e) {
        console.error('[Voice] Mic access error:', e);
        showToast('Microphone access denied or error', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.start();
          // Recognition 'onstart' would be better, but this is okay for browser mode.
          this.isRecording = true;
          this.lastInputWasVoice = true;
          if (this.onStateChange) this.onStateChange(true);
        } catch (e) {
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed. Try again.', 'error');
        }
      }
    }
  }

  /**
   * Stop capturing. In 'api' mode this triggers MediaRecorder.onstop, which
   * uploads the recording; the "Transcribing..." state is shown immediately
   * rather than waiting for that async callback.
   */
  stopRecording() {
    if (this.mode === 'api') {
      if (this.mediaRecorder?.state === 'recording') {
        if (this.onProcessing) this.onProcessing(true);
        this.mediaRecorder.stop();
      } else {
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      try {
        this.recognition?.stop();
      } catch (e) { /* already stopped */ }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /** Toggle between startRecording() and stopRecording(). */
  toggleRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    this.isRecording ? this.stopRecording() : this.startRecording();
  }

  /**
   * Upload a recorded audio blob to the server STT endpoint and deliver the
   * transcript through `onResult`.
   * @param {Blob} blob - recorded audio (webm container)
   */
  async _transcribeAPI(blob) {
    if (this.onProcessing) this.onProcessing(true);
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });
      if (response?.ok) {
        const data = await response.json();
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        // FIX: `response` may be nullish here (the ok-check used `?.`);
        // the original called response.json() unguarded and threw a
        // TypeError instead of showing the toast.
        const err = response ? await response.json().catch(() => ({})) : {};
        showToast(`Transcription failed: ${err.detail || 'Unknown error'}`, 'error');
      }
    } catch (e) {
      showToast('Transcription network error', 'error');
    } finally {
      this.isRecording = false;
      if (this.onProcessing) this.onProcessing(false);
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Fetch TTS audio blob for text (API only).
   * @param {string} text - may contain markdown; it is stripped before TTS
   * @returns {Promise<string|null>} blob object URL, or null on any failure
   */
  async fetchAudio(text) {
    if (!this.apiAvailable) return null;
    const clean = VoiceManager.stripMarkdown(text);
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
        method: 'POST',
      });
      if (response?.ok) {
        const audioBlob = await response.blob();
        return URL.createObjectURL(audioBlob);
      } else {
        // FIX: same nullish-response guard as in _transcribeAPI.
        const err = response ? await response.json().catch(() => ({})) : {};
        console.warn('[Voice] TTS error:', err);
      }
    } catch (e) {
      console.warn('[Voice] TTS network error:', e);
    }
    return null;
  }

  /**
   * Play audio with an inline mini-player (progress bar, seek, replay).
   * @param {string} audioUrl - blob URL from fetchAudio()
   * @param {HTMLElement} [containerEl] - element to append the player into
   * @returns {Promise} resolves when first playback ends
   */
  async playAudio(audioUrl, containerEl) {
    if (!audioUrl) return;
    const audio = new Audio(audioUrl);

    // Visual feedback — avatar pulse while speaking.
    const avatarContainer = document.querySelector('.avatar-container');
    if (avatarContainer) avatarContainer.classList.add('speaking');

    // ── Build player DOM ──────────────────────────────────────────
    const player = document.createElement('div');
    player.className = 'audio-player';

    const playBtn = document.createElement('button');
    playBtn.className = 'audio-player-btn playing';
    playBtn.innerHTML = VoiceManager._pauseIcon();
    playBtn.title = 'Pause';

    const track = document.createElement('div');
    track.className = 'audio-player-track';
    const fill = document.createElement('div');
    fill.className = 'audio-player-fill';
    track.appendChild(fill);

    const timeLabel = document.createElement('span');
    timeLabel.className = 'audio-player-time';
    timeLabel.textContent = '0:00 / 0:00';

    player.appendChild(playBtn);
    player.appendChild(track);
    player.appendChild(timeLabel);
    if (containerEl) {
      containerEl.appendChild(player);
    }

    // ── Helpers ───────────────────────────────────────────────────
    function fmt(s) {
      if (!isFinite(s)) return '0:00';
      const m = Math.floor(s / 60);
      const sec = Math.floor(s % 60);
      return `${m}:${sec.toString().padStart(2, '0')}`;
    }
    function updateProgress() {
      if (!audio.duration) return;
      const pct = (audio.currentTime / audio.duration) * 100;
      fill.style.width = pct + '%';
      timeLabel.textContent = `${fmt(audio.currentTime)} / ${fmt(audio.duration)}`;
    }

    // ── Events ────────────────────────────────────────────────────
    audio.addEventListener('timeupdate', updateProgress);
    audio.addEventListener('loadedmetadata', () => {
      timeLabel.textContent = `0:00 / ${fmt(audio.duration)}`;
    });

    // Seek on track click.
    track.addEventListener('click', (e) => {
      // FIX: before metadata loads, duration is NaN and the original
      // assigned NaN to currentTime; ignore clicks until it is known.
      if (!Number.isFinite(audio.duration) || audio.duration === 0) return;
      const rect = track.getBoundingClientRect();
      const pct = (e.clientX - rect.left) / rect.width;
      audio.currentTime = pct * audio.duration;
      updateProgress();
    });

    // Play/pause toggle.
    playBtn.addEventListener('click', () => {
      if (audio.paused) {
        audio.play();
        playBtn.classList.add('playing');
        playBtn.innerHTML = VoiceManager._pauseIcon();
        playBtn.title = 'Pause';
        if (avatarContainer) avatarContainer.classList.add('speaking');
      } else {
        audio.pause();
        playBtn.classList.remove('playing');
        playBtn.innerHTML = VoiceManager._playIcon();
        playBtn.title = 'Play';
        if (avatarContainer) avatarContainer.classList.remove('speaking');
      }
    });

    // ── Playback ──────────────────────────────────────────────────
    try {
      // Wait for audio to be fully buffered before playing.
      await new Promise((resolve, reject) => {
        audio.addEventListener('canplaythrough', resolve, { once: true });
        audio.addEventListener('error', reject, { once: true });
        audio.load(); // Explicitly trigger loading
      });
      audio.currentTime = 0; // Ensure we start from the very beginning
      await audio.play();
      return new Promise(resolve => {
        audio.onended = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          playBtn.classList.remove('playing');
          playBtn.innerHTML = VoiceManager._playIcon();
          playBtn.title = 'Replay';
          fill.style.width = '100%';
          // Reset to beginning for replay.
          audio.currentTime = 0;
          resolve();
        };
        audio.onerror = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          resolve();
        };
      });
    } catch (e) {
      console.error('Playback failed', e);
      if (avatarContainer) avatarContainer.classList.remove('speaking');
      playBtn.classList.remove('playing');
      playBtn.innerHTML = VoiceManager._playIcon();
    }
  }

  // ── SVG icons (inline, no external deps) ────────────────────────
  // FIX: both originally returned empty strings, leaving the player
  // button blank; supply minimal inline play/pause glyphs.
  static _playIcon() {
    return '<svg viewBox="0 0 24 24" width="16" height="16" fill="currentColor" aria-hidden="true"><path d="M8 5v14l11-7z"/></svg>';
  }
  static _pauseIcon() {
    return '<svg viewBox="0 0 24 24" width="16" height="16" fill="currentColor" aria-hidden="true"><path d="M6 5h4v14H6zm8 0h4v14h-4z"/></svg>';
  }

  /**
   * Legacy method for backward compatibility if needed,
   * or for simple direct speech.
   */
  async speak(text) {
    const url = await this.fetchAudio(text);
    if (url) await this.playAudio(url);
  }

  /**
   * Strip markdown formatting from text so TTS reads naturally.
   * @param {string} text - raw (possibly markdown) text
   * @returns {string} plain text with paragraph breaks turned into pauses
   */
  static stripMarkdown(text) {
    return text
      .replace(/```[\s\S]*?```/g, '')          // code blocks
      .replace(/`([^`]+)`/g, '$1')             // inline code
      .replace(/#{1,6}\s+/g, '')               // headings
      .replace(/\*\*([^*]+)\*\*/g, '$1')       // bold
      .replace(/\*([^*]+)\*/g, '$1')           // italic
      .replace(/__([^_]+)__/g, '$1')           // bold alt
      .replace(/_([^_]+)_/g, '$1')             // italic alt
      .replace(/~~([^~]+)~~/g, '$1')           // strikethrough
      .replace(/^\s*[-*+]\s+/gm, '')           // unordered lists
      .replace(/^\s*\d+\.\s+/gm, '')           // ordered lists
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')  // images
      // FIX: the original /> \s+/ pattern was unanchored and mangled
      // ordinary text like "3 > 2"; only strip line-leading blockquotes.
      .replace(/^>\s+/gm, '')                  // blockquotes
      .replace(/\n{2,}/g, '. ')                // paragraph breaks → pause
      .replace(/\n/g, ' ')                     // newlines → space
      .trim();
  }
}