From 3805157f67cecd22716a064556cb7e7270f4e671 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Wed, 18 Feb 2026 12:19:21 +0100 Subject: [PATCH] improved feedback for voice mode --- frontend/css/style.css | 33 ++++++++++++ frontend/js/chat.js | 117 ++++++++++++++++++++++++++++++----------- frontend/js/voice.js | 104 +++++++++++++++++++++++++----------- 3 files changed, 194 insertions(+), 60 deletions(-) diff --git a/frontend/css/style.css b/frontend/css/style.css index cd79eb8..d431533 100644 --- a/frontend/css/style.css +++ b/frontend/css/style.css @@ -1227,4 +1227,37 @@ tr:hover td { transform: scale(1); opacity: 0.8; } +} + +/* ── Thinking dots ────────────────────────────────────────────────── */ +.message-thinking { + color: var(--text-muted); + font-style: italic; + font-size: 0.9em; +} + +.thinking-dots span { + animation: thinkingDots 1.4s infinite ease-in-out both; + margin-left: 2px; +} + +.thinking-dots span:nth-child(1) { + animation-delay: -0.32s; +} + +.thinking-dots span:nth-child(2) { + animation-delay: -0.16s; +} + +@keyframes thinkingDots { + + 0%, + 80%, + 100% { + transform: scale(0); + } + + 40% { + transform: scale(1); + } } \ No newline at end of file diff --git a/frontend/js/chat.js b/frontend/js/chat.js index ebae714..8954c77 100644 --- a/frontend/js/chat.js +++ b/frontend/js/chat.js @@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => { micBtn.classList.toggle('recording', recording); }; + // Show "Transcribing..." state + voice.onProcessing = (processing) => { + if (processing) { + inputEl.placeholder = 'Transcribing...'; + inputEl.disabled = true; + } else { + inputEl.placeholder = voiceModeOn ? 'Voice mode ON — click the mic to speak...' : 'Type your message...'; + inputEl.disabled = false; + inputEl.focus(); + } + }; + micBtn.addEventListener('click', () => voice.toggleRecording()); // ── Chat ────────────────────────────────────────────────────────── @@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => { div.className = `message message-${role}`; if (role === 'assistant') { - div.innerHTML = renderMarkdown(content); + // content might be empty initially for thinking state + if (content === 'Thinking...') { + div.innerHTML = 'Thinking...'; + div.classList.add('message-thinking'); + } else { + div.innerHTML = renderMarkdown(content); + } } else { div.textContent = content; } @@ -168,41 +186,80 @@ document.addEventListener('DOMContentLoaded', async () => { const reader = response.body.getReader(); const decoder = new TextDecoder(); - while (true) { - const { done, value } = await reader.read(); - if (done) break; + // Special handling for Voice Mode: Buffer text, wait for TTS, then show & play + if (voiceModeOn) { + // Show thinking state + assistantEl.innerHTML = 'Thinking...'; + assistantEl.classList.add('message-thinking'); - const chunk = decoder.decode(value); - const lines = chunk.split('\n'); - - for (const line of lines) { - if (line.startsWith('data: ')) { - const data = line.slice(6).trim(); - if (data === '[DONE]') break; - - try { - const parsed = JSON.parse(data); - if (parsed.token) { - fullResponse += parsed.token; - assistantEl.innerHTML = renderMarkdown(fullResponse); - messagesEl.scrollTop = messagesEl.scrollHeight; - } - if (parsed.error) { - showToast(parsed.error, 'error'); - } - } catch (e) { - // skip unparseable chunks + while (true) { + const { done, value } = await reader.read(); + if (done) break; + const chunk = decoder.decode(value); + const lines = chunk.split('\n'); + for (const line of lines) { + if (line.startsWith('data: ')) { + const data = line.slice(6).trim(); + if (data === '[DONE]') break; + try { + const parsed = JSON.parse(data); + if (parsed.token) fullResponse += parsed.token; + if (parsed.error) showToast(parsed.error, 'error'); + } catch (e) { } } } } - } - if (fullResponse) { - history.push({ role: 'assistant', content: fullResponse }); + // Text complete. Now fetch audio. + if (fullResponse) { + history.push({ role: 'assistant', content: fullResponse }); - // Auto-speak if voice mode is ON (regardless of input method) - if (voiceModeOn) { - await voice.speak(fullResponse); + // Keep "Thinking..." until audio is ready or failed + const audioUrl = await voice.fetchAudio(fullResponse); + + // Visual update: Remove thinking, show text + assistantEl.classList.remove('message-thinking'); + assistantEl.innerHTML = renderMarkdown(fullResponse); + messagesEl.scrollTop = messagesEl.scrollHeight; + + if (audioUrl) { + await voice.playAudio(audioUrl); + } + } + + } else { + // Normal Text Mode: Stream directly to UI + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const chunk = decoder.decode(value); + const lines = chunk.split('\n'); + + for (const line of lines) { + if (line.startsWith('data: ')) { + const data = line.slice(6).trim(); + if (data === '[DONE]') break; + + try { + const parsed = JSON.parse(data); + if (parsed.token) { + fullResponse += parsed.token; + assistantEl.innerHTML = renderMarkdown(fullResponse); + messagesEl.scrollTop = messagesEl.scrollHeight; + } + if (parsed.error) { + showToast(parsed.error, 'error'); + } + } catch (e) { + // skip unparseable chunks + } + } + } + } + + if (fullResponse) { + history.push({ role: 'assistant', content: fullResponse }); } } } catch (e) { diff --git a/frontend/js/voice.js b/frontend/js/voice.js index ed294b0..4d0bea2 100644 --- a/frontend/js/voice.js +++ b/frontend/js/voice.js @@ -13,6 +13,7 @@ class VoiceManager { this.onStateChange = null; this.browserSTTSupported = false; this.apiAvailable = false; + this.onProcessing = null; // New callback for "Transcribing..." state } async init() { @@ -93,17 +94,18 @@ class VoiceManager { return; } - this.isRecording = true; - this.lastInputWasVoice = true; - if (this.onStateChange) this.onStateChange(true); + // Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state + // if hardware access fails or takes time. if (this.mode === 'api') { try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.audioChunks = []; - this.mediaRecorder = new MediaRecorder(stream); + this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' }); - this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data); + this.mediaRecorder.ondataavailable = (e) => { + if (e.data.size > 0) this.audioChunks.push(e.data); + }; this.mediaRecorder.onstop = async () => { stream.getTracks().forEach(t => t.stop()); @@ -111,9 +113,17 @@ class VoiceManager { await this._transcribeAPI(blob); }; + // Wait for recorder to actually start before updating UI this.mediaRecorder.start(); + + // Now we are truly recording + this.isRecording = true; + this.lastInputWasVoice = true; + if (this.onStateChange) this.onStateChange(true); + } catch (e) { - showToast('Microphone access denied', 'error'); + console.error('[Voice] Mic access error:', e); + showToast('Microphone access denied or error', 'error'); this.isRecording = false; if (this.onStateChange) this.onStateChange(false); } @@ -121,6 +131,10 @@ class VoiceManager { if (this.recognition) { try { this.recognition.start(); + // Recognition 'onstart' would be better, but this is okay for browser mode + this.isRecording = true; + this.lastInputWasVoice = true; + if (this.onStateChange) this.onStateChange(true); } catch (e) { this.isRecording = false; if (this.onStateChange) this.onStateChange(false); @@ -154,6 +168,8 @@ class VoiceManager { } async _transcribeAPI(blob) { + if (this.onProcessing) this.onProcessing(true); + try { const formData = new FormData(); formData.append('audio', blob, 'recording.webm'); @@ -175,22 +191,20 @@ class VoiceManager { showToast('Transcription network error', 'error'); } finally { this.isRecording = false; + // Stop processing state + if (this.onProcessing) this.onProcessing(false); if (this.onStateChange) this.onStateChange(false); } } /** - * Speak text via API TTS only. No browser fallback. - * Strips markdown formatting before sending. + * Fetch TTS audio blob for text (API only). + * Returns audio URL or null. */ - async speak(text) { - if (!this.apiAvailable) { - console.log('[Voice] API TTS not available, skipping speech'); - return; - } + async fetchAudio(text) { + if (!this.apiAvailable) return null; const clean = VoiceManager.stripMarkdown(text); - try { const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, { method: 'POST', @@ -198,25 +212,55 @@ class VoiceManager { if (response?.ok) { const audioBlob = await response.blob(); - const audioUrl = URL.createObjectURL(audioBlob); - const audio = new Audio(audioUrl); - - // Visual feedback - const avatarContainer = document.querySelector('.avatar-container'); - if (avatarContainer) avatarContainer.classList.add('speaking'); - - await audio.play(); - - return new Promise(resolve => { - audio.onended = () => { - if (avatarContainer) avatarContainer.classList.remove('speaking'); - resolve(); - }; - }); + return URL.createObjectURL(audioBlob); + } else { + const err = await response.json().catch(() => ({})); + console.warn('[Voice] TTS error:', err); } } catch (e) { - console.warn('[Voice] API TTS failed:', e); + console.warn('[Voice] TTS network error:', e); } + return null; + } + + /** + * Play pre-fetched audio URL with visual feedback. + */ + async playAudio(audioUrl) { + if (!audioUrl) return; + + const audio = new Audio(audioUrl); + + // Visual feedback + const avatarContainer = document.querySelector('.avatar-container'); + if (avatarContainer) avatarContainer.classList.add('speaking'); + + try { + await audio.play(); + return new Promise(resolve => { + audio.onended = () => { + if (avatarContainer) avatarContainer.classList.remove('speaking'); + resolve(); + }; + // Handle errors during playback (e.g. format issues) + audio.onerror = () => { + if (avatarContainer) avatarContainer.classList.remove('speaking'); + resolve(); + } + }); + } catch (e) { + console.error("Playback failed", e); + if (avatarContainer) avatarContainer.classList.remove('speaking'); + } + } + + /** + * Legacy method for backward compatibility if needed, + * or for simple direct speech. + */ + async speak(text) { + const url = await this.fetchAudio(text); + if (url) await this.playAudio(url); } /**