From 3805157f67cecd22716a064556cb7e7270f4e671 Mon Sep 17 00:00:00 2001
From: Dennis Thiessen <dennis@thiessen.io>
Date: Wed, 18 Feb 2026 12:19:21 +0100
Subject: [PATCH] improved feedback for voice mode

---
 frontend/css/style.css |  33 ++++++++++++
 frontend/js/chat.js    | 117 ++++++++++++++++++++++++++++++-----------
 frontend/js/voice.js   | 104 +++++++++++++++++++++++++-----------
 3 files changed, 194 insertions(+), 60 deletions(-)
diff --git a/frontend/css/style.css b/frontend/css/style.css
index cd79eb8..d431533 100644
--- a/frontend/css/style.css
+++ b/frontend/css/style.css
@@ -1227,4 +1227,37 @@ tr:hover td {
     transform: scale(1);
     opacity: 0.8;
   }
+}
+
+/* ── Thinking dots ────────────────────────────────────────────────── */
+.message-thinking {
+  color: var(--text-muted);
+  font-style: italic;
+  font-size: 0.9em;
+}
+
+.thinking-dots span {
+  animation: thinkingDots 1.4s infinite ease-in-out both;
+  margin-left: 2px;
+}
+
+.thinking-dots span:nth-child(1) {
+  animation-delay: -0.32s;
+}
+
+.thinking-dots span:nth-child(2) {
+  animation-delay: -0.16s;
+}
+
+@keyframes thinkingDots {
+
+  0%,
+  80%,
+  100% {
+    transform: scale(0);
+  }
+
+  40% {
+    transform: scale(1);
+  }
 }
\ No newline at end of file
diff --git a/frontend/js/chat.js b/frontend/js/chat.js
index ebae714..8954c77 100644
--- a/frontend/js/chat.js
+++ b/frontend/js/chat.js
@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
         micBtn.classList.toggle('recording', recording);
     };
 
+    // Show "Transcribing..." state
+    voice.onProcessing = (processing) => {
+        if (processing) {
+            inputEl.placeholder = 'Transcribing...';
+            inputEl.disabled = true;
+        } else {
+            inputEl.placeholder = voiceModeOn ? 'Voice mode ON — click the mic to speak...' : 'Type your message...';
+            inputEl.disabled = false;
+            inputEl.focus();
+        }
+    };
+
     micBtn.addEventListener('click', () => voice.toggleRecording());
 
     // ── Chat ──────────────────────────────────────────────────────────
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
         div.className = `message message-${role}`;
 
         if (role === 'assistant') {
-            div.innerHTML = renderMarkdown(content);
+            // The literal 'Thinking...' is a sentinel: render the animated placeholder instead of markdown
+            if (content === 'Thinking...') {
+                div.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
+                div.classList.add('message-thinking');
+            } else {
+                div.innerHTML = renderMarkdown(content);
+            }
         } else {
             div.textContent = content;
         }
@@ -168,41 +186,80 @@ document.addEventListener('DOMContentLoaded', async () => {
             const reader = response.body.getReader();
             const decoder = new TextDecoder();
 
-            while (true) {
-                const { done, value } = await reader.read();
-                if (done) break;
+            // Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
+            if (voiceModeOn) {
+                // Show thinking state
+                assistantEl.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
+                assistantEl.classList.add('message-thinking');
 
-                const chunk = decoder.decode(value);
-                const lines = chunk.split('\n');
-
-                for (const line of lines) {
-                    if (line.startsWith('data: ')) {
-                        const data = line.slice(6).trim();
-                        if (data === '[DONE]') break;
-
-                        try {
-                            const parsed = JSON.parse(data);
-                            if (parsed.token) {
-                                fullResponse += parsed.token;
-                                assistantEl.innerHTML = renderMarkdown(fullResponse);
-                                messagesEl.scrollTop = messagesEl.scrollHeight;
-                            }
-                            if (parsed.error) {
-                                showToast(parsed.error, 'error');
-                            }
-                        } catch (e) {
-                            // skip unparseable chunks
+                while (true) {
+                    const { done, value } = await reader.read();
+                    if (done) break;
+                    const chunk = decoder.decode(value);
+                    const lines = chunk.split('\n');
+                    for (const line of lines) {
+                        if (line.startsWith('data: ')) {
+                            const data = line.slice(6).trim();
+                            if (data === '[DONE]') break;
+                            try {
+                                const parsed = JSON.parse(data);
+                                if (parsed.token) fullResponse += parsed.token;
+                                if (parsed.error) showToast(parsed.error, 'error');
+                            } catch (e) { }
                         }
                     }
                 }
-            }
 
-            if (fullResponse) {
-                history.push({ role: 'assistant', content: fullResponse });
+                // Text complete. Now fetch audio.
+                if (fullResponse) {
+                    history.push({ role: 'assistant', content: fullResponse });
 
-                // Auto-speak if voice mode is ON (regardless of input method)
-                if (voiceModeOn) {
-                    await voice.speak(fullResponse);
+                    // Keep "Thinking..." until audio is ready or failed
+                    const audioUrl = await voice.fetchAudio(fullResponse);
+
+                    // Visual update: Remove thinking, show text
+                    assistantEl.classList.remove('message-thinking');
+                    assistantEl.innerHTML = renderMarkdown(fullResponse);
+                    messagesEl.scrollTop = messagesEl.scrollHeight;
+
+                    if (audioUrl) {
+                        await voice.playAudio(audioUrl);
+                    }
+                }
+
+            } else {
+                // Normal Text Mode: Stream directly to UI
+                while (true) {
+                    const { done, value } = await reader.read();
+                    if (done) break;
+
+                    const chunk = decoder.decode(value);
+                    const lines = chunk.split('\n');
+
+                    for (const line of lines) {
+                        if (line.startsWith('data: ')) {
+                            const data = line.slice(6).trim();
+                            if (data === '[DONE]') break;
+
+                            try {
+                                const parsed = JSON.parse(data);
+                                if (parsed.token) {
+                                    fullResponse += parsed.token;
+                                    assistantEl.innerHTML = renderMarkdown(fullResponse);
+                                    messagesEl.scrollTop = messagesEl.scrollHeight;
+                                }
+                                if (parsed.error) {
+                                    showToast(parsed.error, 'error');
+                                }
+                            } catch (e) {
+                                // skip unparseable chunks
+                            }
+                        }
+                    }
+                }
+
+                if (fullResponse) {
+                    history.push({ role: 'assistant', content: fullResponse });
                 }
             }
         } catch (e) {
diff --git a/frontend/js/voice.js b/frontend/js/voice.js
index ed294b0..4d0bea2 100644
--- a/frontend/js/voice.js
+++ b/frontend/js/voice.js
@@ -13,6 +13,7 @@ class VoiceManager {
         this.onStateChange = null;
         this.browserSTTSupported = false;
         this.apiAvailable = false;
+        this.onProcessing = null; // New callback for "Transcribing..." state
     }
 
     async init() {
@@ -93,17 +94,18 @@ class VoiceManager {
             return;
         }
 
-        this.isRecording = true;
-        this.lastInputWasVoice = true;
-        if (this.onStateChange) this.onStateChange(true);
+        // Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
+        // if hardware access fails or takes time.
 
         if (this.mode === 'api') {
             try {
                 const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                 this.audioChunks = [];
-                this.mediaRecorder = new MediaRecorder(stream);
+                this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
 
-                this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
+                this.mediaRecorder.ondataavailable = (e) => {
+                    if (e.data.size > 0) this.audioChunks.push(e.data);
+                };
 
                 this.mediaRecorder.onstop = async () => {
                     stream.getTracks().forEach(t => t.stop());
@@ -111,9 +113,17 @@ class VoiceManager {
                     await this._transcribeAPI(blob);
                 };
 
+                // Update the UI only after start() returns without throwing (the 'start' event is not awaited)
                 this.mediaRecorder.start();
+
+                // Now we are truly recording
+                this.isRecording = true;
+                this.lastInputWasVoice = true;
+                if (this.onStateChange) this.onStateChange(true);
+
             } catch (e) {
-                showToast('Microphone access denied', 'error');
+                console.error('[Voice] Mic access error:', e);
+                showToast('Microphone access denied or error', 'error');
                 this.isRecording = false;
                 if (this.onStateChange) this.onStateChange(false);
             }
@@ -121,6 +131,10 @@ class VoiceManager {
             if (this.recognition) {
                 try {
                     this.recognition.start();
+                    // NOTE: state flips as soon as start() returns; the recognition 'onstart' event would be a more accurate signal
+                    this.isRecording = true;
+                    this.lastInputWasVoice = true;
+                    if (this.onStateChange) this.onStateChange(true);
                 } catch (e) {
                     this.isRecording = false;
                     if (this.onStateChange) this.onStateChange(false);
@@ -154,6 +168,8 @@ class VoiceManager {
     }
 
     async _transcribeAPI(blob) {
+        if (this.onProcessing) this.onProcessing(true);
+
         try {
             const formData = new FormData();
             formData.append('audio', blob, 'recording.webm');
@@ -175,22 +191,20 @@ class VoiceManager {
             showToast('Transcription network error', 'error');
         } finally {
             this.isRecording = false;
+            // Stop processing state
+            if (this.onProcessing) this.onProcessing(false);
             if (this.onStateChange) this.onStateChange(false);
         }
     }
 
     /**
-     * Speak text via API TTS only. No browser fallback.
-     * Strips markdown formatting before sending.
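+     * Fetch TTS audio for text via the API only (markdown is stripped before sending).
+     * Returns an object URL for the audio blob, or null if the API is unavailable or the request fails.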
      */
-    async speak(text) {
-        if (!this.apiAvailable) {
-            console.log('[Voice] API TTS not available, skipping speech');
-            return;
-        }
+    async fetchAudio(text) {
+        if (!this.apiAvailable) return null;
 
         const clean = VoiceManager.stripMarkdown(text);
-
         try {
             const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
                 method: 'POST',
@@ -198,25 +212,55 @@ class VoiceManager {
 
             if (response?.ok) {
                 const audioBlob = await response.blob();
-                const audioUrl = URL.createObjectURL(audioBlob);
-                const audio = new Audio(audioUrl);
-
-                // Visual feedback
-                const avatarContainer = document.querySelector('.avatar-container');
-                if (avatarContainer) avatarContainer.classList.add('speaking');
-
-                await audio.play();
-
-                return new Promise(resolve => {
-                    audio.onended = () => {
-                        if (avatarContainer) avatarContainer.classList.remove('speaking');
-                        resolve();
-                    };
-                });
+                return URL.createObjectURL(audioBlob);
+            } else {
+                const err = await response.json().catch(() => ({}));
+                console.warn('[Voice] TTS error:', err);
             }
         } catch (e) {
-            console.warn('[Voice] API TTS failed:', e);
+            console.warn('[Voice] TTS network error:', e);
         }
+        return null;
+    }
+
+    /**
+     * Play pre-fetched audio URL with visual feedback.
+     */
+    async playAudio(audioUrl) {
+        if (!audioUrl) return;
+
+        const audio = new Audio(audioUrl);
+
+        // Visual feedback
+        const avatarContainer = document.querySelector('.avatar-container');
+        if (avatarContainer) avatarContainer.classList.add('speaking');
+
+        try {
+            await audio.play();
+            return new Promise(resolve => {
+                audio.onended = () => {
+                    if (avatarContainer) avatarContainer.classList.remove('speaking');
+                    resolve();
+                };
+                // Handle errors during playback (e.g. format issues)
+                audio.onerror = () => {
+                    if (avatarContainer) avatarContainer.classList.remove('speaking');
+                    resolve();
+                }
+            });
+        } catch (e) {
+            console.error("Playback failed", e);
+            if (avatarContainer) avatarContainer.classList.remove('speaking');
+        }
+    }
+
+    /**
+     * Convenience wrapper kept for backward compatibility:
+     * fetches TTS audio for the text and plays it if a URL was returned.
+     */
+    async speak(text) {
+        const url = await this.fetchAudio(text);
+        if (url) await this.playAudio(url);
     }
 
     /**