improved feedback for voice mode
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s

This commit is contained in:
2026-02-18 12:19:21 +01:00
parent 9143c27af8
commit 3805157f67
3 changed files with 194 additions and 60 deletions

View File

@@ -1228,3 +1228,36 @@ tr:hover td {
opacity: 0.8; opacity: 0.8;
} }
} }
/* ── Thinking dots ────────────────────────────────────────────────── */
.message-thinking {
  color: var(--text-muted);
  font-style: italic;
  font-size: 0.9em;
}
/* Each dot is a <span>. `display: inline-block` is required: CSS
   transforms (the scale() in the keyframes below) have no effect on
   non-replaced inline boxes, so without it the dots never animate. */
.thinking-dots span {
  display: inline-block;
  animation: thinkingDots 1.4s infinite ease-in-out both;
  margin-left: 2px;
}
/* Negative delays stagger the three dots with no initial pause
   (the third dot uses the default delay of 0s). */
.thinking-dots span:nth-child(1) {
  animation-delay: -0.32s;
}
.thinking-dots span:nth-child(2) {
  animation-delay: -0.16s;
}
/* Each dot pulses from invisible (scale 0) to full size and back. */
@keyframes thinkingDots {
  0%,
  80%,
  100% {
    transform: scale(0);
  }
  40% {
    transform: scale(1);
  }
}

View File

@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
micBtn.classList.toggle('recording', recording); micBtn.classList.toggle('recording', recording);
}; };
// Reflect the STT "Transcribing..." state in the chat input field:
// lock the input while transcription is in flight, restore it after.
voice.onProcessing = (isBusy) => {
  inputEl.disabled = isBusy;
  if (isBusy) {
    inputEl.placeholder = 'Transcribing...';
    return;
  }
  inputEl.placeholder = voiceModeOn
    ? 'Voice mode ON — click the mic to speak...'
    : 'Type your message...';
  inputEl.focus();
};
micBtn.addEventListener('click', () => voice.toggleRecording()); micBtn.addEventListener('click', () => voice.toggleRecording());
// ── Chat ────────────────────────────────────────────────────────── // ── Chat ──────────────────────────────────────────────────────────
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
div.className = `message message-${role}`; div.className = `message message-${role}`;
if (role === 'assistant') { if (role === 'assistant') {
// content might be empty initially for thinking state
if (content === 'Thinking...') {
div.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
div.classList.add('message-thinking');
} else {
div.innerHTML = renderMarkdown(content); div.innerHTML = renderMarkdown(content);
}
} else { } else {
div.textContent = content; div.textContent = content;
} }
@@ -168,6 +186,49 @@ document.addEventListener('DOMContentLoaded', async () => {
const reader = response.body.getReader(); const reader = response.body.getReader();
const decoder = new TextDecoder(); const decoder = new TextDecoder();
// Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
if (voiceModeOn) {
// Show thinking state
assistantEl.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
assistantEl.classList.add('message-thinking');
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6).trim();
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
if (parsed.token) fullResponse += parsed.token;
if (parsed.error) showToast(parsed.error, 'error');
} catch (e) { }
}
}
}
// Text complete. Now fetch audio.
if (fullResponse) {
history.push({ role: 'assistant', content: fullResponse });
// Keep "Thinking..." until audio is ready or failed
const audioUrl = await voice.fetchAudio(fullResponse);
// Visual update: Remove thinking, show text
assistantEl.classList.remove('message-thinking');
assistantEl.innerHTML = renderMarkdown(fullResponse);
messagesEl.scrollTop = messagesEl.scrollHeight;
if (audioUrl) {
await voice.playAudio(audioUrl);
}
}
} else {
// Normal Text Mode: Stream directly to UI
while (true) { while (true) {
const { done, value } = await reader.read(); const { done, value } = await reader.read();
if (done) break; if (done) break;
@@ -199,10 +260,6 @@ document.addEventListener('DOMContentLoaded', async () => {
if (fullResponse) { if (fullResponse) {
history.push({ role: 'assistant', content: fullResponse }); history.push({ role: 'assistant', content: fullResponse });
// Auto-speak if voice mode is ON (regardless of input method)
if (voiceModeOn) {
await voice.speak(fullResponse);
} }
} }
} catch (e) { } catch (e) {

View File

@@ -13,6 +13,7 @@ class VoiceManager {
this.onStateChange = null; this.onStateChange = null;
this.browserSTTSupported = false; this.browserSTTSupported = false;
this.apiAvailable = false; this.apiAvailable = false;
this.onProcessing = null; // New callback for "Transcribing..." state
} }
async init() { async init() {
@@ -93,17 +94,18 @@ class VoiceManager {
return; return;
} }
this.isRecording = true; // Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
this.lastInputWasVoice = true; // if hardware access fails or takes time.
if (this.onStateChange) this.onStateChange(true);
if (this.mode === 'api') { if (this.mode === 'api') {
try { try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
this.audioChunks = []; this.audioChunks = [];
this.mediaRecorder = new MediaRecorder(stream); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data); this.mediaRecorder.ondataavailable = (e) => {
if (e.data.size > 0) this.audioChunks.push(e.data);
};
this.mediaRecorder.onstop = async () => { this.mediaRecorder.onstop = async () => {
stream.getTracks().forEach(t => t.stop()); stream.getTracks().forEach(t => t.stop());
@@ -111,9 +113,17 @@ class VoiceManager {
await this._transcribeAPI(blob); await this._transcribeAPI(blob);
}; };
// Wait for recorder to actually start before updating UI
this.mediaRecorder.start(); this.mediaRecorder.start();
// Now we are truly recording
this.isRecording = true;
this.lastInputWasVoice = true;
if (this.onStateChange) this.onStateChange(true);
} catch (e) { } catch (e) {
showToast('Microphone access denied', 'error'); console.error('[Voice] Mic access error:', e);
showToast('Microphone access denied or error', 'error');
this.isRecording = false; this.isRecording = false;
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
} }
@@ -121,6 +131,10 @@ class VoiceManager {
if (this.recognition) { if (this.recognition) {
try { try {
this.recognition.start(); this.recognition.start();
// Recognition 'onstart' would be better, but this is okay for browser mode
this.isRecording = true;
this.lastInputWasVoice = true;
if (this.onStateChange) this.onStateChange(true);
} catch (e) { } catch (e) {
this.isRecording = false; this.isRecording = false;
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
@@ -154,6 +168,8 @@ class VoiceManager {
} }
async _transcribeAPI(blob) { async _transcribeAPI(blob) {
if (this.onProcessing) this.onProcessing(true);
try { try {
const formData = new FormData(); const formData = new FormData();
formData.append('audio', blob, 'recording.webm'); formData.append('audio', blob, 'recording.webm');
@@ -175,22 +191,20 @@ class VoiceManager {
showToast('Transcription network error', 'error'); showToast('Transcription network error', 'error');
} finally { } finally {
this.isRecording = false; this.isRecording = false;
// Stop processing state
if (this.onProcessing) this.onProcessing(false);
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
} }
} }
/** /**
* Speak text via API TTS only. No browser fallback. * Fetch TTS audio blob for text (API only).
* Strips markdown formatting before sending. * Returns audio URL or null.
*/ */
async speak(text) { async fetchAudio(text) {
if (!this.apiAvailable) { if (!this.apiAvailable) return null;
console.log('[Voice] API TTS not available, skipping speech');
return;
}
const clean = VoiceManager.stripMarkdown(text); const clean = VoiceManager.stripMarkdown(text);
try { try {
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, { const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
method: 'POST', method: 'POST',
@@ -198,27 +212,57 @@ class VoiceManager {
if (response?.ok) { if (response?.ok) {
const audioBlob = await response.blob(); const audioBlob = await response.blob();
const audioUrl = URL.createObjectURL(audioBlob); return URL.createObjectURL(audioBlob);
} else {
const err = await response.json().catch(() => ({}));
console.warn('[Voice] TTS error:', err);
}
} catch (e) {
console.warn('[Voice] TTS network error:', e);
}
return null;
}
/**
* Play pre-fetched audio URL with visual feedback.
*/
async playAudio(audioUrl) {
if (!audioUrl) return;
const audio = new Audio(audioUrl); const audio = new Audio(audioUrl);
// Visual feedback // Visual feedback
const avatarContainer = document.querySelector('.avatar-container'); const avatarContainer = document.querySelector('.avatar-container');
if (avatarContainer) avatarContainer.classList.add('speaking'); if (avatarContainer) avatarContainer.classList.add('speaking');
try {
await audio.play(); await audio.play();
return new Promise(resolve => { return new Promise(resolve => {
audio.onended = () => { audio.onended = () => {
if (avatarContainer) avatarContainer.classList.remove('speaking'); if (avatarContainer) avatarContainer.classList.remove('speaking');
resolve(); resolve();
}; };
// Handle errors during playback (e.g. format issues)
audio.onerror = () => {
if (avatarContainer) avatarContainer.classList.remove('speaking');
resolve();
}
}); });
}
} catch (e) { } catch (e) {
console.warn('[Voice] API TTS failed:', e); console.error("Playback failed", e);
if (avatarContainer) avatarContainer.classList.remove('speaking');
} }
} }
/**
* Legacy method for backward compatibility if needed,
* or for simple direct speech.
*/
async speak(text) {
const url = await this.fetchAudio(text);
if (url) await this.playAudio(url);
}
/** /**
* Strip markdown formatting from text so TTS reads naturally. * Strip markdown formatting from text so TTS reads naturally.
*/ */