improved feedback for voice mode
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s
This commit is contained in:
@@ -13,6 +13,7 @@ class VoiceManager {
|
||||
this.onStateChange = null;
|
||||
this.browserSTTSupported = false;
|
||||
this.apiAvailable = false;
|
||||
this.onProcessing = null; // New callback for "Transcribing..." state
|
||||
}
|
||||
|
||||
async init() {
|
||||
@@ -93,17 +94,18 @@ class VoiceManager {
|
||||
return;
|
||||
}
|
||||
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
// Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
|
||||
// if hardware access fails or takes time.
|
||||
|
||||
if (this.mode === 'api') {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
this.audioChunks = [];
|
||||
this.mediaRecorder = new MediaRecorder(stream);
|
||||
this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
|
||||
|
||||
this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
|
||||
this.mediaRecorder.ondataavailable = (e) => {
|
||||
if (e.data.size > 0) this.audioChunks.push(e.data);
|
||||
};
|
||||
|
||||
this.mediaRecorder.onstop = async () => {
|
||||
stream.getTracks().forEach(t => t.stop());
|
||||
@@ -111,9 +113,17 @@ class VoiceManager {
|
||||
await this._transcribeAPI(blob);
|
||||
};
|
||||
|
||||
// Wait for recorder to actually start before updating UI
|
||||
this.mediaRecorder.start();
|
||||
|
||||
// Now we are truly recording
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
|
||||
} catch (e) {
|
||||
showToast('Microphone access denied', 'error');
|
||||
console.error('[Voice] Mic access error:', e);
|
||||
showToast('Microphone access denied or error', 'error');
|
||||
this.isRecording = false;
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
}
|
||||
@@ -121,6 +131,10 @@ class VoiceManager {
|
||||
if (this.recognition) {
|
||||
try {
|
||||
this.recognition.start();
|
||||
// Recognition 'onstart' would be better, but this is okay for browser mode
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
} catch (e) {
|
||||
this.isRecording = false;
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
@@ -154,6 +168,8 @@ class VoiceManager {
|
||||
}
|
||||
|
||||
async _transcribeAPI(blob) {
|
||||
if (this.onProcessing) this.onProcessing(true);
|
||||
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('audio', blob, 'recording.webm');
|
||||
@@ -175,22 +191,20 @@ class VoiceManager {
|
||||
showToast('Transcription network error', 'error');
|
||||
} finally {
|
||||
this.isRecording = false;
|
||||
// Stop processing state
|
||||
if (this.onProcessing) this.onProcessing(false);
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak text via API TTS only. No browser fallback.
|
||||
* Strips markdown formatting before sending.
|
||||
* Fetch TTS audio blob for text (API only).
|
||||
* Returns audio URL or null.
|
||||
*/
|
||||
async speak(text) {
|
||||
if (!this.apiAvailable) {
|
||||
console.log('[Voice] API TTS not available, skipping speech');
|
||||
return;
|
||||
}
|
||||
async fetchAudio(text) {
|
||||
if (!this.apiAvailable) return null;
|
||||
|
||||
const clean = VoiceManager.stripMarkdown(text);
|
||||
|
||||
try {
|
||||
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
|
||||
method: 'POST',
|
||||
@@ -198,25 +212,55 @@ class VoiceManager {
|
||||
|
||||
if (response?.ok) {
|
||||
const audioBlob = await response.blob();
|
||||
const audioUrl = URL.createObjectURL(audioBlob);
|
||||
const audio = new Audio(audioUrl);
|
||||
|
||||
// Visual feedback
|
||||
const avatarContainer = document.querySelector('.avatar-container');
|
||||
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||
|
||||
await audio.play();
|
||||
|
||||
return new Promise(resolve => {
|
||||
audio.onended = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
};
|
||||
});
|
||||
return URL.createObjectURL(audioBlob);
|
||||
} else {
|
||||
const err = await response.json().catch(() => ({}));
|
||||
console.warn('[Voice] TTS error:', err);
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('[Voice] API TTS failed:', e);
|
||||
console.warn('[Voice] TTS network error:', e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Play pre-fetched audio URL with visual feedback.
|
||||
*/
|
||||
async playAudio(audioUrl) {
|
||||
if (!audioUrl) return;
|
||||
|
||||
const audio = new Audio(audioUrl);
|
||||
|
||||
// Visual feedback
|
||||
const avatarContainer = document.querySelector('.avatar-container');
|
||||
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||
|
||||
try {
|
||||
await audio.play();
|
||||
return new Promise(resolve => {
|
||||
audio.onended = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
};
|
||||
// Handle errors during playback (e.g. format issues)
|
||||
audio.onerror = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
} catch (e) {
|
||||
console.error("Playback failed", e);
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy method for backward compatibility if needed,
|
||||
* or for simple direct speech.
|
||||
*/
|
||||
async speak(text) {
|
||||
const url = await this.fetchAudio(text);
|
||||
if (url) await this.playAudio(url);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user