updated avatars and voice mode

2026-02-16 20:59:39 +01:00
parent 3890f0479f
commit bce4124974
5 changed files with 227 additions and 137 deletions
--- a/frontend/js/voice.js
+++ b/frontend/js/voice.js
@@ -1,12 +1,11 @@
-/* FluentGerman.ai — Voice module (Web Speech API + API mode) */
+/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */

 class VoiceManager {
    constructor() {
-        this.mode = 'browser'; // will be set from server config
+        this.mode = 'browser';
        this.recognition = null;
-        this.synthesis = window.speechSynthesis;
        this.isRecording = false;
-        this.isDisabled = false; // true when no STT method is available
+        this.isDisabled = false;
        this.lastInputWasVoice = false;
        this.mediaRecorder = null;
        this.audioChunks = [];
@@ -17,10 +16,8 @@ class VoiceManager {
    }

    async init() {
-        // Check browser STT support
        this._initBrowserSTT();

-        // Fetch voice mode from server
        try {
            const response = await api('/voice/config');
            if (response?.ok) {
@@ -34,24 +31,17 @@ class VoiceManager {
            this.mode = 'browser';
        }

-        // Determine the best available mode
+        // Determine best STT method
        if (this.mode === 'browser' && !this.browserSTTSupported) {
            if (this.apiAvailable) {
-                console.log('[Voice] Browser STT not supported, falling back to API mode');
                this.mode = 'api';
-                showToast('Using cloud voice recognition — your browser doesn\'t support built-in speech recognition.', 'info');
            } else {
-                // Neither method works
-                console.warn('[Voice] No STT method available — disabling voice input');
                this.isDisabled = true;
            }
        } else if (this.mode === 'api' && !this.apiAvailable) {
-            // Server says API but API isn't actually configured
            if (this.browserSTTSupported) {
-                console.log('[Voice] API STT not configured, using browser STT');
                this.mode = 'browser';
            } else {
-                console.warn('[Voice] No STT method available — disabling voice input');
                this.isDisabled = true;
            }
        }
@@ -62,7 +52,6 @@ class VoiceManager {
    _initBrowserSTT() {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!SpeechRecognition) {
-            console.warn('[Voice] SpeechRecognition API not available in this browser');
            this.browserSTTSupported = false;
            return;
        }
@@ -75,35 +64,32 @@ class VoiceManager {

        this.recognition.onresult = (event) => {
            const text = event.results[0][0].transcript;
-            console.log('[Voice] Browser STT result:', text);
+            console.log('[Voice] STT result:', text);
            this.lastInputWasVoice = true;
            if (this.onResult) this.onResult(text);
        };

        this.recognition.onend = () => {
-            console.log('[Voice] Browser STT ended');
            this.isRecording = false;
            if (this.onStateChange) this.onStateChange(false);
        };

        this.recognition.onerror = (event) => {
-            console.error('[Voice] Browser STT error:', event.error);
+            console.error('[Voice] STT error:', event.error);
            this.isRecording = false;
            if (this.onStateChange) this.onStateChange(false);

            if (event.error === 'not-allowed') {
-                showToast('Microphone access denied. Please allow microphone in browser settings.', 'error');
+                showToast('Microphone access denied. Allow it in browser settings.', 'error');
            } else if (event.error === 'no-speech') {
                showToast('No speech detected. Try again.', 'error');
            }
        };
-
-        console.log('[Voice] Browser STT initialized');
    }

    async startRecording() {
        if (this.isDisabled) {
-            showToast('Voice input requires Chrome or Edge (with HTTPS). Firefox is not supported.', 'error');
+            showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
            return;
        }

@@ -112,79 +98,66 @@ class VoiceManager {
        if (this.onStateChange) this.onStateChange(true);

        if (this.mode === 'api') {
-            // API mode — record audio via MediaRecorder, send to Whisper
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                this.audioChunks = [];
                this.mediaRecorder = new MediaRecorder(stream);

-                this.mediaRecorder.ondataavailable = (event) => {
-                    this.audioChunks.push(event.data);
-                };
+                this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);

                this.mediaRecorder.onstop = async () => {
-                    stream.getTracks().forEach(track => track.stop());
+                    stream.getTracks().forEach(t => t.stop());
                    const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
                    await this._transcribeAPI(blob);
                };

                this.mediaRecorder.start();
-                console.log('[Voice] API recording started');
            } catch (e) {
-                console.error('[Voice] Microphone access error:', e);
                showToast('Microphone access denied', 'error');
                this.isRecording = false;
                if (this.onStateChange) this.onStateChange(false);
            }
        } else {
-            // Browser mode — use Web Speech API
            if (this.recognition) {
                try {
                    this.recognition.start();
-                    console.log('[Voice] Browser STT started');
                } catch (e) {
-                    console.error('[Voice] Failed to start recognition:', e);
                    this.isRecording = false;
                    if (this.onStateChange) this.onStateChange(false);
-                    showToast('Voice recognition failed to start. Try again.', 'error');
+                    showToast('Voice recognition failed. Try again.', 'error');
                }
-            } else {
-                console.warn('[Voice] No speech recognition available');
-                this.isRecording = false;
-                if (this.onStateChange) this.onStateChange(false);
-                showToast('Voice input requires Chrome or Edge (with HTTPS).', 'error');
            }
        }
    }

    stopRecording() {
-        console.log('[Voice] Stopping recording...');
        if (this.mode === 'api') {
-            if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
+            if (this.mediaRecorder?.state === 'recording') { // state is reset later: the onstop handler set in startRecording runs _transcribeAPI, which clears isRecording
                this.mediaRecorder.stop();
            } else {
                this.isRecording = false;
                if (this.onStateChange) this.onStateChange(false);
            }
        } else {
-            if (this.recognition) {
-                try {
-                    this.recognition.stop();
-                } catch (e) {
-                    // Already stopped
-                }
-            }
+            try { this.recognition?.stop(); } catch (e) { /* already stopped */ }
            this.isRecording = false;
            if (this.onStateChange) this.onStateChange(false);
        }
    }

+    toggleRecording() { // flips recording on/off; when voice input is disabled it only shows an error toast
+        if (!this.isDisabled) {
+            this.isRecording ? this.stopRecording() : this.startRecording();
+            return;
+        }
+        showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
+    }
+
    async _transcribeAPI(blob) {
        try {
            const formData = new FormData();
            formData.append('audio', blob, 'recording.webm');

-            console.log('[Voice] Sending audio to API for transcription...');
            const response = await api('/voice/transcribe', {
                method: 'POST',
                body: formData,
@@ -192,14 +165,12 @@ class VoiceManager {

            if (response?.ok) {
                const data = await response.json();
-                console.log('[Voice] API transcription result:', data.text);
                this.lastInputWasVoice = true;
                if (this.onResult) this.onResult(data.text);
            } else {
-                showToast('Transcription failed. Please try again.', 'error');
+                showToast('Transcription failed.', 'error');
            }
        } catch (e) {
-            console.error('[Voice] API transcription error:', e);
            showToast('Transcription error', 'error');
        } finally {
            this.isRecording = false;
@@ -207,33 +178,20 @@ class VoiceManager {
        }
    }

+    /**
+     * Speak text via API TTS only. No browser fallback.
+     * Strips markdown formatting before sending.
+     */
    async speak(text) {
-        if (this.mode === 'api' && this.apiAvailable) {
-            return this._speakAPI(text);
-        } else {
-            return this._speakBrowser(text);
+        if (!this.apiAvailable) {
+            console.log('[Voice] API TTS not available, skipping speech');
+            return;
        }
-    }

-    _speakBrowser(text) {
-        return new Promise((resolve) => {
-            // Cancel any ongoing speech
-            this.synthesis.cancel();
-            const utterance = new SpeechSynthesisUtterance(text);
-            utterance.lang = 'de-DE';
-            utterance.rate = 0.95;
-            utterance.onend = resolve;
-            utterance.onerror = () => {
-                console.warn('[Voice] Browser TTS error');
-                resolve();
-            };
-            this.synthesis.speak(utterance);
-        });
-    }
+        const clean = VoiceManager.stripMarkdown(text);

-    async _speakAPI(text) {
        try {
-            const response = await api(`/voice/synthesize?text=${encodeURIComponent(text)}`, {
+            const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
                method: 'POST',
            });

@@ -242,26 +200,33 @@ class VoiceManager {
                const audioUrl = URL.createObjectURL(audioBlob);
                const audio = new Audio(audioUrl);
                await audio.play();
-                return new Promise(resolve => {
-                    audio.onended = resolve;
-                });
+                return new Promise(resolve => { audio.onended = resolve; });
            }
        } catch (e) {
-            console.warn('[Voice] API TTS failed, falling back to browser');
+            console.warn('[Voice] API TTS failed:', e);
        }
-        // Fallback to browser TTS
-        return this._speakBrowser(text);
    }

-    toggleRecording() {
-        if (this.isDisabled) {
-            showToast('Voice input requires Chrome or Edge (with HTTPS). Firefox is not supported.', 'error');
-            return;
-        }
-        if (this.isRecording) {
-            this.stopRecording();
-        } else {
-            this.startRecording();
-        }
+    /**
+     * Strip markdown formatting from text so TTS reads naturally; null/undefined yields ''. Pass order matters (see notes).
+     */
+    static stripMarkdown(text) {
+        return String(text ?? '')
+            .replace(/```[\s\S]*?```/g, '')            // code blocks: dropped entirely
+            .replace(/`([^`]+)`/g, '$1')                // inline code: keep the content
+            .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')     // images: must precede links, else "![a](u)" → "!a"
+            .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')    // links: keep the label only
+            .replace(/^[ \t]*(?:>[ \t]?)+/gm, '')       // blockquotes: line start only, so "5 > 3" survives
+            .replace(/^[ \t]*#{1,6}[ \t]+/gm, '')       // headings: line start only, so "C# is" survives
+            .replace(/^[ \t]*[-*+][ \t]+/gm, '')        // unordered lists: before italic so a "* " bullet isn't paired as emphasis
+            .replace(/^[ \t]*\d+\.[ \t]+/gm, '')        // ordered lists
+            .replace(/\*\*([^*]+)\*\*/g, '$1')          // bold
+            .replace(/\*([^*]+)\*/g, '$1')              // italic
+            .replace(/__([^_]+)__/g, '$1')              // bold alt
+            .replace(/_([^_]+)_/g, '$1')                // italic alt
+            .replace(/~~([^~]+)~~/g, '$1')              // strikethrough
+            .replace(/\n{2,}/g, '. ')                   // paragraph breaks → pause
+            .replace(/\n/g, ' ')                        // newlines → space
+            .trim();
+    }
 }