initial commit

2026-02-12 18:45:10 +01:00
commit be7bbba456
42 changed files with 3767 additions and 0 deletions
--- a/frontend/js/voice.js
+++ b/frontend/js/voice.js
@@ -0,0 +1,230 @@
+/* FluentGerman.ai — Voice module (Web Speech API + API mode) */
+
+/**
+ * Voice input (speech-to-text) and output (text-to-speech) manager.
+ *
+ * Two modes, selected by `this.mode`:
+ *  - 'browser' (default): Web Speech API — SpeechRecognition for input and
+ *    speechSynthesis for output.
+ *  - 'api': microphone audio is captured with MediaRecorder and POSTed to
+ *    `/voice/transcribe`; spoken audio is fetched from `/voice/synthesize`.
+ *
+ * Hooks assigned by the caller:
+ *  - `onResult(text)`: invoked with each recognized transcript.
+ *  - `onStateChange(isRecording)`: invoked whenever `isRecording` changes.
+ *
+ * NOTE(review): `api()` and `showToast()` are not defined in this file. The
+ * code assumes `api()` resolves to a fetch-style Response (or a nullish
+ * value) — confirm against their definitions.
+ */
+class VoiceManager {
+    constructor() {
+        this.mode = 'browser'; // will be set from server config
+        this.recognition = null;
+        this.synthesis = window.speechSynthesis;
+        this.isRecording = false;
+        this.lastInputWasVoice = false; // tracks if last message was spoken
+        this.mediaRecorder = null;
+        this.audioChunks = [];
+        this.onResult = null;
+        this.onStateChange = null;
+    }
+
+    /**
+     * Prepare browser speech recognition and load the voice mode from
+     * `/voice/config`. Never throws: on any failure the mode is 'browser'.
+     *
+     * @returns {Promise<void>}
+     */
+    async init() {
+        // Always init browser STT as fallback
+        this._initBrowserSTT();
+
+        // Fetch voice mode from server
+        try {
+            const response = await api('/voice/config');
+            if (response?.ok) {
+                const config = await response.json();
+                // Only accept a usable value; a missing field must not leave
+                // `mode` undefined.
+                if (typeof config?.voice_mode === 'string' && config.voice_mode !== '') {
+                    this.mode = config.voice_mode;
+                } else {
+                    console.warn('[Voice] Config has no voice_mode, keeping current mode');
+                }
+                console.log('[Voice] Mode:', this.mode);
+            }
+        } catch (e) {
+            console.warn('[Voice] Could not fetch config, using browser mode');
+            this.mode = 'browser';
+        }
+    }
+
+    /**
+     * Update `isRecording` and notify `onStateChange` only when the value
+     * actually changes, so listeners do not receive duplicate notifications
+     * (e.g. a recognition `error` event followed by `end`).
+     *
+     * @param {boolean} active - new recording state
+     */
+    _setRecording(active) {
+        if (this.isRecording === active) return;
+        this.isRecording = active;
+        if (this.onStateChange) this.onStateChange(active);
+    }
+
+    /**
+     * Create and wire the SpeechRecognition instance (German, single
+     * utterance, final results only). Leaves `this.recognition` null when
+     * the browser exposes neither SpeechRecognition nor the webkit variant.
+     */
+    _initBrowserSTT() {
+        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
+        if (!SpeechRecognition) {
+            console.warn('[Voice] Speech recognition not supported in this browser');
+            return;
+        }
+
+        this.recognition = new SpeechRecognition();
+        this.recognition.continuous = false;
+        this.recognition.interimResults = false;
+        this.recognition.lang = 'de-DE';
+
+        this.recognition.onresult = (event) => {
+            const text = event.results?.[0]?.[0]?.transcript;
+            if (typeof text !== 'string') return; // malformed event, nothing to deliver
+            console.log('[Voice] Browser STT result:', text);
+            this.lastInputWasVoice = true;
+            if (this.onResult) this.onResult(text);
+        };
+
+        this.recognition.onend = () => {
+            console.log('[Voice] Browser STT ended');
+            this._setRecording(false);
+        };
+
+        this.recognition.onerror = (event) => {
+            console.error('[Voice] Browser STT error:', event.error);
+            this._setRecording(false);
+
+            if (event.error === 'not-allowed') {
+                showToast('Microphone access denied. Please allow microphone in browser settings.', 'error');
+            } else if (event.error === 'no-speech') {
+                showToast('No speech detected. Try again.', 'error');
+            }
+        };
+
+        console.log('[Voice] Browser STT initialized');
+    }
+
+    /**
+     * Start capturing speech in the current mode. Errors are reported via
+     * toast and by resetting the recording state; the promise does not reject
+     * for microphone or recognition failures.
+     *
+     * @returns {Promise<void>}
+     */
+    async startRecording() {
+        this.lastInputWasVoice = true;
+        this._setRecording(true);
+
+        if (this.mode === 'api') {
+            // API mode — record audio via MediaRecorder, send to the transcription endpoint
+            let stream = null;
+            try {
+                stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+
+                // stopRecording() may have run while the permission prompt was
+                // open. Starting now would record with isRecording === false
+                // and leave the microphone open, so release it and bail out.
+                if (!this.isRecording) {
+                    stream.getTracks().forEach((track) => track.stop());
+                    return;
+                }
+
+                // Per-recording buffer: a later recording must not overwrite
+                // the chunks this recorder's onstop handler will read.
+                const chunks = [];
+                const recorder = new MediaRecorder(stream);
+
+                recorder.ondataavailable = (event) => {
+                    if (event.data && event.data.size > 0) chunks.push(event.data);
+                };
+
+                recorder.onstop = async () => {
+                    stream.getTracks().forEach((track) => track.stop());
+                    const blob = new Blob(chunks, { type: 'audio/webm' });
+                    await this._transcribeAPI(blob);
+                };
+
+                this.audioChunks = chunks;
+                this.mediaRecorder = recorder;
+                recorder.start();
+                console.log('[Voice] API recording started');
+            } catch (e) {
+                console.error('[Voice] Microphone access error:', e);
+                if (stream) {
+                    // The stream was acquired but the recorder failed: release the mic.
+                    stream.getTracks().forEach((track) => track.stop());
+                    showToast('Could not start audio recording. Try again.', 'error');
+                } else {
+                    showToast('Microphone access denied', 'error');
+                }
+                this._setRecording(false);
+            }
+        } else {
+            // Browser mode — use Web Speech API
+            if (this.recognition) {
+                try {
+                    this.recognition.start();
+                    console.log('[Voice] Browser STT started');
+                } catch (e) {
+                    console.error('[Voice] Failed to start recognition:', e);
+                    this._setRecording(false);
+                    showToast('Voice recognition failed to start. Try again.', 'error');
+                }
+            } else {
+                console.warn('[Voice] No speech recognition available');
+                showToast('Speech recognition not supported in this browser', 'error');
+                this._setRecording(false);
+            }
+        }
+    }
+
+    /**
+     * Stop the current capture. When a MediaRecorder is recording, stopping
+     * it triggers transcription and the recording state is reset once that
+     * finishes; otherwise the state is reset immediately.
+     */
+    stopRecording() {
+        console.log('[Voice] Stopping recording...');
+
+        // Check the recorder itself rather than `mode`: `mode` can be switched
+        // to 'browser' by a failed transcription while a recorder is active.
+        const recorder = this.mediaRecorder;
+        if (recorder && recorder.state === 'recording') {
+            recorder.stop();
+            return;
+        }
+
+        if (this.mode !== 'api' && this.recognition) {
+            try {
+                this.recognition.stop();
+            } catch (e) {
+                // Already stopped
+            }
+        }
+        this._setRecording(false);
+    }
+
+    /**
+     * Upload a recording to `/voice/transcribe` and deliver the resulting
+     * text to `onResult`. A non-OK response switches `mode` to 'browser' for
+     * the rest of the session.
+     *
+     * @param {Blob} blob - recorded audio
+     * @returns {Promise<void>}
+     */
+    async _transcribeAPI(blob) {
+        try {
+            if (!blob || blob.size === 0) {
+                // Nothing was captured; skip the upload.
+                console.warn('[Voice] Empty recording, skipping transcription');
+                showToast('No speech detected. Try again.', 'error');
+                return;
+            }
+
+            const formData = new FormData();
+            formData.append('audio', blob, 'recording.webm');
+
+            console.log('[Voice] Sending audio to API for transcription...');
+            const response = await api('/voice/transcribe', {
+                method: 'POST',
+                body: formData,
+            });
+
+            if (response?.ok) {
+                const data = await response.json();
+                console.log('[Voice] API transcription result:', data?.text);
+                if (typeof data?.text === 'string') {
+                    this.lastInputWasVoice = true;
+                    if (this.onResult) this.onResult(data.text);
+                } else {
+                    // Do not hand `undefined` to the result callback.
+                    showToast('Transcription error', 'error');
+                }
+            } else {
+                showToast('Transcription failed. Falling back to browser voice.', 'error');
+                // Fallback: switch to browser mode for this session
+                this.mode = 'browser';
+            }
+        } catch (e) {
+            console.error('[Voice] API transcription error:', e);
+            showToast('Transcription error', 'error');
+        } finally {
+            // A new recording may have started while this upload was in
+            // flight; do not clobber its state.
+            if (this.mediaRecorder?.state !== 'recording') {
+                this._setRecording(false);
+            }
+        }
+    }
+
+    /**
+     * Speak `text` using the current mode. Resolves when playback ends (or
+     * fails). Empty or blank text resolves immediately without speaking.
+     *
+     * @param {string} text - text to speak
+     * @returns {Promise<*>}
+     */
+    async speak(text) {
+        if (text == null || String(text).trim() === '') return undefined;
+
+        if (this.mode === 'api') {
+            return this._speakAPI(text);
+        }
+        return this._speakBrowser(text);
+    }
+
+    /**
+     * Speak `text` with the browser's speechSynthesis (German, rate 0.95),
+     * cancelling any speech already in progress. Always resolves, including
+     * on synthesis errors or when speech synthesis is unavailable.
+     *
+     * @param {string} text - text to speak
+     * @returns {Promise<*>}
+     */
+    _speakBrowser(text) {
+        return new Promise((resolve) => {
+            if (!this.synthesis || typeof SpeechSynthesisUtterance === 'undefined') {
+                console.warn('[Voice] Browser TTS not supported');
+                resolve();
+                return;
+            }
+
+            // Cancel any ongoing speech
+            this.synthesis.cancel();
+            const utterance = new SpeechSynthesisUtterance(text);
+            utterance.lang = 'de-DE';
+            utterance.rate = 0.95;
+            utterance.onend = resolve;
+            utterance.onerror = () => {
+                console.warn('[Voice] Browser TTS error');
+                resolve();
+            };
+            this.synthesis.speak(utterance);
+        });
+    }
+
+    /**
+     * Fetch synthesized audio from `/voice/synthesize` and play it. Falls
+     * back to browser TTS when the request fails, the response is not OK, or
+     * playback cannot start.
+     *
+     * @param {string} text - text to speak
+     * @returns {Promise<*>}
+     */
+    async _speakAPI(text) {
+        let audioUrl = null;
+        try {
+            const response = await api(`/voice/synthesize?text=${encodeURIComponent(text)}`, {
+                method: 'POST',
+            });
+
+            if (response?.ok) {
+                const audioBlob = await response.blob();
+                audioUrl = URL.createObjectURL(audioBlob);
+                const audio = new Audio(audioUrl);
+
+                // Attach handlers before play() so a very short clip or a
+                // mid-playback error cannot leave this promise pending.
+                const finished = new Promise((resolve) => {
+                    audio.onended = resolve;
+                    audio.onerror = () => {
+                        console.warn('[Voice] API TTS playback error');
+                        resolve();
+                    };
+                });
+
+                await audio.play();
+                return await finished;
+            }
+        } catch (e) {
+            console.warn('[Voice] API TTS failed, falling back to browser');
+        } finally {
+            // Release the blob URL once playback is over or has failed.
+            if (audioUrl) URL.revokeObjectURL(audioUrl);
+        }
+        // Fallback to browser TTS
+        return this._speakBrowser(text);
+    }
+
+    /**
+     * Start recording when idle, stop when recording.
+     */
+    toggleRecording() {
+        if (this.isRecording) {
+            this.stopRecording();
+            return;
+        }
+        // startRecording() reports its own failures; intentionally not awaited.
+        void this.startRecording();
+    }
+}