From a708f84a07a4a92de34d60834db8c5db586fb166 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Mon, 16 Feb 2026 20:23:46 +0100 Subject: [PATCH] updated voice, avatar and frontend to reflect changes --- .gitea/workflows/workflow.yml | 8 ++++++ backend/app/routers/voice.py | 12 ++++++++- backend/app/schemas.py | 1 + frontend/js/chat.js | 8 ++++++ frontend/js/voice.js | 49 ++++++++++++++++++++++++++--------- 5 files changed, 65 insertions(+), 13 deletions(-) diff --git a/.gitea/workflows/workflow.yml b/.gitea/workflows/workflow.yml index b4a692d..05ab983 100644 --- a/.gitea/workflows/workflow.yml +++ b/.gitea/workflows/workflow.yml @@ -81,6 +81,14 @@ jobs: # Restart service #sudo systemctl restart fluentgerman echo "✓ FluentGerman.ai deployed" + + + # Run uvicorn in the background (--reload is dev-only; a foreground server would hang this CI job) + cd ${DEPLOY_PATH}/backend + source venv/bin/activate + nohup uvicorn app.main:app --host 0.0.0.0 --port 8999 > uvicorn.log 2>&1 & + echo "✓ FluentGerman.ai running" + REMOTE_SCRIPT # Cleanup diff --git a/backend/app/routers/voice.py b/backend/app/routers/voice.py index 8da4d8c..68c7dea 100644 --- a/backend/app/routers/voice.py +++ b/backend/app/routers/voice.py @@ -18,7 +18,17 @@ router = APIRouter(prefix="/api/voice", tags=["voice"]) @router.get("/config", response_model=VoiceConfigOut) async def voice_config(user: User = Depends(get_current_user)): """Return current voice mode so frontend knows whether to use browser or API.""" - return VoiceConfigOut(voice_mode=get_settings().voice_mode) + settings = get_settings() + # API STT (Whisper) is only available with the OpenAI provider (the sole provider checked below) + api_available = bool( + settings.voice_mode == "api" + and settings.llm_api_key + and settings.llm_provider in ("openai",) + ) + return VoiceConfigOut( + voice_mode=settings.voice_mode, + voice_api_available=api_available, + ) @router.post("/transcribe") diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 7288501..0541a59 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -90,3 +90,4 @@ class
VoiceInstructionRequest(BaseModel): class VoiceConfigOut(BaseModel): voice_mode: str # "api" | "browser" + voice_api_available: bool = False # True if API STT (Whisper) is configured diff --git a/frontend/js/chat.js b/frontend/js/chat.js index 2e5989c..43eede2 100644 --- a/frontend/js/chat.js +++ b/frontend/js/chat.js @@ -81,6 +81,14 @@ document.addEventListener('DOMContentLoaded', async () => { const voice = new VoiceManager(); await voice.init(); + // Disable mic button if no STT method is available + if (voice.isDisabled) { + voiceBtn.disabled = true; + voiceBtn.title = 'Voice input requires Chrome or Edge (with HTTPS)'; + voiceBtn.style.opacity = '0.35'; + voiceBtn.style.cursor = 'not-allowed'; + } + voice.onResult = (text) => { inputEl.value = text; voice.lastInputWasVoice = true; diff --git a/frontend/js/voice.js b/frontend/js/voice.js index 4c2526d..48df44a 100644 --- a/frontend/js/voice.js +++ b/frontend/js/voice.js @@ -6,12 +6,14 @@ class VoiceManager { this.recognition = null; this.synthesis = window.speechSynthesis; this.isRecording = false; - this.lastInputWasVoice = false; // tracks if last message was spoken + this.isDisabled = false; // true when no STT method is available + this.lastInputWasVoice = false; this.mediaRecorder = null; this.audioChunks = []; this.onResult = null; this.onStateChange = null; this.browserSTTSupported = false; + this.apiAvailable = false; } async init() { @@ -24,21 +26,37 @@ class VoiceManager { if (response?.ok) { const config = await response.json(); this.mode = config.voice_mode; - console.log('[Voice] Server mode:', this.mode); + this.apiAvailable = config.voice_api_available || false; + console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable); } } catch (e) { console.warn('[Voice] Could not fetch config, using browser mode'); this.mode = 'browser'; } - // Auto-fallback: if server says "browser" but browser doesn't support STT, use API + // Determine the best available mode if (this.mode === 
'browser' && !this.browserSTTSupported) { - console.warn('[Voice] Browser STT not supported, falling back to API mode'); - this.mode = 'api'; - showToast('Using cloud voice recognition — your browser doesn\'t support built-in speech recognition.', 'info'); + if (this.apiAvailable) { + console.log('[Voice] Browser STT not supported, falling back to API mode'); + this.mode = 'api'; + showToast('Using cloud voice recognition — your browser doesn\'t support built-in speech recognition.', 'info'); + } else { + // Neither method works + console.warn('[Voice] No STT method available — disabling voice input'); + this.isDisabled = true; + } + } else if (this.mode === 'api' && !this.apiAvailable) { + // Server says API but API isn't actually configured + if (this.browserSTTSupported) { + console.log('[Voice] API STT not configured, using browser STT'); + this.mode = 'browser'; + } else { + console.warn('[Voice] No STT method available — disabling voice input'); + this.isDisabled = true; + } } - console.log('[Voice] Active mode:', this.mode); + console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode); } _initBrowserSTT() { @@ -84,6 +102,11 @@ class VoiceManager { } async startRecording() { + if (this.isDisabled) { + showToast('Voice input requires Chrome or Edge (with HTTPS). Firefox is not supported.', 'error'); + return; + } + this.isRecording = true; this.lastInputWasVoice = true; if (this.onStateChange) this.onStateChange(true); @@ -126,12 +149,10 @@ class VoiceManager { showToast('Voice recognition failed to start. Try again.', 'error'); } } else { - // Shouldn't happen after init() fallback, but safety net - console.warn('[Voice] No speech recognition available, switching to API'); - this.mode = 'api'; + console.warn('[Voice] No speech recognition available'); this.isRecording = false; if (this.onStateChange) this.onStateChange(false); - showToast('Switched to cloud voice recognition. 
Please try again.', 'info'); + showToast('Voice input requires Chrome or Edge (with HTTPS).', 'error'); } } } @@ -187,7 +208,7 @@ class VoiceManager { } async speak(text) { - if (this.mode === 'api') { + if (this.mode === 'api' && this.apiAvailable) { return this._speakAPI(text); } else { return this._speakBrowser(text); @@ -233,6 +254,10 @@ class VoiceManager { } toggleRecording() { + if (this.isDisabled) { + showToast('Voice input requires Chrome or Edge (with HTTPS). Firefox is not supported.', 'error'); + return; + } if (this.isRecording) { this.stopRecording(); } else {