From be366777d45ca15c97a0bd6603b639b7e6ee7082 Mon Sep 17 00:00:00 2001
From: Dennis Thiessen <dennis@thiessen.io>
Date: Wed, 18 Feb 2026 10:11:13 +0100
Subject: [PATCH] Add dedicated OpenAI key for voice, speaking animation, and larger avatar

---
 backend/.env.example                  |  3 +++
 backend/app/config.py                 |  3 +++
 backend/app/routers/voice.py          |  9 ++++----
 backend/app/services/voice_service.py |  8 +++++--
 frontend/css/style.css                | 32 +++++++++++++++++++++++++--
 frontend/index.html                   |  2 +-
 frontend/js/voice.js                  | 13 ++++++++++-
 7 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index bad9067..71080a2 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -18,6 +18,9 @@ LLM_PROVIDER=gemini
 LLM_API_KEY=your-api-key-here
 LLM_MODEL=gemini-2.0-flash
 
+# Optional: dedicated OpenAI API key for voice (Whisper/TTS); used instead of LLM_API_KEY when set (needed if LLM_PROVIDER is not openai)
+OPENAI_API_KEY=sk-...
+
 # Voice mode: "api" (OpenAI Whisper/TTS) or "browser" (Web Speech API fallback)
 VOICE_MODE=browser
 TTS_MODEL=tts-1
diff --git a/backend/app/config.py b/backend/app/config.py
index b0afd81..ab42101 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -27,6 +27,9 @@ class Settings(BaseSettings):
     llm_provider: str = "openai"  # used by litellm routing
     llm_api_key: str = ""
     llm_model: str = "gpt-4o-mini"
+    
+    # Dedicated OpenAI API key for voice (Whisper STT / TTS); takes precedence over llm_api_key when set
+    openai_api_key: str = ""
 
     # Voice feature flag: "api" = LLM provider Whisper/TTS, "browser" = Web Speech API
     voice_mode: Literal["api", "browser"] = "api"
diff --git a/backend/app/routers/voice.py b/backend/app/routers/voice.py
index 68c7dea..b21dd95 100644
--- a/backend/app/routers/voice.py
+++ b/backend/app/routers/voice.py
@@ -20,11 +20,10 @@ async def voice_config(user: User = Depends(get_current_user)):
     """Return current voice mode so frontend knows whether to use browser or API."""
     settings = get_settings()
     # API STT (Whisper) works with OpenAI-compatible providers
-    api_available = bool(
-        settings.voice_mode == "api"
-        and settings.llm_api_key
-        and settings.llm_provider in ("openai",)
-    )
+    # Check if we have a dedicated voice key OR a generic LLM key for OpenAI
+    has_key = bool(settings.openai_api_key or (settings.llm_api_key and settings.llm_provider == "openai"))
+    
+    api_available = bool(settings.voice_mode == "api" and has_key)
     return VoiceConfigOut(
         voice_mode=settings.voice_mode,
         voice_api_available=api_available,
diff --git a/backend/app/services/voice_service.py b/backend/app/services/voice_service.py
index e555bae..c000cca 100644
--- a/backend/app/services/voice_service.py
+++ b/backend/app/services/voice_service.py
@@ -10,7 +10,9 @@ from app.config import get_settings
 async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
     """Transcribe audio to text using OpenAI Whisper API."""
     settings = get_settings()
-    client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
+    # Use the dedicated OpenAI key if set; otherwise fall back to the LLM key
+    api_key = settings.openai_api_key or settings.llm_api_key
+    client = openai.AsyncOpenAI(api_key=api_key)
 
     audio_file = io.BytesIO(audio_bytes)
     audio_file.name = filename
@@ -25,7 +27,9 @@ async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
 async def synthesize(text: str) -> bytes:
     """Synthesize text to speech using OpenAI TTS API."""
     settings = get_settings()
-    client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
+    # Use the dedicated OpenAI key if set; otherwise fall back to the LLM key
+    api_key = settings.openai_api_key or settings.llm_api_key
+    client = openai.AsyncOpenAI(api_key=api_key)
 
     response = await client.audio.speech.create(
         model=settings.tts_model,
diff --git a/frontend/css/style.css b/frontend/css/style.css
index d8d7ae0..cd79eb8 100644
--- a/frontend/css/style.css
+++ b/frontend/css/style.css
@@ -845,9 +845,20 @@ tr:hover td {
 /* ── Avatar ───────────────────────────────────────────────────────── */
 .avatar-container {
   position: relative;
-  width: 72px;
-  height: 72px;
+  width: 120px;
+  height: 120px;
   flex-shrink: 0;
+  transition: transform 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
+}
+
+.avatar-container.speaking {
+  transform: scale(1.05);
+}
+
+.avatar-container.speaking .avatar-ring {
+  border-color: var(--accent);
+  box-shadow: 0 0 20px var(--accent-glow);
+  animation: avatarSpeakPulse 1.2s infinite;
 }
 
 .avatar-ring {
@@ -1199,4 +1210,21 @@ tr:hover td {
   color: var(--text-muted);
   opacity: 0.6;
   font-family: monospace;
+}
+
+@keyframes avatarSpeakPulse {
+  0% {
+    transform: scale(1);
+    opacity: 0.8;
+  }
+
+  50% {
+    transform: scale(1.15);
+    opacity: 0.4;
+  }
+
+  100% {
+    transform: scale(1);
+    opacity: 0.8;
+  }
 }
\ No newline at end of file
diff --git a/frontend/index.html b/frontend/index.html
index a481406..19123b3 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -30,7 +30,7 @@
         <button type="submit" class="btn btn-primary btn-block">Sign In</button>
       </form>
     </div>
-    <span class="version-label">v0.1.1</span>
+    <span class="version-label">v0.2.0</span>
   </div>
 
   <script src="/js/api.js"></script>
diff --git a/frontend/js/voice.js b/frontend/js/voice.js
index 12ca8f3..758c10d 100644
--- a/frontend/js/voice.js
+++ b/frontend/js/voice.js
@@ -199,8 +199,19 @@ class VoiceManager {
                 const audioBlob = await response.blob();
                 const audioUrl = URL.createObjectURL(audioBlob);
                 const audio = new Audio(audioUrl);
+
+                // Visual feedback: mark the avatar as speaking while the audio plays (cleared in onended)
+                const avatarContainer = document.querySelector('.avatar-container');
+                if (avatarContainer) avatarContainer.classList.add('speaking');
+
                 await audio.play();
-                return new Promise(resolve => { audio.onended = resolve; });
+
+                return new Promise(resolve => {
+                    audio.onended = () => {
+                        if (avatarContainer) avatarContainer.classList.remove('speaking');
+                        resolve();
+                    };
+                });
             }
         } catch (e) {
             console.warn('[Voice] API TTS failed:', e);