added voice functionality and increased avatar size
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 53s

2026-02-18 10:11:13 +01:00
parent 6df7b17261
commit be366777d4
7 changed files with 59 additions and 11 deletions


@@ -18,6 +18,9 @@ LLM_PROVIDER=gemini
 LLM_API_KEY=your-api-key-here
 LLM_MODEL=gemini-2.0-flash
+# Optional: Dedicated OpenAI Key for Voice (if LLM_PROVIDER is not openai)
+OPENAI_API_KEY=sk-...
 # Voice mode: "api" (OpenAI Whisper/TTS) or "browser" (Web Speech API fallback)
 VOICE_MODE=browser
 TTS_MODEL=tts-1


@@ -27,6 +27,9 @@ class Settings(BaseSettings):
     llm_provider: str = "openai"  # used by litellm routing
     llm_api_key: str = ""
     llm_model: str = "gpt-4o-mini"
+    # OpenAI API Key (specifically for Voice/TTS if LLM_PROVIDER is different)
+    openai_api_key: str = ""
     # Voice feature flag: "api" = LLM provider Whisper/TTS, "browser" = Web Speech API
     voice_mode: Literal["api", "browser"] = "api"
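Note: the new openai_api_key field picks up the OPENAI_API_KEY variable added to the env file above (pydantic BaseSettings matches env vars to field names case-insensitively). A minimal sketch of the expected fallback behavior, assuming pydantic-settings and the field names from this diff; the env values are hypothetical stand-ins:

    import os
    from typing import Literal

    from pydantic_settings import BaseSettings

    class Settings(BaseSettings):
        llm_provider: str = "openai"
        llm_api_key: str = ""
        openai_api_key: str = ""
        voice_mode: Literal["api", "browser"] = "api"

    # Hypothetical values standing in for the real env file
    os.environ["LLM_PROVIDER"] = "gemini"
    os.environ["LLM_API_KEY"] = "gm-123"
    os.environ["OPENAI_API_KEY"] = "sk-456"

    settings = Settings()
    # The voice service prefers the dedicated key over the generic LLM key
    assert (settings.openai_api_key or settings.llm_api_key) == "sk-456"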


@@ -20,11 +20,10 @@ async def voice_config(user: User = Depends(get_current_user)):
     """Return current voice mode so frontend knows whether to use browser or API."""
     settings = get_settings()
     # API STT (Whisper) works with OpenAI-compatible providers
-    api_available = bool(
-        settings.voice_mode == "api"
-        and settings.llm_api_key
-        and settings.llm_provider in ("openai",)
-    )
+    # Check if we have a dedicated voice key OR a generic LLM key for OpenAI
+    has_key = bool(settings.openai_api_key or (settings.llm_api_key and settings.llm_provider == "openai"))
+    api_available = bool(settings.voice_mode == "api" and has_key)
     return VoiceConfigOut(
         voice_mode=settings.voice_mode,
         voice_api_available=api_available,
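The rewritten check means API voice is reported as available when voice mode is "api" and either a dedicated OpenAI key is set or the generic LLM key already belongs to the openai provider. A self-contained sketch of that logic with hypothetical values (the helper name is illustrative, not from the codebase):

    def api_available(voice_mode: str, openai_key: str, llm_key: str, provider: str) -> bool:
        # Mirrors the diff: a dedicated voice key OR an OpenAI LLM key unlocks API voice
        has_key = bool(openai_key or (llm_key and provider == "openai"))
        return voice_mode == "api" and has_key

    assert api_available("api", "sk-x", "", "gemini")           # dedicated key, non-OpenAI LLM
    assert api_available("api", "", "sk-y", "openai")           # OpenAI LLM key doubles as voice key
    assert not api_available("api", "", "gm-z", "gemini")       # no usable key
    assert not api_available("browser", "sk-x", "", "openai")   # browser mode never calls the API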


@@ -10,7 +10,9 @@ from app.config import get_settings
 async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
     """Transcribe audio to text using OpenAI Whisper API."""
     settings = get_settings()
-    client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
+    # Use the dedicated OpenAI key if available, otherwise fall back to the LLM key
+    api_key = settings.openai_api_key or settings.llm_api_key
+    client = openai.AsyncOpenAI(api_key=api_key)
     audio_file = io.BytesIO(audio_bytes)
     audio_file.name = filename

@@ -25,7 +27,9 @@ async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
 async def synthesize(text: str) -> bytes:
     """Synthesize text to speech using OpenAI TTS API."""
     settings = get_settings()
-    client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
+    # Use the dedicated OpenAI key if available, otherwise fall back to the LLM key
+    api_key = settings.openai_api_key or settings.llm_api_key
+    client = openai.AsyncOpenAI(api_key=api_key)
     response = await client.audio.speech.create(
         model=settings.tts_model,
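Both helpers now resolve the key the same way, so a Gemini deployment with only OPENAI_API_KEY set can still use API voice. A hypothetical round trip through the two functions; the module path app.services.voice is assumed (the file name is not shown in this view) and a valid OpenAI key must be configured:

    import asyncio

    from app.services.voice import synthesize, transcribe  # assumed module path

    async def round_trip() -> None:
        audio = await synthesize("Guten Morgen! Wie geht es dir?")
        text = await transcribe(audio, filename="tts-output.mp3")
        print(text)  # should come back close to the original sentence

    asyncio.run(round_trip())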


@@ -845,9 +845,20 @@ tr:hover td {
/* ── Avatar ───────────────────────────────────────────────────────── */ /* ── Avatar ───────────────────────────────────────────────────────── */
.avatar-container { .avatar-container {
position: relative; position: relative;
width: 72px; width: 120px;
height: 72px; height: 120px;
flex-shrink: 0; flex-shrink: 0;
transition: transform 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
}
.avatar-container.speaking {
transform: scale(1.05);
}
.avatar-container.speaking .avatar-ring {
border-color: var(--accent);
box-shadow: 0 0 20px var(--accent-glow);
animation: avatarSpeakPulse 1.2s infinite;
} }
.avatar-ring { .avatar-ring {
@@ -1199,4 +1210,21 @@ tr:hover td {
   color: var(--text-muted);
   opacity: 0.6;
   font-family: monospace;
 }
+@keyframes avatarSpeakPulse {
+  0% {
+    transform: scale(1);
+    opacity: 0.8;
+  }
+  50% {
+    transform: scale(1.15);
+    opacity: 0.4;
+  }
+  100% {
+    transform: scale(1);
+    opacity: 0.8;
+  }
+}


@@ -30,7 +30,7 @@
       <button type="submit" class="btn btn-primary btn-block">Sign In</button>
     </form>
   </div>
-  <span class="version-label">v0.1.1</span>
+  <span class="version-label">v0.2.0</span>
 </div>
 <script src="/js/api.js"></script>


@@ -199,8 +199,19 @@ class VoiceManager {
       const audioBlob = await response.blob();
       const audioUrl = URL.createObjectURL(audioBlob);
       const audio = new Audio(audioUrl);
+      // Visual feedback: mark the avatar as speaking while playback runs
+      const avatarContainer = document.querySelector('.avatar-container');
+      if (avatarContainer) avatarContainer.classList.add('speaking');
       await audio.play();
-      return new Promise(resolve => { audio.onended = resolve; });
+      return new Promise(resolve => {
+        audio.onended = () => {
+          if (avatarContainer) avatarContainer.classList.remove('speaking');
+          resolve();
+        };
+      });
     }
   } catch (e) {
     console.warn('[Voice] API TTS failed:', e);