added voice functionality and increased avatar size
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 53s
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 53s
This commit is contained in:
@@ -18,6 +18,9 @@ LLM_PROVIDER=gemini
|
|||||||
LLM_API_KEY=your-api-key-here
|
LLM_API_KEY=your-api-key-here
|
||||||
LLM_MODEL=gemini-2.0-flash
|
LLM_MODEL=gemini-2.0-flash
|
||||||
|
|
||||||
|
# Optional: Dedicated OpenAI Key for Voice (if LLM_PROVIDER is not openai)
|
||||||
|
OPENAI_API_KEY=sk-...
|
||||||
|
|
||||||
# Voice mode: "api" (OpenAI Whisper/TTS) or "browser" (Web Speech API fallback)
|
# Voice mode: "api" (OpenAI Whisper/TTS) or "browser" (Web Speech API fallback)
|
||||||
VOICE_MODE=browser
|
VOICE_MODE=browser
|
||||||
TTS_MODEL=tts-1
|
TTS_MODEL=tts-1
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ class Settings(BaseSettings):
|
|||||||
llm_provider: str = "openai" # used by litellm routing
|
llm_provider: str = "openai" # used by litellm routing
|
||||||
llm_api_key: str = ""
|
llm_api_key: str = ""
|
||||||
llm_model: str = "gpt-4o-mini"
|
llm_model: str = "gpt-4o-mini"
|
||||||
|
|
||||||
|
# OpenAI API Key (specifically for Voice/TTS if LLM_PROVIDER is different)
|
||||||
|
openai_api_key: str = ""
|
||||||
|
|
||||||
# Voice feature flag: "api" = LLM provider Whisper/TTS, "browser" = Web Speech API
|
# Voice feature flag: "api" = LLM provider Whisper/TTS, "browser" = Web Speech API
|
||||||
voice_mode: Literal["api", "browser"] = "api"
|
voice_mode: Literal["api", "browser"] = "api"
|
||||||
|
|||||||
@@ -20,11 +20,10 @@ async def voice_config(user: User = Depends(get_current_user)):
|
|||||||
"""Return current voice mode so frontend knows whether to use browser or API."""
|
"""Return current voice mode so frontend knows whether to use browser or API."""
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
# API STT (Whisper) works with OpenAI-compatible providers
|
# API STT (Whisper) works with OpenAI-compatible providers
|
||||||
api_available = bool(
|
# Check if we have a dedicated voice key OR a generic LLM key for OpenAI
|
||||||
settings.voice_mode == "api"
|
has_key = bool(settings.openai_api_key or (settings.llm_api_key and settings.llm_provider == "openai"))
|
||||||
and settings.llm_api_key
|
|
||||||
and settings.llm_provider in ("openai",)
|
api_available = bool(settings.voice_mode == "api" and has_key)
|
||||||
)
|
|
||||||
return VoiceConfigOut(
|
return VoiceConfigOut(
|
||||||
voice_mode=settings.voice_mode,
|
voice_mode=settings.voice_mode,
|
||||||
voice_api_available=api_available,
|
voice_api_available=api_available,
|
||||||
|
|||||||
@@ -10,7 +10,9 @@ from app.config import get_settings
|
|||||||
async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
|
async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
|
||||||
"""Transcribe audio to text using OpenAI Whisper API."""
|
"""Transcribe audio to text using OpenAI Whisper API."""
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
|
# Use dedicated OpenAI key if available, otherwise fall back to the LLM key
|
||||||
|
api_key = settings.openai_api_key or settings.llm_api_key
|
||||||
|
client = openai.AsyncOpenAI(api_key=api_key)
|
||||||
|
|
||||||
audio_file = io.BytesIO(audio_bytes)
|
audio_file = io.BytesIO(audio_bytes)
|
||||||
audio_file.name = filename
|
audio_file.name = filename
|
||||||
@@ -25,7 +27,9 @@ async def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
|
|||||||
async def synthesize(text: str) -> bytes:
|
async def synthesize(text: str) -> bytes:
|
||||||
"""Synthesize text to speech using OpenAI TTS API."""
|
"""Synthesize text to speech using OpenAI TTS API."""
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
client = openai.AsyncOpenAI(api_key=settings.llm_api_key)
|
# Use dedicated OpenAI key if available, otherwise fall back to the LLM key
|
||||||
|
api_key = settings.openai_api_key or settings.llm_api_key
|
||||||
|
client = openai.AsyncOpenAI(api_key=api_key)
|
||||||
|
|
||||||
response = await client.audio.speech.create(
|
response = await client.audio.speech.create(
|
||||||
model=settings.tts_model,
|
model=settings.tts_model,
|
||||||
|
|||||||
@@ -845,9 +845,20 @@ tr:hover td {
|
|||||||
/* ── Avatar ───────────────────────────────────────────────────────── */
|
/* ── Avatar ───────────────────────────────────────────────────────── */
|
||||||
.avatar-container {
|
.avatar-container {
|
||||||
position: relative;
|
position: relative;
|
||||||
width: 72px;
|
width: 120px;
|
||||||
height: 72px;
|
height: 120px;
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
|
transition: transform 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.avatar-container.speaking {
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.avatar-container.speaking .avatar-ring {
|
||||||
|
border-color: var(--accent);
|
||||||
|
box-shadow: 0 0 20px var(--accent-glow);
|
||||||
|
animation: avatarSpeakPulse 1.2s infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
.avatar-ring {
|
.avatar-ring {
|
||||||
@@ -1199,4 +1210,21 @@ tr:hover td {
|
|||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
opacity: 0.6;
|
opacity: 0.6;
|
||||||
font-family: monospace;
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes avatarSpeakPulse {
|
||||||
|
0% {
|
||||||
|
transform: scale(1);
|
||||||
|
opacity: 0.8;
|
||||||
|
}
|
||||||
|
|
||||||
|
50% {
|
||||||
|
transform: scale(1.15);
|
||||||
|
opacity: 0.4;
|
||||||
|
}
|
||||||
|
|
||||||
|
100% {
|
||||||
|
transform: scale(1);
|
||||||
|
opacity: 0.8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -30,7 +30,7 @@
|
|||||||
<button type="submit" class="btn btn-primary btn-block">Sign In</button>
|
<button type="submit" class="btn btn-primary btn-block">Sign In</button>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
<span class="version-label">v0.1.1</span>
|
<span class="version-label">v0.2.0</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/js/api.js"></script>
|
<script src="/js/api.js"></script>
|
||||||
|
|||||||
@@ -199,8 +199,19 @@ class VoiceManager {
|
|||||||
const audioBlob = await response.blob();
|
const audioBlob = await response.blob();
|
||||||
const audioUrl = URL.createObjectURL(audioBlob);
|
const audioUrl = URL.createObjectURL(audioBlob);
|
||||||
const audio = new Audio(audioUrl);
|
const audio = new Audio(audioUrl);
|
||||||
|
|
||||||
|
// Visual feedback
|
||||||
|
const avatarContainer = document.querySelector('.avatar-container');
|
||||||
|
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||||
|
|
||||||
await audio.play();
|
await audio.play();
|
||||||
return new Promise(resolve => { audio.onended = resolve; });
|
|
||||||
|
return new Promise(resolve => {
|
||||||
|
audio.onended = () => {
|
||||||
|
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||||
|
resolve();
|
||||||
|
};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn('[Voice] API TTS failed:', e);
|
console.warn('[Voice] API TTS failed:', e);
|
||||||
|
|||||||
Reference in New Issue
Block a user