improved feedback for voice mode
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s

This commit is contained in:
2026-02-18 12:19:21 +01:00
parent 9143c27af8
commit 3805157f67
3 changed files with 194 additions and 60 deletions

View File

@@ -1228,3 +1228,36 @@ tr:hover td {
opacity: 0.8; opacity: 0.8;
} }
} }
/* ── Thinking dots ────────────────────────────────────────────────── */
.message-thinking {
  color: var(--text-muted);
  font-style: italic;
  font-size: 0.9em;
}
/* Each dot is a <span>. `display: inline-block` is required: CSS
   transforms (the scale() in the keyframes below) have no effect on
   non-replaced inline boxes, so without it the dots never animate. */
.thinking-dots span {
  display: inline-block;
  animation: thinkingDots 1.4s infinite ease-in-out both;
  margin-left: 2px;
}
/* Negative delays stagger the three dots with no initial pause
   (the third dot uses the default delay of 0s). */
.thinking-dots span:nth-child(1) {
  animation-delay: -0.32s;
}
.thinking-dots span:nth-child(2) {
  animation-delay: -0.16s;
}
/* Each dot pulses from invisible (scale 0) to full size and back. */
@keyframes thinkingDots {
  0%,
  80%,
  100% {
    transform: scale(0);
  }
  40% {
    transform: scale(1);
  }
}

View File

@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
micBtn.classList.toggle('recording', recording); micBtn.classList.toggle('recording', recording);
}; };
// Reflect the STT "Transcribing..." state in the chat input field:
// lock the input while transcription is in flight, restore it after.
voice.onProcessing = (isBusy) => {
  inputEl.disabled = isBusy;
  if (isBusy) {
    inputEl.placeholder = 'Transcribing...';
    return;
  }
  inputEl.placeholder = voiceModeOn
    ? 'Voice mode ON — click the mic to speak...'
    : 'Type your message...';
  inputEl.focus();
};
micBtn.addEventListener('click', () => voice.toggleRecording()); micBtn.addEventListener('click', () => voice.toggleRecording());
// ── Chat ────────────────────────────────────────────────────────── // ── Chat ──────────────────────────────────────────────────────────
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
div.className = `message message-${role}`; div.className = `message message-${role}`;
if (role === 'assistant') { if (role === 'assistant') {
// content might be empty initially for thinking state
if (content === 'Thinking...') {
div.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
div.classList.add('message-thinking');
} else {
div.innerHTML = renderMarkdown(content); div.innerHTML = renderMarkdown(content);
}
} else { } else {
div.textContent = content; div.textContent = content;
} }
@@ -168,6 +186,49 @@ document.addEventListener('DOMContentLoaded', async () => {
const reader = response.body.getReader(); const reader = response.body.getReader();
const decoder = new TextDecoder(); const decoder = new TextDecoder();
// Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
if (voiceModeOn) {
// Show thinking state
assistantEl.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
assistantEl.classList.add('message-thinking');
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6).trim();
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
if (parsed.token) fullResponse += parsed.token;
if (parsed.error) showToast(parsed.error, 'error');
} catch (e) { }
}
}
}
// Text complete. Now fetch audio.
if (fullResponse) {
history.push({ role: 'assistant', content: fullResponse });
// Keep "Thinking..." until audio is ready or failed
const audioUrl = await voice.fetchAudio(fullResponse);
// Visual update: Remove thinking, show text
assistantEl.classList.remove('message-thinking');
assistantEl.innerHTML = renderMarkdown(fullResponse);
messagesEl.scrollTop = messagesEl.scrollHeight;
if (audioUrl) {
await voice.playAudio(audioUrl);
}
}
} else {
// Normal Text Mode: Stream directly to UI
while (true) { while (true) {
const { done, value } = await reader.read(); const { done, value } = await reader.read();
if (done) break; if (done) break;
@@ -199,10 +260,6 @@ document.addEventListener('DOMContentLoaded', async () => {
if (fullResponse) { if (fullResponse) {
history.push({ role: 'assistant', content: fullResponse }); history.push({ role: 'assistant', content: fullResponse });
// Auto-speak if voice mode is ON (regardless of input method)
if (voiceModeOn) {
await voice.speak(fullResponse);
} }
} }
} catch (e) { } catch (e) {

View File

@@ -13,6 +13,7 @@ class VoiceManager {
this.onStateChange = null; this.onStateChange = null;
this.browserSTTSupported = false; this.browserSTTSupported = false;
this.apiAvailable = false; this.apiAvailable = false;
this.onProcessing = null; // New callback for "Transcribing..." state
} }
async init() { async init() {
@@ -93,17 +94,18 @@ class VoiceManager {
return; return;
} }
this.isRecording = true; // Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
this.lastInputWasVoice = true; // if hardware access fails or takes time.
if (this.onStateChange) this.onStateChange(true);
if (this.mode === 'api') { if (this.mode === 'api') {
try { try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
this.audioChunks = []; this.audioChunks = [];
this.mediaRecorder = new MediaRecorder(stream); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data); this.mediaRecorder.ondataavailable = (e) => {
if (e.data.size > 0) this.audioChunks.push(e.data);
};
this.mediaRecorder.onstop = async () => { this.mediaRecorder.onstop = async () => {
stream.getTracks().forEach(t => t.stop()); stream.getTracks().forEach(t => t.stop());
@@ -111,9 +113,17 @@ class VoiceManager {
await this._transcribeAPI(blob); await this._transcribeAPI(blob);
}; };
// Wait for recorder to actually start before updating UI
this.mediaRecorder.start(); this.mediaRecorder.start();
// Now we are truly recording
this.isRecording = true;
this.lastInputWasVoice = true;
if (this.onStateChange) this.onStateChange(true);
} catch (e) { } catch (e) {
showToast('Microphone access denied', 'error'); console.error('[Voice] Mic access error:', e);
showToast('Microphone access denied or error', 'error');
this.isRecording = false; this.isRecording = false;
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
} }
@@ -121,6 +131,10 @@ class VoiceManager {
if (this.recognition) { if (this.recognition) {
try { try {
this.recognition.start(); this.recognition.start();
// Recognition 'onstart' would be better, but this is okay for browser mode
this.isRecording = true;
this.lastInputWasVoice = true;
if (this.onStateChange) this.onStateChange(true);
} catch (e) { } catch (e) {
this.isRecording = false; this.isRecording = false;
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
@@ -154,6 +168,8 @@ class VoiceManager {
} }
async _transcribeAPI(blob) { async _transcribeAPI(blob) {
if (this.onProcessing) this.onProcessing(true);
try { try {
const formData = new FormData(); const formData = new FormData();
formData.append('audio', blob, 'recording.webm'); formData.append('audio', blob, 'recording.webm');
@@ -175,22 +191,20 @@ class VoiceManager {
showToast('Transcription network error', 'error'); showToast('Transcription network error', 'error');
} finally { } finally {
this.isRecording = false; this.isRecording = false;
// Stop processing state
if (this.onProcessing) this.onProcessing(false);
if (this.onStateChange) this.onStateChange(false); if (this.onStateChange) this.onStateChange(false);
} }
} }
/** /**
* Speak text via API TTS only. No browser fallback. * Fetch TTS audio blob for text (API only).
* Strips markdown formatting before sending. * Returns audio URL or null.
*/ */
async speak(text) { async fetchAudio(text) {
if (!this.apiAvailable) { if (!this.apiAvailable) return null;
console.log('[Voice] API TTS not available, skipping speech');
return;
}
const clean = VoiceManager.stripMarkdown(text); const clean = VoiceManager.stripMarkdown(text);
try { try {
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, { const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
method: 'POST', method: 'POST',
@@ -198,27 +212,57 @@ class VoiceManager {
if (response?.ok) { if (response?.ok) {
const audioBlob = await response.blob(); const audioBlob = await response.blob();
const audioUrl = URL.createObjectURL(audioBlob); return URL.createObjectURL(audioBlob);
} else {
const err = await response.json().catch(() => ({}));
console.warn('[Voice] TTS error:', err);
}
} catch (e) {
console.warn('[Voice] TTS network error:', e);
}
return null;
}
/**
* Play pre-fetched audio URL with visual feedback.
*/
async playAudio(audioUrl) {
if (!audioUrl) return;
const audio = new Audio(audioUrl); const audio = new Audio(audioUrl);
// Visual feedback // Visual feedback
const avatarContainer = document.querySelector('.avatar-container'); const avatarContainer = document.querySelector('.avatar-container');
if (avatarContainer) avatarContainer.classList.add('speaking'); if (avatarContainer) avatarContainer.classList.add('speaking');
try {
await audio.play(); await audio.play();
return new Promise(resolve => { return new Promise(resolve => {
audio.onended = () => { audio.onended = () => {
if (avatarContainer) avatarContainer.classList.remove('speaking'); if (avatarContainer) avatarContainer.classList.remove('speaking');
resolve(); resolve();
}; };
// Handle errors during playback (e.g. format issues)
audio.onerror = () => {
if (avatarContainer) avatarContainer.classList.remove('speaking');
resolve();
}
}); });
}
} catch (e) { } catch (e) {
console.warn('[Voice] API TTS failed:', e); console.error("Playback failed", e);
if (avatarContainer) avatarContainer.classList.remove('speaking');
} }
} }
/**
* Legacy method for backward compatibility if needed,
* or for simple direct speech.
*/
async speak(text) {
const url = await this.fetchAudio(text);
if (url) await this.playAudio(url);
}
/** /**
* Strip markdown formatting from text so TTS reads naturally. * Strip markdown formatting from text so TTS reads naturally.
*/ */