improved feedback for voice mode
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s
This commit is contained in:
@@ -1227,4 +1227,37 @@ tr:hover td {
|
||||
transform: scale(1);
|
||||
opacity: 0.8;
|
||||
}
|
||||
}
|
||||
|
||||
/* ── Thinking dots ────────────────────────────────────────────────── */

/* Muted, italic look applied to an assistant bubble while it shows the
   "Thinking..." placeholder. */
.message-thinking {
    color: var(--text-muted);
    font-style: italic;
    font-size: 0.9em;
}

/* Each dot is its own <span> inside .thinking-dots and pulses via the
   thinkingDots keyframes. */
.thinking-dots span {
    /* transform has no effect on non-replaced inline boxes, so the dots must
       be inline-block for the scale() animation below to be visible. */
    display: inline-block;
    animation: thinkingDots 1.4s infinite ease-in-out both;
    margin-left: 2px;
}

/* Negative delays start the first two dots part-way through the cycle so the
   three dots pulse in sequence; the third dot keeps the default delay. */
.thinking-dots span:nth-child(1) {
    animation-delay: -0.32s;
}

.thinking-dots span:nth-child(2) {
    animation-delay: -0.16s;
}

/* Dot is collapsed for most of the cycle and grows to full size at 40%. */
@keyframes thinkingDots {
    0%,
    80%,
    100% {
        transform: scale(0);
    }

    40% {
        transform: scale(1);
    }
}
|
||||
@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
|
||||
micBtn.classList.toggle('recording', recording);
|
||||
};
|
||||
|
||||
// While the voice manager reports that it is processing, lock the text input
// and show a "Transcribing..." hint; once it reports done, restore the
// placeholder that matches the current voice-mode setting and refocus.
voice.onProcessing = (processing) => {
    if (processing) {
        inputEl.placeholder = 'Transcribing...';
        inputEl.disabled = true;
        return;
    }

    const idlePlaceholder = voiceModeOn
        ? 'Voice mode ON — click the mic to speak...'
        : 'Type your message...';

    inputEl.placeholder = idlePlaceholder;
    inputEl.disabled = false;
    inputEl.focus();
};
|
||||
|
||||
micBtn.addEventListener('click', () => voice.toggleRecording());
|
||||
|
||||
// ── Chat ──────────────────────────────────────────────────────────
|
||||
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
|
||||
div.className = `message message-${role}`;
|
||||
|
||||
if (role === 'assistant') {
|
||||
div.innerHTML = renderMarkdown(content);
|
||||
// The literal 'Thinking...' is a sentinel: render the animated thinking state instead of markdown
|
||||
if (content === 'Thinking...') {
|
||||
div.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
|
||||
div.classList.add('message-thinking');
|
||||
} else {
|
||||
div.innerHTML = renderMarkdown(content);
|
||||
}
|
||||
} else {
|
||||
div.textContent = content;
|
||||
}
|
||||
@@ -168,41 +186,80 @@ document.addEventListener('DOMContentLoaded', async () => {
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
// Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
|
||||
if (voiceModeOn) {
|
||||
// Show thinking state
|
||||
assistantEl.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
|
||||
assistantEl.classList.add('message-thinking');
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6).trim();
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
if (parsed.token) {
|
||||
fullResponse += parsed.token;
|
||||
assistantEl.innerHTML = renderMarkdown(fullResponse);
|
||||
messagesEl.scrollTop = messagesEl.scrollHeight;
|
||||
}
|
||||
if (parsed.error) {
|
||||
showToast(parsed.error, 'error');
|
||||
}
|
||||
} catch (e) {
|
||||
// skip unparseable chunks
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
const chunk = decoder.decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6).trim();
|
||||
if (data === '[DONE]') break;
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
if (parsed.token) fullResponse += parsed.token;
|
||||
if (parsed.error) showToast(parsed.error, 'error');
|
||||
} catch (e) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fullResponse) {
|
||||
history.push({ role: 'assistant', content: fullResponse });
|
||||
// Text complete. Now fetch audio.
|
||||
if (fullResponse) {
|
||||
history.push({ role: 'assistant', content: fullResponse });
|
||||
|
||||
// Auto-speak if voice mode is ON (regardless of input method)
|
||||
if (voiceModeOn) {
|
||||
await voice.speak(fullResponse);
|
||||
// Keep "Thinking..." until audio is ready or failed
|
||||
const audioUrl = await voice.fetchAudio(fullResponse);
|
||||
|
||||
// Visual update: Remove thinking, show text
|
||||
assistantEl.classList.remove('message-thinking');
|
||||
assistantEl.innerHTML = renderMarkdown(fullResponse);
|
||||
messagesEl.scrollTop = messagesEl.scrollHeight;
|
||||
|
||||
if (audioUrl) {
|
||||
await voice.playAudio(audioUrl);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// Normal Text Mode: Stream directly to UI
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6).trim();
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
if (parsed.token) {
|
||||
fullResponse += parsed.token;
|
||||
assistantEl.innerHTML = renderMarkdown(fullResponse);
|
||||
messagesEl.scrollTop = messagesEl.scrollHeight;
|
||||
}
|
||||
if (parsed.error) {
|
||||
showToast(parsed.error, 'error');
|
||||
}
|
||||
} catch (e) {
|
||||
// skip unparseable chunks
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fullResponse) {
|
||||
history.push({ role: 'assistant', content: fullResponse });
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
|
||||
@@ -13,6 +13,7 @@ class VoiceManager {
|
||||
this.onStateChange = null;
|
||||
this.browserSTTSupported = false;
|
||||
this.apiAvailable = false;
|
||||
this.onProcessing = null; // New callback for "Transcribing..." state
|
||||
}
|
||||
|
||||
async init() {
|
||||
@@ -93,17 +94,18 @@ class VoiceManager {
|
||||
return;
|
||||
}
|
||||
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
// Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
|
||||
// if hardware access fails or takes time.
|
||||
|
||||
if (this.mode === 'api') {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
this.audioChunks = [];
|
||||
this.mediaRecorder = new MediaRecorder(stream);
|
||||
this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
|
||||
|
||||
this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
|
||||
this.mediaRecorder.ondataavailable = (e) => {
|
||||
if (e.data.size > 0) this.audioChunks.push(e.data);
|
||||
};
|
||||
|
||||
this.mediaRecorder.onstop = async () => {
|
||||
stream.getTracks().forEach(t => t.stop());
|
||||
@@ -111,9 +113,17 @@ class VoiceManager {
|
||||
await this._transcribeAPI(blob);
|
||||
};
|
||||
|
||||
// Wait for recorder to actually start before updating UI
|
||||
this.mediaRecorder.start();
|
||||
|
||||
// Now we are truly recording
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
|
||||
} catch (e) {
|
||||
showToast('Microphone access denied', 'error');
|
||||
console.error('[Voice] Mic access error:', e);
|
||||
showToast('Microphone access denied or error', 'error');
|
||||
this.isRecording = false;
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
}
|
||||
@@ -121,6 +131,10 @@ class VoiceManager {
|
||||
if (this.recognition) {
|
||||
try {
|
||||
this.recognition.start();
|
||||
// Recognition 'onstart' would be better, but this is okay for browser mode
|
||||
this.isRecording = true;
|
||||
this.lastInputWasVoice = true;
|
||||
if (this.onStateChange) this.onStateChange(true);
|
||||
} catch (e) {
|
||||
this.isRecording = false;
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
@@ -154,6 +168,8 @@ class VoiceManager {
|
||||
}
|
||||
|
||||
async _transcribeAPI(blob) {
|
||||
if (this.onProcessing) this.onProcessing(true);
|
||||
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('audio', blob, 'recording.webm');
|
||||
@@ -175,22 +191,20 @@ class VoiceManager {
|
||||
showToast('Transcription network error', 'error');
|
||||
} finally {
|
||||
this.isRecording = false;
|
||||
// Stop processing state
|
||||
if (this.onProcessing) this.onProcessing(false);
|
||||
if (this.onStateChange) this.onStateChange(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak text via API TTS only. No browser fallback.
|
||||
* Strips markdown formatting before sending.
|
||||
* Fetch TTS audio blob for text (API only).
|
||||
* Returns audio URL or null.
|
||||
*/
|
||||
async speak(text) {
|
||||
if (!this.apiAvailable) {
|
||||
console.log('[Voice] API TTS not available, skipping speech');
|
||||
return;
|
||||
}
|
||||
async fetchAudio(text) {
|
||||
if (!this.apiAvailable) return null;
|
||||
|
||||
const clean = VoiceManager.stripMarkdown(text);
|
||||
|
||||
try {
|
||||
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
|
||||
method: 'POST',
|
||||
@@ -198,25 +212,55 @@ class VoiceManager {
|
||||
|
||||
if (response?.ok) {
|
||||
const audioBlob = await response.blob();
|
||||
const audioUrl = URL.createObjectURL(audioBlob);
|
||||
const audio = new Audio(audioUrl);
|
||||
|
||||
// Visual feedback
|
||||
const avatarContainer = document.querySelector('.avatar-container');
|
||||
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||
|
||||
await audio.play();
|
||||
|
||||
return new Promise(resolve => {
|
||||
audio.onended = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
};
|
||||
});
|
||||
return URL.createObjectURL(audioBlob);
|
||||
} else {
|
||||
const err = await response.json().catch(() => ({}));
|
||||
console.warn('[Voice] TTS error:', err);
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('[Voice] API TTS failed:', e);
|
||||
console.warn('[Voice] TTS network error:', e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Play pre-fetched audio URL with visual feedback.
|
||||
*/
|
||||
async playAudio(audioUrl) {
|
||||
if (!audioUrl) return;
|
||||
|
||||
const audio = new Audio(audioUrl);
|
||||
|
||||
// Visual feedback
|
||||
const avatarContainer = document.querySelector('.avatar-container');
|
||||
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||
|
||||
try {
|
||||
await audio.play();
|
||||
return new Promise(resolve => {
|
||||
audio.onended = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
};
|
||||
// Handle errors during playback (e.g. format issues)
|
||||
audio.onerror = () => {
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
} catch (e) {
|
||||
console.error("Playback failed", e);
|
||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy method for backward compatibility if needed,
|
||||
* or for simple direct speech.
|
||||
*/
|
||||
async speak(text) {
|
||||
const url = await this.fetchAudio(text);
|
||||
if (url) await this.playAudio(url);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user