improved feedback for voice mode
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 48s
This commit is contained in:
@@ -1228,3 +1228,36 @@ tr:hover td {
|
|||||||
opacity: 0.8;
|
opacity: 0.8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Thinking dots ────────────────────────────────────────────────── */
|
||||||
|
.message-thinking {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-style: italic;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.thinking-dots span {
|
||||||
|
animation: thinkingDots 1.4s infinite ease-in-out both;
|
||||||
|
margin-left: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.thinking-dots span:nth-child(1) {
|
||||||
|
animation-delay: -0.32s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.thinking-dots span:nth-child(2) {
|
||||||
|
animation-delay: -0.16s;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes thinkingDots {
|
||||||
|
|
||||||
|
0%,
|
||||||
|
80%,
|
||||||
|
100% {
|
||||||
|
transform: scale(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
40% {
|
||||||
|
transform: scale(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
micBtn.classList.toggle('recording', recording);
|
micBtn.classList.toggle('recording', recording);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Show "Transcribing..." state
|
||||||
|
voice.onProcessing = (processing) => {
|
||||||
|
if (processing) {
|
||||||
|
inputEl.placeholder = 'Transcribing...';
|
||||||
|
inputEl.disabled = true;
|
||||||
|
} else {
|
||||||
|
inputEl.placeholder = voiceModeOn ? 'Voice mode ON — click the mic to speak...' : 'Type your message...';
|
||||||
|
inputEl.disabled = false;
|
||||||
|
inputEl.focus();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
micBtn.addEventListener('click', () => voice.toggleRecording());
|
micBtn.addEventListener('click', () => voice.toggleRecording());
|
||||||
|
|
||||||
// ── Chat ──────────────────────────────────────────────────────────
|
// ── Chat ──────────────────────────────────────────────────────────
|
||||||
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
div.className = `message message-${role}`;
|
div.className = `message message-${role}`;
|
||||||
|
|
||||||
if (role === 'assistant') {
|
if (role === 'assistant') {
|
||||||
|
// content might be empty initially for thinking state
|
||||||
|
if (content === 'Thinking...') {
|
||||||
|
div.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
|
||||||
|
div.classList.add('message-thinking');
|
||||||
|
} else {
|
||||||
div.innerHTML = renderMarkdown(content);
|
div.innerHTML = renderMarkdown(content);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
div.textContent = content;
|
div.textContent = content;
|
||||||
}
|
}
|
||||||
@@ -168,6 +186,49 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
const reader = response.body.getReader();
|
const reader = response.body.getReader();
|
||||||
const decoder = new TextDecoder();
|
const decoder = new TextDecoder();
|
||||||
|
|
||||||
|
// Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
|
||||||
|
if (voiceModeOn) {
|
||||||
|
// Show thinking state
|
||||||
|
assistantEl.innerHTML = '<span class="thinking-dots">Thinking<span>.</span><span>.</span><span>.</span></span>';
|
||||||
|
assistantEl.classList.add('message-thinking');
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read();
|
||||||
|
if (done) break;
|
||||||
|
const chunk = decoder.decode(value);
|
||||||
|
const lines = chunk.split('\n');
|
||||||
|
for (const line of lines) {
|
||||||
|
if (line.startsWith('data: ')) {
|
||||||
|
const data = line.slice(6).trim();
|
||||||
|
if (data === '[DONE]') break;
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(data);
|
||||||
|
if (parsed.token) fullResponse += parsed.token;
|
||||||
|
if (parsed.error) showToast(parsed.error, 'error');
|
||||||
|
} catch (e) { }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text complete. Now fetch audio.
|
||||||
|
if (fullResponse) {
|
||||||
|
history.push({ role: 'assistant', content: fullResponse });
|
||||||
|
|
||||||
|
// Keep "Thinking..." until audio is ready or failed
|
||||||
|
const audioUrl = await voice.fetchAudio(fullResponse);
|
||||||
|
|
||||||
|
// Visual update: Remove thinking, show text
|
||||||
|
assistantEl.classList.remove('message-thinking');
|
||||||
|
assistantEl.innerHTML = renderMarkdown(fullResponse);
|
||||||
|
messagesEl.scrollTop = messagesEl.scrollHeight;
|
||||||
|
|
||||||
|
if (audioUrl) {
|
||||||
|
await voice.playAudio(audioUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Normal Text Mode: Stream directly to UI
|
||||||
while (true) {
|
while (true) {
|
||||||
const { done, value } = await reader.read();
|
const { done, value } = await reader.read();
|
||||||
if (done) break;
|
if (done) break;
|
||||||
@@ -199,10 +260,6 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
|
|
||||||
if (fullResponse) {
|
if (fullResponse) {
|
||||||
history.push({ role: 'assistant', content: fullResponse });
|
history.push({ role: 'assistant', content: fullResponse });
|
||||||
|
|
||||||
// Auto-speak if voice mode is ON (regardless of input method)
|
|
||||||
if (voiceModeOn) {
|
|
||||||
await voice.speak(fullResponse);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ class VoiceManager {
|
|||||||
this.onStateChange = null;
|
this.onStateChange = null;
|
||||||
this.browserSTTSupported = false;
|
this.browserSTTSupported = false;
|
||||||
this.apiAvailable = false;
|
this.apiAvailable = false;
|
||||||
|
this.onProcessing = null; // New callback for "Transcribing..." state
|
||||||
}
|
}
|
||||||
|
|
||||||
async init() {
|
async init() {
|
||||||
@@ -93,17 +94,18 @@ class VoiceManager {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.isRecording = true;
|
// Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
|
||||||
this.lastInputWasVoice = true;
|
// if hardware access fails or takes time.
|
||||||
if (this.onStateChange) this.onStateChange(true);
|
|
||||||
|
|
||||||
if (this.mode === 'api') {
|
if (this.mode === 'api') {
|
||||||
try {
|
try {
|
||||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||||
this.audioChunks = [];
|
this.audioChunks = [];
|
||||||
this.mediaRecorder = new MediaRecorder(stream);
|
this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
|
||||||
|
|
||||||
this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
|
this.mediaRecorder.ondataavailable = (e) => {
|
||||||
|
if (e.data.size > 0) this.audioChunks.push(e.data);
|
||||||
|
};
|
||||||
|
|
||||||
this.mediaRecorder.onstop = async () => {
|
this.mediaRecorder.onstop = async () => {
|
||||||
stream.getTracks().forEach(t => t.stop());
|
stream.getTracks().forEach(t => t.stop());
|
||||||
@@ -111,9 +113,17 @@ class VoiceManager {
|
|||||||
await this._transcribeAPI(blob);
|
await this._transcribeAPI(blob);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Wait for recorder to actually start before updating UI
|
||||||
this.mediaRecorder.start();
|
this.mediaRecorder.start();
|
||||||
|
|
||||||
|
// Now we are truly recording
|
||||||
|
this.isRecording = true;
|
||||||
|
this.lastInputWasVoice = true;
|
||||||
|
if (this.onStateChange) this.onStateChange(true);
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
showToast('Microphone access denied', 'error');
|
console.error('[Voice] Mic access error:', e);
|
||||||
|
showToast('Microphone access denied or error', 'error');
|
||||||
this.isRecording = false;
|
this.isRecording = false;
|
||||||
if (this.onStateChange) this.onStateChange(false);
|
if (this.onStateChange) this.onStateChange(false);
|
||||||
}
|
}
|
||||||
@@ -121,6 +131,10 @@ class VoiceManager {
|
|||||||
if (this.recognition) {
|
if (this.recognition) {
|
||||||
try {
|
try {
|
||||||
this.recognition.start();
|
this.recognition.start();
|
||||||
|
// Recognition 'onstart' would be better, but this is okay for browser mode
|
||||||
|
this.isRecording = true;
|
||||||
|
this.lastInputWasVoice = true;
|
||||||
|
if (this.onStateChange) this.onStateChange(true);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.isRecording = false;
|
this.isRecording = false;
|
||||||
if (this.onStateChange) this.onStateChange(false);
|
if (this.onStateChange) this.onStateChange(false);
|
||||||
@@ -154,6 +168,8 @@ class VoiceManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _transcribeAPI(blob) {
|
async _transcribeAPI(blob) {
|
||||||
|
if (this.onProcessing) this.onProcessing(true);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('audio', blob, 'recording.webm');
|
formData.append('audio', blob, 'recording.webm');
|
||||||
@@ -175,22 +191,20 @@ class VoiceManager {
|
|||||||
showToast('Transcription network error', 'error');
|
showToast('Transcription network error', 'error');
|
||||||
} finally {
|
} finally {
|
||||||
this.isRecording = false;
|
this.isRecording = false;
|
||||||
|
// Stop processing state
|
||||||
|
if (this.onProcessing) this.onProcessing(false);
|
||||||
if (this.onStateChange) this.onStateChange(false);
|
if (this.onStateChange) this.onStateChange(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Speak text via API TTS only. No browser fallback.
|
* Fetch TTS audio blob for text (API only).
|
||||||
* Strips markdown formatting before sending.
|
* Returns audio URL or null.
|
||||||
*/
|
*/
|
||||||
async speak(text) {
|
async fetchAudio(text) {
|
||||||
if (!this.apiAvailable) {
|
if (!this.apiAvailable) return null;
|
||||||
console.log('[Voice] API TTS not available, skipping speech');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const clean = VoiceManager.stripMarkdown(text);
|
const clean = VoiceManager.stripMarkdown(text);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
|
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@@ -198,27 +212,57 @@ class VoiceManager {
|
|||||||
|
|
||||||
if (response?.ok) {
|
if (response?.ok) {
|
||||||
const audioBlob = await response.blob();
|
const audioBlob = await response.blob();
|
||||||
const audioUrl = URL.createObjectURL(audioBlob);
|
return URL.createObjectURL(audioBlob);
|
||||||
|
} else {
|
||||||
|
const err = await response.json().catch(() => ({}));
|
||||||
|
console.warn('[Voice] TTS error:', err);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('[Voice] TTS network error:', e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Play pre-fetched audio URL with visual feedback.
|
||||||
|
*/
|
||||||
|
async playAudio(audioUrl) {
|
||||||
|
if (!audioUrl) return;
|
||||||
|
|
||||||
const audio = new Audio(audioUrl);
|
const audio = new Audio(audioUrl);
|
||||||
|
|
||||||
// Visual feedback
|
// Visual feedback
|
||||||
const avatarContainer = document.querySelector('.avatar-container');
|
const avatarContainer = document.querySelector('.avatar-container');
|
||||||
if (avatarContainer) avatarContainer.classList.add('speaking');
|
if (avatarContainer) avatarContainer.classList.add('speaking');
|
||||||
|
|
||||||
|
try {
|
||||||
await audio.play();
|
await audio.play();
|
||||||
|
|
||||||
return new Promise(resolve => {
|
return new Promise(resolve => {
|
||||||
audio.onended = () => {
|
audio.onended = () => {
|
||||||
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||||
resolve();
|
resolve();
|
||||||
};
|
};
|
||||||
|
// Handle errors during playback (e.g. format issues)
|
||||||
|
audio.onerror = () => {
|
||||||
|
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||||
|
resolve();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn('[Voice] API TTS failed:', e);
|
console.error("Playback failed", e);
|
||||||
|
if (avatarContainer) avatarContainer.classList.remove('speaking');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Legacy method for backward compatibility if needed,
|
||||||
|
* or for simple direct speech.
|
||||||
|
*/
|
||||||
|
async speak(text) {
|
||||||
|
const url = await this.fetchAudio(text);
|
||||||
|
if (url) await this.playAudio(url);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Strip markdown formatting from text so TTS reads naturally.
|
* Strip markdown formatting from text so TTS reads naturally.
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user