diff --git a/frontend/css/style.css b/frontend/css/style.css
index cd79eb8..d431533 100644
--- a/frontend/css/style.css
+++ b/frontend/css/style.css
@@ -1227,4 +1227,37 @@ tr:hover td {
transform: scale(1);
opacity: 0.8;
}
+}
+
+/* ── Thinking dots ────────────────────────────────────────────────── */
+.message-thinking {
+ color: var(--text-muted);
+ font-style: italic;
+ font-size: 0.9em;
+}
+
+.thinking-dots span {
+ animation: thinkingDots 1.4s infinite ease-in-out both;
+ margin-left: 2px;
+}
+
+.thinking-dots span:nth-child(1) {
+ animation-delay: -0.32s;
+}
+
+.thinking-dots span:nth-child(2) {
+ animation-delay: -0.16s;
+}
+
+@keyframes thinkingDots {
+
+ 0%,
+ 80%,
+ 100% {
+ transform: scale(0);
+ }
+
+ 40% {
+ transform: scale(1);
+ }
}
\ No newline at end of file
diff --git a/frontend/js/chat.js b/frontend/js/chat.js
index ebae714..8954c77 100644
--- a/frontend/js/chat.js
+++ b/frontend/js/chat.js
@@ -120,6 +120,18 @@ document.addEventListener('DOMContentLoaded', async () => {
micBtn.classList.toggle('recording', recording);
};
+ // Show "Transcribing..." state
+ voice.onProcessing = (processing) => {
+ if (processing) {
+ inputEl.placeholder = 'Transcribing...';
+ inputEl.disabled = true;
+ } else {
+ inputEl.placeholder = voiceModeOn ? 'Voice mode ON — click the mic to speak...' : 'Type your message...';
+ inputEl.disabled = false;
+ inputEl.focus();
+ }
+ };
+
micBtn.addEventListener('click', () => voice.toggleRecording());
// ── Chat ──────────────────────────────────────────────────────────
@@ -128,7 +140,13 @@ document.addEventListener('DOMContentLoaded', async () => {
div.className = `message message-${role}`;
if (role === 'assistant') {
- div.innerHTML = renderMarkdown(content);
+ // content might be empty initially for thinking state
+ if (content === 'Thinking...') {
+ div.innerHTML = 'Thinking...';
+ div.classList.add('message-thinking');
+ } else {
+ div.innerHTML = renderMarkdown(content);
+ }
} else {
div.textContent = content;
}
@@ -168,41 +186,80 @@ document.addEventListener('DOMContentLoaded', async () => {
const reader = response.body.getReader();
const decoder = new TextDecoder();
- while (true) {
- const { done, value } = await reader.read();
- if (done) break;
+ // Special handling for Voice Mode: Buffer text, wait for TTS, then show & play
+ if (voiceModeOn) {
+ // Show thinking state
+ assistantEl.innerHTML = 'Thinking...';
+ assistantEl.classList.add('message-thinking');
- const chunk = decoder.decode(value);
- const lines = chunk.split('\n');
-
- for (const line of lines) {
- if (line.startsWith('data: ')) {
- const data = line.slice(6).trim();
- if (data === '[DONE]') break;
-
- try {
- const parsed = JSON.parse(data);
- if (parsed.token) {
- fullResponse += parsed.token;
- assistantEl.innerHTML = renderMarkdown(fullResponse);
- messagesEl.scrollTop = messagesEl.scrollHeight;
- }
- if (parsed.error) {
- showToast(parsed.error, 'error');
- }
- } catch (e) {
- // skip unparseable chunks
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) break;
+ const chunk = decoder.decode(value);
+ const lines = chunk.split('\n');
+ for (const line of lines) {
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6).trim();
+ if (data === '[DONE]') break;
+ try {
+ const parsed = JSON.parse(data);
+ if (parsed.token) fullResponse += parsed.token;
+ if (parsed.error) showToast(parsed.error, 'error');
+ } catch (e) { }
}
}
}
- }
- if (fullResponse) {
- history.push({ role: 'assistant', content: fullResponse });
+ // Text complete. Now fetch audio.
+ if (fullResponse) {
+ history.push({ role: 'assistant', content: fullResponse });
- // Auto-speak if voice mode is ON (regardless of input method)
- if (voiceModeOn) {
- await voice.speak(fullResponse);
+ // Keep "Thinking..." visible until the audio fetch succeeds or fails
+ const audioUrl = await voice.fetchAudio(fullResponse);
+
+ // Visual update: Remove thinking, show text
+ assistantEl.classList.remove('message-thinking');
+ assistantEl.innerHTML = renderMarkdown(fullResponse);
+ messagesEl.scrollTop = messagesEl.scrollHeight;
+
+ if (audioUrl) {
+ await voice.playAudio(audioUrl);
+ }
+ }
+
+ } else {
+ // Normal Text Mode: Stream directly to UI
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) break;
+
+ const chunk = decoder.decode(value);
+ const lines = chunk.split('\n');
+
+ for (const line of lines) {
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6).trim();
+ if (data === '[DONE]') break;
+
+ try {
+ const parsed = JSON.parse(data);
+ if (parsed.token) {
+ fullResponse += parsed.token;
+ assistantEl.innerHTML = renderMarkdown(fullResponse);
+ messagesEl.scrollTop = messagesEl.scrollHeight;
+ }
+ if (parsed.error) {
+ showToast(parsed.error, 'error');
+ }
+ } catch (e) {
+ // skip unparseable chunks
+ }
+ }
+ }
+ }
+
+ if (fullResponse) {
+ history.push({ role: 'assistant', content: fullResponse });
}
}
} catch (e) {
diff --git a/frontend/js/voice.js b/frontend/js/voice.js
index ed294b0..4d0bea2 100644
--- a/frontend/js/voice.js
+++ b/frontend/js/voice.js
@@ -13,6 +13,7 @@ class VoiceManager {
this.onStateChange = null;
this.browserSTTSupported = false;
this.apiAvailable = false;
+ this.onProcessing = null; // Callback fired while API transcription ("Transcribing...") is in progress
}
async init() {
@@ -93,17 +94,18 @@ class VoiceManager {
return;
}
- this.isRecording = true;
- this.lastInputWasVoice = true;
- if (this.onStateChange) this.onStateChange(true);
+ // Optimistic UI updates moved inside specific start blocks to prevent "fake" recording state
+ // if hardware access fails or takes time.
if (this.mode === 'api') {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
this.audioChunks = [];
- this.mediaRecorder = new MediaRecorder(stream);
+ this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
- this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
+ this.mediaRecorder.ondataavailable = (e) => {
+ if (e.data.size > 0) this.audioChunks.push(e.data);
+ };
this.mediaRecorder.onstop = async () => {
stream.getTracks().forEach(t => t.stop());
@@ -111,9 +113,17 @@ class VoiceManager {
await this._transcribeAPI(blob);
};
+ // start() throws synchronously on failure, so recording flags are only set once it succeeds
this.mediaRecorder.start();
+
+ // Now we are truly recording
+ this.isRecording = true;
+ this.lastInputWasVoice = true;
+ if (this.onStateChange) this.onStateChange(true);
+
} catch (e) {
- showToast('Microphone access denied', 'error');
+ console.error('[Voice] Mic access error:', e);
+ showToast('Microphone access denied or error', 'error');
this.isRecording = false;
if (this.onStateChange) this.onStateChange(false);
}
@@ -121,6 +131,10 @@ class VoiceManager {
if (this.recognition) {
try {
this.recognition.start();
+ // Recognition 'onstart' would be better, but this is okay for browser mode
+ this.isRecording = true;
+ this.lastInputWasVoice = true;
+ if (this.onStateChange) this.onStateChange(true);
} catch (e) {
this.isRecording = false;
if (this.onStateChange) this.onStateChange(false);
@@ -154,6 +168,8 @@ class VoiceManager {
}
async _transcribeAPI(blob) {
+ if (this.onProcessing) this.onProcessing(true);
+
try {
const formData = new FormData();
formData.append('audio', blob, 'recording.webm');
@@ -175,22 +191,20 @@ class VoiceManager {
showToast('Transcription network error', 'error');
} finally {
this.isRecording = false;
+ // Stop processing state
+ if (this.onProcessing) this.onProcessing(false);
if (this.onStateChange) this.onStateChange(false);
}
}
/**
- * Speak text via API TTS only. No browser fallback.
- * Strips markdown formatting before sending.
+ * Fetch TTS audio blob for text (API only).
+ * Returns audio URL or null.
*/
- async speak(text) {
- if (!this.apiAvailable) {
- console.log('[Voice] API TTS not available, skipping speech');
- return;
- }
+ async fetchAudio(text) {
+ if (!this.apiAvailable) return null;
const clean = VoiceManager.stripMarkdown(text);
-
try {
const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
method: 'POST',
@@ -198,25 +212,55 @@ class VoiceManager {
if (response?.ok) {
const audioBlob = await response.blob();
- const audioUrl = URL.createObjectURL(audioBlob);
- const audio = new Audio(audioUrl);
-
- // Visual feedback
- const avatarContainer = document.querySelector('.avatar-container');
- if (avatarContainer) avatarContainer.classList.add('speaking');
-
- await audio.play();
-
- return new Promise(resolve => {
- audio.onended = () => {
- if (avatarContainer) avatarContainer.classList.remove('speaking');
- resolve();
- };
- });
+ return URL.createObjectURL(audioBlob);
+ } else {
+ const err = await response.json().catch(() => ({}));
+ console.warn('[Voice] TTS error:', err);
}
} catch (e) {
- console.warn('[Voice] API TTS failed:', e);
+ console.warn('[Voice] TTS network error:', e);
}
+ return null;
+ }
+
+ /**
+ * Play pre-fetched audio URL with visual feedback.
+ */
+ async playAudio(audioUrl) {
+ if (!audioUrl) return;
+
+ const audio = new Audio(audioUrl);
+
+ // Visual feedback
+ const avatarContainer = document.querySelector('.avatar-container');
+ if (avatarContainer) avatarContainer.classList.add('speaking');
+
+ try {
+ await audio.play();
+ return new Promise(resolve => {
+ audio.onended = () => {
+ if (avatarContainer) avatarContainer.classList.remove('speaking');
+ resolve();
+ };
+ // Handle errors during playback (e.g. format issues)
+ audio.onerror = () => {
+ if (avatarContainer) avatarContainer.classList.remove('speaking');
+ resolve();
+ }
+ });
+ } catch (e) {
+ console.error("Playback failed", e);
+ if (avatarContainer) avatarContainer.classList.remove('speaking');
+ }
+ }
+
+ /**
+ * Legacy convenience wrapper kept for backward compatibility:
+ * fetches TTS audio for the text and plays it immediately.
+ */
+ async speak(text) {
+ const url = await this.fetchAudio(text);
+ if (url) await this.playAudio(url);
}
/**