Files
language-llm/frontend/js/voice.js
Dennis Thiessen e9f12bc2ba
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 49s
recording cutoff bug in admin fixed
2026-02-18 13:18:49 +01:00

393 lines
15 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */
class VoiceManager {
  /**
   * Coordinates voice I/O for the app:
   * - STT via the browser's SpeechRecognition ('browser' mode) or a server
   *   endpoint fed by MediaRecorder ('api' mode).
   * - TTS via the server API only (fetchAudio / playAudio / speak).
   * Depends on globals `api(path, opts)` (fetch wrapper) and `showToast(msg, kind)`.
   */
  constructor() {
    this.mode = 'browser';            // effective STT mode: 'browser' | 'api'
    this.recognition = null;          // SpeechRecognition instance (browser mode)
    this.isRecording = false;
    this.isDisabled = false;          // true when no STT path is available
    this.lastInputWasVoice = false;   // lets the app auto-speak replies to voice input
    this.mediaRecorder = null;        // MediaRecorder instance (api mode)
    this.audioChunks = [];
    this.onResult = null;             // (text) => void — transcription result callback
    this.onStateChange = null;        // (isRecording) => void — UI mic-button state
    this.browserSTTSupported = false;
    this.apiAvailable = false;        // server voice API reachable per /voice/config
    this.onProcessing = null;         // (busy) => void — "Transcribing..." UI state
  }

  /**
   * Probe browser STT support and fetch the server voice config, then pick
   * the best available mode. Falls back browser<->api; disables voice if
   * neither path works.
   */
  async init() {
    this._initBrowserSTT();
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        this.mode = config.voice_mode;
        this.apiAvailable = config.voice_api_available || false;
        console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }
    // Reconcile the server's preferred mode with what this client supports.
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      if (this.apiAvailable) {
        this.mode = 'api';
      } else {
        this.isDisabled = true;
      }
    } else if (this.mode === 'api' && !this.apiAvailable) {
      if (this.browserSTTSupported) {
        this.mode = 'browser';
      } else {
        this.isDisabled = true;
      }
    }
    console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode);
  }

  /**
   * Wire up the browser's SpeechRecognition (webkit-prefixed where needed),
   * configured for single-utterance German recognition.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      this.browserSTTSupported = false;
      return;
    }
    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;
    this.recognition.interimResults = false;
    this.recognition.lang = 'de-DE';
    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };
    this.recognition.onend = () => {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };
    this.recognition.onerror = (event) => {
      console.error('[Voice] STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Allow it in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };
  }

  /**
   * Start capturing audio in the active mode. In 'api' mode the UI is only
   * flipped to "recording" once MediaRecorder actually starts, so a failed
   * or slow getUserMedia never shows a fake recording state.
   */
  async startRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    if (this.mode === 'api') {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
        this.mediaRecorder.ondataavailable = (e) => {
          if (e.data.size > 0) this.audioChunks.push(e.data);
        };
        this.mediaRecorder.onstop = async () => {
          // Release the mic before uploading so the browser indicator clears.
          stream.getTracks().forEach(t => t.stop());
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };
        // Only update UI once recording has truly started.
        this.mediaRecorder.onstart = () => {
          this.isRecording = true;
          this.lastInputWasVoice = true;
          if (this.onStateChange) this.onStateChange(true);
        };
        this.mediaRecorder.start();
      } catch (e) {
        console.error('[Voice] Mic access error:', e);
        showToast('Microphone access denied or error', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.start();
          // Recognition 'onstart' would be better, but this is okay for browser mode.
          this.isRecording = true;
          this.lastInputWasVoice = true;
          if (this.onStateChange) this.onStateChange(true);
        } catch (e) {
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed. Try again.', 'error');
        }
      }
    }
  }

  /**
   * Stop the active capture. In 'api' mode this triggers MediaRecorder's
   * onstop handler, which uploads the recorded blob for transcription.
   */
  stopRecording() {
    if (this.mode === 'api') {
      if (this.mediaRecorder?.state === 'recording') {
        // Show processing state immediately (don't wait for onstop callback).
        if (this.onProcessing) this.onProcessing(true);
        this.mediaRecorder.stop();
      } else {
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      try { this.recognition?.stop(); } catch (e) { /* already stopped */ }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /** Toggle between start and stop, mirroring startRecording's disabled guard. */
  toggleRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    this.isRecording ? this.stopRecording() : this.startRecording();
  }

  /**
   * Upload a recorded audio blob to the server STT endpoint and deliver the
   * transcript through onResult. Always clears the recording/processing UI
   * state in `finally`, whatever the outcome.
   * @param {Blob} blob audio/webm recording
   */
  async _transcribeAPI(blob) {
    if (this.onProcessing) this.onProcessing(true);
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });
      if (response?.ok) {
        const data = await response.json();
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        // Guard: api() can return nullish (see the `response?.ok` check above);
        // calling .json() on it would throw and be misreported as a network error.
        const err = response ? await response.json().catch(() => ({})) : {};
        showToast(`Transcription failed: ${err.detail || 'Unknown error'}`, 'error');
      }
    } catch (e) {
      showToast('Transcription network error', 'error');
    } finally {
      this.isRecording = false;
      // Stop processing state.
      if (this.onProcessing) this.onProcessing(false);
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Fetch TTS audio blob for text (API only).
   * Returns a blob object URL, or null on any failure.
   * @param {string} text raw (possibly markdown) text to synthesize
   * @returns {Promise<string|null>}
   */
  async fetchAudio(text) {
    if (!this.apiAvailable) return null;
    const clean = VoiceManager.stripMarkdown(text);
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
        method: 'POST',
      });
      if (response?.ok) {
        const audioBlob = await response.blob();
        return URL.createObjectURL(audioBlob);
      } else {
        // Guard against a nullish response before reading its body.
        const err = response ? await response.json().catch(() => ({})) : {};
        console.warn('[Voice] TTS error:', err);
      }
    } catch (e) {
      console.warn('[Voice] TTS network error:', e);
    }
    return null;
  }

  /**
   * Play audio with an inline mini-player (progress bar, seek, replay).
   * NOTE(review): the blob URL is never released via URL.revokeObjectURL —
   * revoking on 'ended' would break the replay button, but each message
   * leaks one blob; consider revoking when the player element is removed.
   * @param {string} audioUrl blob URL from fetchAudio()
   * @param {HTMLElement} [containerEl] element to append the player into
   * @returns {Promise} resolves when first playback ends
   */
  async playAudio(audioUrl, containerEl) {
    if (!audioUrl) return;
    const audio = new Audio(audioUrl);
    // Visual feedback — avatar pulse.
    const avatarContainer = document.querySelector('.avatar-container');
    if (avatarContainer) avatarContainer.classList.add('speaking');
    // ── Build player DOM ──────────────────────────────────────────
    const player = document.createElement('div');
    player.className = 'audio-player';
    const playBtn = document.createElement('button');
    playBtn.className = 'audio-player-btn playing';
    playBtn.innerHTML = VoiceManager._pauseIcon();
    playBtn.title = 'Pause';
    const track = document.createElement('div');
    track.className = 'audio-player-track';
    const fill = document.createElement('div');
    fill.className = 'audio-player-fill';
    track.appendChild(fill);
    const timeLabel = document.createElement('span');
    timeLabel.className = 'audio-player-time';
    timeLabel.textContent = '0:00 / 0:00';
    player.appendChild(playBtn);
    player.appendChild(track);
    player.appendChild(timeLabel);
    if (containerEl) {
      containerEl.appendChild(player);
    }
    // ── Helpers ───────────────────────────────────────────────────
    // Format seconds as m:ss (duration is NaN/Infinity until metadata loads).
    function fmt(s) {
      if (!isFinite(s)) return '0:00';
      const m = Math.floor(s / 60);
      const sec = Math.floor(s % 60);
      return `${m}:${sec.toString().padStart(2, '0')}`;
    }
    function updateProgress() {
      if (!audio.duration) return;
      const pct = (audio.currentTime / audio.duration) * 100;
      fill.style.width = pct + '%';
      timeLabel.textContent = `${fmt(audio.currentTime)} / ${fmt(audio.duration)}`;
    }
    // ── Events ────────────────────────────────────────────────────
    audio.addEventListener('timeupdate', updateProgress);
    audio.addEventListener('loadedmetadata', () => {
      timeLabel.textContent = `0:00 / ${fmt(audio.duration)}`;
    });
    // Seek on track click.
    track.addEventListener('click', (e) => {
      const rect = track.getBoundingClientRect();
      const pct = (e.clientX - rect.left) / rect.width;
      audio.currentTime = pct * audio.duration;
      updateProgress();
    });
    // Play/pause toggle.
    playBtn.addEventListener('click', () => {
      if (audio.paused) {
        audio.play();
        playBtn.classList.add('playing');
        playBtn.innerHTML = VoiceManager._pauseIcon();
        playBtn.title = 'Pause';
        if (avatarContainer) avatarContainer.classList.add('speaking');
      } else {
        audio.pause();
        playBtn.classList.remove('playing');
        playBtn.innerHTML = VoiceManager._playIcon();
        playBtn.title = 'Play';
        if (avatarContainer) avatarContainer.classList.remove('speaking');
      }
    });
    // ── Playback ──────────────────────────────────────────────────
    try {
      // Wait for audio to be fully buffered before playing.
      await new Promise((resolve, reject) => {
        audio.addEventListener('canplaythrough', resolve, { once: true });
        audio.addEventListener('error', reject, { once: true });
        audio.load(); // Explicitly trigger loading
      });
      audio.currentTime = 0; // Ensure we start from the very beginning
      await audio.play();
      return new Promise(resolve => {
        audio.onended = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          playBtn.classList.remove('playing');
          playBtn.innerHTML = VoiceManager._playIcon();
          playBtn.title = 'Replay';
          fill.style.width = '100%';
          // Reset to beginning for replay.
          audio.currentTime = 0;
          resolve();
        };
        audio.onerror = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          resolve();
        };
      });
    } catch (e) {
      console.error('Playback failed', e);
      if (avatarContainer) avatarContainer.classList.remove('speaking');
      playBtn.classList.remove('playing');
      playBtn.innerHTML = VoiceManager._playIcon();
    }
  }

  // ── SVG icons (inline, no external deps) ──────────────────────────
  static _playIcon() {
    return `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><polygon points="6,3 20,12 6,21"/></svg>`;
  }
  static _pauseIcon() {
    return `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><rect x="5" y="3" width="4" height="18"/><rect x="15" y="3" width="4" height="18"/></svg>`;
  }

  /**
   * Legacy method for backward compatibility if needed,
   * or for simple direct speech (no inline player container).
   */
  async speak(text) {
    const url = await this.fetchAudio(text);
    if (url) await this.playAudio(url);
  }

  /**
   * Strip markdown formatting from text so TTS reads naturally.
   * @param {string} text markdown-ish source
   * @returns {string} plain text with paragraph breaks turned into pauses
   */
  static stripMarkdown(text) {
    return text
      .replace(/```[\s\S]*?```/g, '')          // code blocks
      .replace(/`([^`]+)`/g, '$1')             // inline code
      .replace(/#{1,6}\s+/g, '')               // headings
      .replace(/\*\*([^*]+)\*\*/g, '$1')       // bold
      .replace(/\*([^*]+)\*/g, '$1')           // italic
      .replace(/__([^_]+)__/g, '$1')           // bold alt
      .replace(/_([^_]+)_/g, '$1')             // italic alt
      .replace(/~~([^~]+)~~/g, '$1')           // strikethrough
      .replace(/^\s*[-*+]\s+/gm, '')           // unordered lists
      .replace(/^\s*\d+\.\s+/gm, '')           // ordered lists
      // Images must be stripped BEFORE links: otherwise the link rule
      // half-matches `![alt](url)` and leaves a stray `!alt` to be spoken.
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')  // images
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links
      // Anchored to line start so literal comparisons like "5 > 3" survive.
      .replace(/^>\s+/gm, '')                  // blockquotes
      .replace(/\n{2,}/g, '. ')                // paragraph breaks → pause
      .replace(/\n/g, ' ')                     // newlines → space
      .trim();
  }
}