/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */
class VoiceManager {
  /**
   * Manages voice input/output for the app:
   *  - STT via the browser SpeechRecognition API ('browser' mode) or via the
   *    server `/voice/transcribe` endpoint ('api' mode, MediaRecorder upload).
   *  - TTS via the server `/voice/synthesize` endpoint only.
   * Relies on globals `api()` (fetch wrapper) and `showToast()` defined elsewhere.
   */
  constructor() {
    this.mode = 'browser';             // 'browser' | 'api' — resolved in init()
    this.recognition = null;           // SpeechRecognition instance (browser mode)
    this.isRecording = false;
    this.isDisabled = false;           // true when no STT method is available
    this.lastInputWasVoice = false;    // lets the app auto-speak replies to voice input
    this.mediaRecorder = null;         // MediaRecorder instance (api mode)
    this.audioChunks = [];
    this.onResult = null;              // callback(text) — final transcription
    this.onStateChange = null;         // callback(isRecording)
    this.browserSTTSupported = false;
    this.apiAvailable = false;         // server-side voice API reachable
    this.onProcessing = null;          // New callback for "Transcribing..." state
  }

  /**
   * Probe browser STT support, fetch the server voice config, then pick the
   * best available STT mode — falling back to the other mode when the
   * preferred one is unavailable, or disabling voice if neither works.
   */
  async init() {
    this._initBrowserSTT();

    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        this.mode = config.voice_mode;
        this.apiAvailable = config.voice_api_available || false;
        console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }

    // Determine best STT method
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      if (this.apiAvailable) {
        this.mode = 'api';
      } else {
        this.isDisabled = true;
      }
    } else if (this.mode === 'api' && !this.apiAvailable) {
      if (this.browserSTTSupported) {
        this.mode = 'browser';
      } else {
        this.isDisabled = true;
      }
    }

    console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode);
  }

  /**
   * Set up browser SpeechRecognition (German, single utterance, final
   * results only). Sets `browserSTTSupported`; no-op where unsupported.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      this.browserSTTSupported = false;
      return;
    }

    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;       // stop after one utterance
    this.recognition.interimResults = false;   // final results only
    this.recognition.lang = 'de-DE';

    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };

    this.recognition.onend = () => {
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    };

    this.recognition.onerror = (event) => {
      console.error('[Voice] STT error:', event.error);
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);

      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Allow it in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };
  }

  /**
   * Start recording in the active mode. In 'api' mode this requests the
   * microphone and records webm chunks; in 'browser' mode it starts the
   * SpeechRecognition session. UI state callbacks fire only after the
   * hardware actually starts, to avoid a "fake" recording state.
   */
  async startRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }

    if (this.mode === 'api') {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.audioChunks = [];
        this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });

        this.mediaRecorder.ondataavailable = (e) => {
          if (e.data.size > 0) this.audioChunks.push(e.data);
        };

        this.mediaRecorder.onstop = async () => {
          // Release the mic before uploading for transcription.
          stream.getTracks().forEach(t => t.stop());
          const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
          await this._transcribeAPI(blob);
        };

        this.mediaRecorder.start();

        // Now we are truly recording
        this.isRecording = true;
        this.lastInputWasVoice = true;
        if (this.onStateChange) this.onStateChange(true);

      } catch (e) {
        console.error('[Voice] Mic access error:', e);
        showToast('Microphone access denied or error', 'error');
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      if (this.recognition) {
        try {
          this.recognition.start();
          // Recognition 'onstart' would be better, but this is okay for browser mode
          this.isRecording = true;
          this.lastInputWasVoice = true;
          if (this.onStateChange) this.onStateChange(true);
        } catch (e) {
          this.isRecording = false;
          if (this.onStateChange) this.onStateChange(false);
          showToast('Voice recognition failed. Try again.', 'error');
        }
      }
    }
  }

  /**
   * Stop the active recording. In 'api' mode stopping the MediaRecorder
   * triggers `onstop` → `_transcribeAPI`; the "processing" indicator is shown
   * immediately rather than waiting for that async callback.
   */
  stopRecording() {
    if (this.mode === 'api') {
      if (this.mediaRecorder?.state === 'recording') {
        // Show processing state immediately (don't wait for onstop callback)
        if (this.onProcessing) this.onProcessing(true);
        this.mediaRecorder.stop();
      } else {
        // Recorder never started (or already stopped): just reset UI state.
        this.isRecording = false;
        if (this.onStateChange) this.onStateChange(false);
      }
    } else {
      try { this.recognition?.stop(); } catch (e) { /* already stopped */ }
      this.isRecording = false;
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /** Toggle between start and stop; shows an error toast when disabled. */
  toggleRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    this.isRecording ? this.stopRecording() : this.startRecording();
  }

  /**
   * Upload a recorded audio blob to the server for transcription and feed
   * the result to `onResult`. Always clears recording/processing state.
   * @param {Blob} blob - audio/webm recording
   */
  async _transcribeAPI(blob) {
    if (this.onProcessing) this.onProcessing(true);

    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');

      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });

      if (response?.ok) {
        const data = await response.json();
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        const err = await response.json().catch(() => ({}));
        showToast(`Transcription failed: ${err.detail || 'Unknown error'}`, 'error');
      }
    } catch (e) {
      showToast('Transcription network error', 'error');
    } finally {
      this.isRecording = false;
      // Stop processing state
      if (this.onProcessing) this.onProcessing(false);
      if (this.onStateChange) this.onStateChange(false);
    }
  }

  /**
   * Fetch TTS audio blob for text (API only).
   * Returns a blob object URL or null.
   * NOTE(review): callers own the returned object URL; it is never revoked
   * here because the inline player supports replay — confirm lifecycle.
   * @param {string} text - markdown-capable text; stripped before synthesis
   * @returns {Promise<string|null>}
   */
  async fetchAudio(text) {
    if (!this.apiAvailable) return null;

    const clean = VoiceManager.stripMarkdown(text);
    try {
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
        method: 'POST',
      });

      if (response?.ok) {
        const audioBlob = await response.blob();
        return URL.createObjectURL(audioBlob);
      } else {
        const err = await response.json().catch(() => ({}));
        console.warn('[Voice] TTS error:', err);
      }
    } catch (e) {
      console.warn('[Voice] TTS network error:', e);
    }
    return null;
  }

  /**
   * Play audio with an inline mini-player (progress bar, seek, replay).
   * @param {string} audioUrl – blob URL from fetchAudio()
   * @param {HTMLElement} [containerEl] – element to append the player into
   * @returns {Promise} resolves when first playback ends
   */
  async playAudio(audioUrl, containerEl) {
    if (!audioUrl) return;

    const audio = new Audio(audioUrl);

    // Visual feedback — avatar pulse
    const avatarContainer = document.querySelector('.avatar-container');
    if (avatarContainer) avatarContainer.classList.add('speaking');

    // ── Build player DOM ──────────────────────────────────────────
    const player = document.createElement('div');
    player.className = 'audio-player';

    const playBtn = document.createElement('button');
    playBtn.className = 'audio-player-btn playing';
    playBtn.innerHTML = VoiceManager._pauseIcon();
    playBtn.title = 'Pause';

    const track = document.createElement('div');
    track.className = 'audio-player-track';
    const fill = document.createElement('div');
    fill.className = 'audio-player-fill';
    track.appendChild(fill);

    const timeLabel = document.createElement('span');
    timeLabel.className = 'audio-player-time';
    timeLabel.textContent = '0:00 / 0:00';

    player.appendChild(playBtn);
    player.appendChild(track);
    player.appendChild(timeLabel);

    if (containerEl) {
      containerEl.appendChild(player);
    }

    // ── Helpers ───────────────────────────────────────────────────
    // Format seconds as m:ss; tolerate NaN/Infinity before metadata loads.
    function fmt(s) {
      if (!isFinite(s)) return '0:00';
      const m = Math.floor(s / 60);
      const sec = Math.floor(s % 60);
      return `${m}:${sec.toString().padStart(2, '0')}`;
    }

    function updateProgress() {
      if (!audio.duration) return;
      const pct = (audio.currentTime / audio.duration) * 100;
      fill.style.width = pct + '%';
      timeLabel.textContent = `${fmt(audio.currentTime)} / ${fmt(audio.duration)}`;
    }

    // ── Events ────────────────────────────────────────────────────
    audio.addEventListener('timeupdate', updateProgress);

    audio.addEventListener('loadedmetadata', () => {
      timeLabel.textContent = `0:00 / ${fmt(audio.duration)}`;
    });

    // Seek on track click (guard: duration is NaN until metadata loads,
    // and assigning NaN to currentTime throws).
    track.addEventListener('click', (e) => {
      if (!isFinite(audio.duration)) return;
      const rect = track.getBoundingClientRect();
      const pct = (e.clientX - rect.left) / rect.width;
      audio.currentTime = pct * audio.duration;
      updateProgress();
    });

    // Play/pause toggle
    playBtn.addEventListener('click', () => {
      if (audio.paused) {
        audio.play();
        playBtn.classList.add('playing');
        playBtn.innerHTML = VoiceManager._pauseIcon();
        playBtn.title = 'Pause';
        if (avatarContainer) avatarContainer.classList.add('speaking');
      } else {
        audio.pause();
        playBtn.classList.remove('playing');
        playBtn.innerHTML = VoiceManager._playIcon();
        playBtn.title = 'Play';
        if (avatarContainer) avatarContainer.classList.remove('speaking');
      }
    });

    // ── Playback ──────────────────────────────────────────────────
    try {
      // Wait for audio to be fully buffered before playing
      await new Promise((resolve, reject) => {
        audio.addEventListener('canplaythrough', resolve, { once: true });
        audio.addEventListener('error', reject, { once: true });
        audio.load(); // Explicitly trigger loading
      });
      audio.currentTime = 0; // Ensure we start from the very beginning
      await audio.play();
      return new Promise(resolve => {
        audio.onended = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          playBtn.classList.remove('playing');
          playBtn.innerHTML = VoiceManager._playIcon();
          playBtn.title = 'Replay';
          fill.style.width = '100%';
          // Reset to beginning for replay
          audio.currentTime = 0;
          resolve();
        };
        audio.onerror = () => {
          if (avatarContainer) avatarContainer.classList.remove('speaking');
          resolve();
        };
      });
    } catch (e) {
      console.error('Playback failed', e);
      if (avatarContainer) avatarContainer.classList.remove('speaking');
      playBtn.classList.remove('playing');
      playBtn.innerHTML = VoiceManager._playIcon();
    }
  }

  // ── SVG icons (inline, no external deps) ──────────────────────────
  static _playIcon() {
    return `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><polygon points="6,3 20,12 6,21"/></svg>`;
  }

  static _pauseIcon() {
    return `<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><rect x="5" y="3" width="4" height="18"/><rect x="15" y="3" width="4" height="18"/></svg>`;
  }

  /**
   * Legacy method for backward compatibility if needed,
   * or for simple direct speech.
   */
  async speak(text) {
    const url = await this.fetchAudio(text);
    if (url) await this.playAudio(url);
  }

  /**
   * Strip markdown formatting from text so TTS reads naturally.
   * Images are stripped before links so `![alt](url)` is fully removed
   * instead of being half-consumed by the link rule; blockquote markers
   * are only removed at the start of a line so prose like "5 > 3" survives.
   * @param {string} text
   * @returns {string}
   */
  static stripMarkdown(text) {
    return text
      .replace(/```[\s\S]*?```/g, '')         // code blocks
      .replace(/`([^`]+)`/g, '$1')            // inline code
      .replace(/#{1,6}\s+/g, '')              // headings
      .replace(/\*\*([^*]+)\*\*/g, '$1')      // bold
      .replace(/\*([^*]+)\*/g, '$1')          // italic
      .replace(/__([^_]+)__/g, '$1')          // bold alt
      .replace(/_([^_]+)_/g, '$1')            // italic alt
      .replace(/~~([^~]+)~~/g, '$1')          // strikethrough
      .replace(/^\s*[-*+]\s+/gm, '')          // unordered lists
      .replace(/^\s*\d+\.\s+/gm, '')          // ordered lists
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '') // images (before links)
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')// links
      .replace(/^>\s+/gm, '')                 // blockquotes (line-start only)
      .replace(/\n{2,}/g, '. ')               // paragraph breaks → pause
      .replace(/\n/g, ' ')                    // newlines → space
      .trim();
  }
}