Files
language-llm/frontend/js/voice.js
Dennis Thiessen be366777d4
All checks were successful
Deploy FluentGerman.ai / deploy (push) Successful in 53s
added voice functionality and increased avatar size
2026-02-18 10:11:13 +01:00

244 lines
8.5 KiB
JavaScript

/* FluentGerman.ai — Voice module (API-only TTS, browser + API STT) */
/**
 * Coordinates voice input (speech-to-text) and voice output (text-to-speech)
 * for the chat UI.
 *
 * Speech-to-text runs either through the browser's SpeechRecognition API
 * (`mode === 'browser'`) or by recording audio and posting it to
 * `/voice/transcribe` (`mode === 'api'`). Text-to-speech always goes through
 * `/voice/synthesize`.
 *
 * Depends on two globals that are not defined in this class: `api(path, options)`
 * (awaited; its result is used like a fetch Response) and
 * `showToast(message, type)`.
 */
class VoiceManager {
  constructor() {
    this.mode = 'browser';            // 'browser' or 'api'; may be replaced by server config in init()
    this.recognition = null;          // SpeechRecognition instance when the browser provides one
    this.isRecording = false;
    this.isDisabled = false;          // true when neither STT backend is usable
    this.lastInputWasVoice = false;
    this.mediaRecorder = null;        // MediaRecorder used in API mode
    this.audioChunks = [];
    this.onResult = null;             // callback(text) invoked with each transcript
    this.onStateChange = null;        // callback(isRecording) invoked on every state change
    this.browserSTTSupported = false;
    this.apiAvailable = false;
  }

  /**
   * Detect browser STT support, fetch the server voice configuration and
   * settle on a usable STT mode (or disable voice input entirely).
   *
   * @returns {Promise<void>}
   */
  async init() {
    this._initBrowserSTT();
    try {
      const response = await api('/voice/config');
      if (response?.ok) {
        const config = await response.json();
        // Keep the browser default when the server omits the mode, so `mode`
        // is never left undefined.
        this.mode = config.voice_mode || 'browser';
        this.apiAvailable = config.voice_api_available || false;
        console.log('[Voice] Server mode:', this.mode, '| API available:', this.apiAvailable);
      }
    } catch (e) {
      console.warn('[Voice] Could not fetch config, using browser mode');
      this.mode = 'browser';
    }

    // Determine best STT method: fall back to the other backend when the
    // configured one is unavailable, and disable voice when neither works.
    if (this.mode === 'browser' && !this.browserSTTSupported) {
      if (this.apiAvailable) {
        this.mode = 'api';
      } else {
        this.isDisabled = true;
      }
    } else if (this.mode === 'api' && !this.apiAvailable) {
      if (this.browserSTTSupported) {
        this.mode = 'browser';
      } else {
        this.isDisabled = true;
      }
    }
    console.log('[Voice] Final mode:', this.isDisabled ? 'DISABLED' : this.mode);
  }

  /**
   * Update `isRecording` and notify the `onStateChange` listener, if any.
   *
   * @param {boolean} isRecording - The new recording state.
   */
  _setRecordingState(isRecording) {
    this.isRecording = isRecording;
    if (this.onStateChange) this.onStateChange(isRecording);
  }

  /**
   * Create and configure the browser SpeechRecognition instance when the
   * browser exposes one; records the outcome in `browserSTTSupported`.
   */
  _initBrowserSTT() {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      this.browserSTTSupported = false;
      return;
    }
    this.browserSTTSupported = true;
    this.recognition = new SpeechRecognition();
    this.recognition.continuous = false;
    this.recognition.interimResults = false;
    this.recognition.lang = 'de-DE';

    this.recognition.onresult = (event) => {
      const text = event.results[0][0].transcript;
      console.log('[Voice] STT result:', text);
      this.lastInputWasVoice = true;
      if (this.onResult) this.onResult(text);
    };

    this.recognition.onend = () => {
      this._setRecordingState(false);
    };

    this.recognition.onerror = (event) => {
      console.error('[Voice] STT error:', event.error);
      this._setRecordingState(false);
      if (event.error === 'not-allowed') {
        showToast('Microphone access denied. Allow it in browser settings.', 'error');
      } else if (event.error === 'no-speech') {
        showToast('No speech detected. Try again.', 'error');
      }
    };
  }

  /**
   * Begin capturing speech using the active STT mode.
   *
   * @returns {Promise<void>}
   */
  async startRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    this.lastInputWasVoice = true;
    this._setRecordingState(true);

    if (this.mode === 'api') {
      await this._startApiRecording();
      return;
    }

    // Browser mode. Without a recognition object nothing could ever reset the
    // recording flag, so treat it like a failed start.
    if (!this.recognition) {
      this._setRecordingState(false);
      showToast('Voice recognition failed. Try again.', 'error');
      return;
    }
    try {
      this.recognition.start();
    } catch (e) {
      this._setRecordingState(false);
      showToast('Voice recognition failed. Try again.', 'error');
    }
  }

  /**
   * Acquire the microphone and start a MediaRecorder whose output is sent to
   * the transcription endpoint once recording stops.
   *
   * @returns {Promise<void>}
   */
  async _startApiRecording() {
    let stream = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // stopRecording() may have run while the permission prompt was open.
      // Starting the recorder now would leave the microphone live with no
      // way for the UI to stop it.
      if (!this.isRecording) {
        stream.getTracks().forEach((track) => track.stop());
        return;
      }

      this.audioChunks = [];
      const recorder = new MediaRecorder(stream);
      recorder.ondataavailable = (e) => this.audioChunks.push(e.data);
      recorder.onstop = async () => {
        stream.getTracks().forEach((track) => track.stop());
        const blob = new Blob(this.audioChunks, { type: 'audio/webm' });
        await this._transcribeAPI(blob);
      };
      this.mediaRecorder = recorder;
      recorder.start();
    } catch (e) {
      console.warn('[Voice] Could not start recording:', e);
      // Release the microphone if it was acquired before the failure.
      stream?.getTracks().forEach((track) => track.stop());
      showToast('Microphone access denied', 'error');
      this._setRecordingState(false);
    }
  }

  /**
   * Stop the current capture. In API mode with an active recorder the
   * recording flag is cleared later, when transcription finishes.
   */
  stopRecording() {
    if (this.mode === 'api') {
      if (this.mediaRecorder?.state === 'recording') {
        this.mediaRecorder.stop();
      } else {
        this._setRecordingState(false);
      }
      return;
    }
    try {
      this.recognition?.stop();
    } catch (e) {
      /* already stopped */
    }
    this._setRecordingState(false);
  }

  /**
   * Start recording when idle, stop when recording.
   */
  toggleRecording() {
    if (this.isDisabled) {
      showToast('Voice requires Chrome or Edge (HTTPS).', 'error');
      return;
    }
    if (this.isRecording) {
      this.stopRecording();
    } else {
      this.startRecording();
    }
  }

  /**
   * Upload recorded audio to `/voice/transcribe` and forward the transcript
   * to `onResult`. Always clears the recording state when done.
   *
   * @param {Blob} blob - Recorded audio.
   * @returns {Promise<void>}
   */
  async _transcribeAPI(blob) {
    try {
      const formData = new FormData();
      formData.append('audio', blob, 'recording.webm');
      const response = await api('/voice/transcribe', {
        method: 'POST',
        body: formData,
      });
      if (response?.ok) {
        const data = await response.json();
        this.lastInputWasVoice = true;
        if (this.onResult) this.onResult(data.text);
      } else {
        showToast('Transcription failed.', 'error');
      }
    } catch (e) {
      showToast('Transcription error', 'error');
    } finally {
      this._setRecordingState(false);
    }
  }

  /**
   * Speak text via API TTS only. No browser fallback.
   * Strips markdown formatting before sending.
   *
   * The returned promise settles when playback has ended, or as soon as the
   * request or playback fails; failures are logged, never thrown.
   *
   * @param {string} text - Text (possibly markdown) to speak.
   * @returns {Promise<void>}
   */
  async speak(text) {
    if (!this.apiAvailable) {
      console.log('[Voice] API TTS not available, skipping speech');
      return;
    }
    const clean = VoiceManager.stripMarkdown(text);
    if (!clean) return; // nothing speakable left after stripping

    let audioUrl = null;
    let avatarContainer = null;
    try {
      // NOTE(review): the text travels in the query string, so very long
      // replies may hit URL length limits — confirm against the server.
      const response = await api(`/voice/synthesize?text=${encodeURIComponent(clean)}`, {
        method: 'POST',
      });
      if (!response?.ok) return;

      const audioBlob = await response.blob();
      audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);

      // Attach handlers before play() so neither a very short clip nor a
      // decode error can leave this promise pending forever.
      const playbackFinished = new Promise((resolve) => {
        audio.onended = () => resolve();
        audio.onerror = () => resolve();
      });

      // Visual feedback
      avatarContainer = document.querySelector('.avatar-container');
      avatarContainer?.classList.add('speaking');

      await audio.play();
      await playbackFinished;
    } catch (e) {
      console.warn('[Voice] API TTS failed:', e);
    } finally {
      // Runs on success and on failure (e.g. play() rejected by the browser),
      // so the avatar never stays stuck in the speaking state.
      avatarContainer?.classList.remove('speaking');
      if (audioUrl) URL.revokeObjectURL(audioUrl);
    }
  }

  /**
   * Strip markdown formatting from text so TTS reads naturally.
   *
   * @param {string} text - Markdown source; null/undefined is treated as ''.
   * @returns {string} Plain text with paragraph breaks turned into '. '.
   */
  static stripMarkdown(text) {
    return String(text ?? '')
      .replace(/```[\s\S]*?```/g, '')            // code blocks
      .replace(/`([^`]+)`/g, '$1')               // inline code
      .replace(/^[ \t]*#{1,6}[ \t]+/gm, '')      // headings (line start only, so "C# ..." survives)
      .replace(/\*\*([^*]+)\*\*/g, '$1')         // bold
      .replace(/\*([^*]+)\*/g, '$1')             // italic
      .replace(/__([^_]+)__/g, '$1')             // bold alt
      .replace(/_([^_]+)_/g, '$1')               // italic alt
      .replace(/~~([^~]+)~~/g, '$1')             // strikethrough
      .replace(/^\s*[-*+]\s+/gm, '')             // unordered lists
      .replace(/^\s*\d+\.\s+/gm, '')             // ordered lists
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')    // images — must run before links, which would match the "[alt](url)" part
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')   // links
      .replace(/^[ \t]*(?:>[ \t]*)+/gm, '')      // blockquote markers (line start only, so "5 > 3" survives)
      .replace(/\n{2,}/g, '. ')                  // paragraph breaks → pause
      .replace(/\n/g, ' ')                       // newlines → space
      .trim();
  }
}