initial commit

This commit is contained in:
2026-02-12 18:45:10 +01:00
commit be7bbba456
42 changed files with 3767 additions and 0 deletions

230
frontend/js/voice.js Normal file
View File

@@ -0,0 +1,230 @@
/* FluentGerman.ai — Voice module (Web Speech API + API mode) */
/**
 * Speech input/output controller with two interchangeable back ends:
 *
 *  - 'browser' mode uses the Web Speech API (SpeechRecognition for input,
 *    speechSynthesis for output);
 *  - 'api' mode records audio with MediaRecorder and posts it to the
 *    `/voice/transcribe` endpoint, and fetches synthesized audio from
 *    `/voice/synthesize`.
 *
 * The class relies on two globals defined outside this block: `api(path, options)`
 * (presumably a fetch wrapper returning a Response-like object — confirm against
 * its definition) and `showToast(message, level)`.
 *
 * Consumers assign `onResult(text)` to receive transcripts and
 * `onStateChange(isRecording)` to mirror the recording state.
 */
class VoiceManager {
    constructor() {
        this.mode = 'browser'; // replaced by the server-configured mode in init()
        this.recognition = null; // created by _initBrowserSTT() when supported
        this.synthesis = window.speechSynthesis;
        this.isRecording = false;
        this.lastInputWasVoice = false; // tracks if last message was spoken
        this.mediaRecorder = null;
        this.audioChunks = [];
        this.onResult = null;
        this.onStateChange = null;
        // Incremented on every startRecording() call so asynchronous
        // continuations can tell whether they still belong to the latest attempt.
        this._recordingSession = 0;
    }

    /**
     * Prepare browser speech recognition and load the voice mode from the server.
     * The mode stays 'browser' when the request fails, the response is not ok,
     * or the payload carries no usable `voice_mode`.
     *
     * @returns {Promise<void>}
     */
    async init() {
        // Always init browser STT as fallback
        this._initBrowserSTT();

        try {
            const response = await api('/voice/config');
            if (response?.ok) {
                const config = await response.json();
                const mode = config?.voice_mode;
                // Only accept a real value; a missing field must not wipe the default.
                if (typeof mode === 'string' && mode !== '') {
                    this.mode = mode;
                }
                console.log('[Voice] Mode:', this.mode);
            }
        } catch (e) {
            console.warn('[Voice] Could not fetch config, using browser mode', e);
            this.mode = 'browser';
        }
    }

    /**
     * Update the recording flag and notify the state listener, if any.
     *
     * @param {boolean} active - New recording state.
     */
    _setRecording(active) {
        this.isRecording = active;
        if (this.onStateChange) this.onStateChange(active);
    }

    /**
     * Stop every track of a media stream so the microphone is released.
     *
     * @param {MediaStream} stream
     */
    _stopStream(stream) {
        stream.getTracks().forEach((track) => track.stop());
    }

    /**
     * Create and wire the Web Speech API recognizer (German, single utterance,
     * final results only). Leaves `this.recognition` null when unsupported.
     */
    _initBrowserSTT() {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!SpeechRecognition) {
            console.warn('[Voice] Speech recognition not supported in this browser');
            return;
        }

        const recognizer = new SpeechRecognition();
        recognizer.continuous = false;
        recognizer.interimResults = false;
        recognizer.lang = 'de-DE';

        recognizer.onresult = (event) => {
            const text = event.results[0][0].transcript;
            console.log('[Voice] Browser STT result:', text);
            this.lastInputWasVoice = true;
            if (this.onResult) this.onResult(text);
        };

        recognizer.onend = () => {
            console.log('[Voice] Browser STT ended');
            this._setRecording(false);
        };

        recognizer.onerror = (event) => {
            console.error('[Voice] Browser STT error:', event.error);
            this._setRecording(false);
            if (event.error === 'not-allowed') {
                showToast('Microphone access denied. Please allow microphone in browser settings.', 'error');
            } else if (event.error === 'no-speech') {
                showToast('No speech detected. Try again.', 'error');
            }
        };

        this.recognition = recognizer;
        console.log('[Voice] Browser STT initialized');
    }

    /**
     * Begin capturing speech with the back end selected by `this.mode`.
     * Failures are reported through showToast and the state listener; the
     * returned promise settles once capture has started or failed to start.
     *
     * @returns {Promise<void>}
     */
    async startRecording() {
        const sessionId = ++this._recordingSession;
        this.lastInputWasVoice = true;
        this._setRecording(true);

        if (this.mode === 'api') {
            await this._startApiRecording(sessionId);
        } else {
            this._startBrowserRecording();
        }
    }

    /**
     * API mode — record audio via MediaRecorder, send to Whisper when stopped.
     *
     * @param {number} sessionId - Token of the startRecording() call that owns
     *     this attempt; used to detect that it was cancelled or superseded
     *     while the microphone request was pending.
     * @returns {Promise<void>}
     */
    async _startApiRecording(sessionId) {
        let stream = null;
        try {
            stream = await navigator.mediaDevices.getUserMedia({ audio: true });

            // stopRecording() (or a newer startRecording()) may have run while
            // the microphone request was pending. Starting a recorder now would
            // leave the microphone open with nothing left to stop it.
            if (!this.isRecording || sessionId !== this._recordingSession) {
                this._stopStream(stream);
                console.log('[Voice] API recording cancelled before it started');
                return;
            }

            const chunks = [];
            this.audioChunks = chunks;

            const recorder = new MediaRecorder(stream);
            recorder.ondataavailable = (event) => {
                // Skip empty chunks; they add nothing to the upload.
                if (event.data && event.data.size > 0) chunks.push(event.data);
            };
            recorder.onstop = async () => {
                this._stopStream(stream);
                const blob = new Blob(chunks, { type: 'audio/webm' });
                await this._transcribeAPI(blob, sessionId);
            };

            this.mediaRecorder = recorder;
            recorder.start();
            console.log('[Voice] API recording started');
        } catch (e) {
            console.error('[Voice] Microphone access error:', e);
            // The stream may already exist if MediaRecorder itself failed.
            if (stream) this._stopStream(stream);
            showToast('Microphone access denied', 'error');
            // Do not reset the state of a newer recording attempt.
            if (sessionId === this._recordingSession) this._setRecording(false);
        }
    }

    /**
     * Browser mode — use Web Speech API. Resets the recording state and shows
     * a toast when recognition is unavailable or refuses to start.
     */
    _startBrowserRecording() {
        if (!this.recognition) {
            console.warn('[Voice] No speech recognition available');
            showToast('Speech recognition not supported in this browser', 'error');
            this._setRecording(false);
            return;
        }

        try {
            this.recognition.start();
            console.log('[Voice] Browser STT started');
        } catch (e) {
            console.error('[Voice] Failed to start recognition:', e);
            this._setRecording(false);
            showToast('Voice recognition failed to start. Try again.', 'error');
        }
    }

    /**
     * Stop the current capture. In API mode with an active recorder the state
     * is reset later, once transcription has finished; otherwise it is reset
     * immediately.
     */
    stopRecording() {
        console.log('[Voice] Stopping recording...');

        if (this.mode === 'api') {
            if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
                // onstop → _transcribeAPI() resets the state in its finally block.
                this.mediaRecorder.stop();
            } else {
                this._setRecording(false);
            }
            return;
        }

        if (this.recognition) {
            try {
                this.recognition.stop();
            } catch (e) {
                // Best effort: the recognizer was already stopped.
                console.debug('[Voice] Recognition already stopped:', e);
            }
        }
        this._setRecording(false);
    }

    /**
     * Upload a recording to `/voice/transcribe` and forward the transcript to
     * `onResult`. A non-ok response switches this session to browser mode.
     *
     * @param {Blob} blob - Recorded audio.
     * @param {number} [sessionId] - Recording attempt that produced the blob;
     *     the recording state is only reset when it is still the latest one.
     * @returns {Promise<void>}
     */
    async _transcribeAPI(blob, sessionId = this._recordingSession) {
        try {
            const formData = new FormData();
            formData.append('audio', blob, 'recording.webm');

            console.log('[Voice] Sending audio to API for transcription...');
            const response = await api('/voice/transcribe', {
                method: 'POST',
                body: formData,
            });

            if (response?.ok) {
                const data = await response.json();
                console.log('[Voice] API transcription result:', data.text);
                this.lastInputWasVoice = true;
                if (this.onResult) this.onResult(data.text);
            } else {
                showToast('Transcription failed. Falling back to browser voice.', 'error');
                // Fallback: switch to browser mode for this session
                this.mode = 'browser';
            }
        } catch (e) {
            console.error('[Voice] API transcription error:', e);
            showToast('Transcription error', 'error');
        } finally {
            // A newer recording may have started while the upload was in
            // flight; resetting the flag then would desynchronise the UI.
            if (sessionId === this._recordingSession) this._setRecording(false);
        }
    }

    /**
     * Speak `text` using the back end selected by `this.mode`.
     *
     * @param {string} text - Text to speak.
     * @returns {Promise<*>} Settles when playback has finished or failed.
     */
    async speak(text) {
        if (this.mode === 'api') {
            return this._speakAPI(text);
        }
        return this._speakBrowser(text);
    }

    /**
     * Speak `text` in German with the browser's speech synthesis, cancelling
     * any utterance already in progress. Never rejects: errors and missing
     * browser support are logged and the promise resolves.
     *
     * @param {string} text - Text to speak.
     * @returns {Promise<*>}
     */
    _speakBrowser(text) {
        return new Promise((resolve) => {
            if (!this.synthesis || typeof SpeechSynthesisUtterance === 'undefined') {
                console.warn('[Voice] Speech synthesis not supported in this browser');
                resolve();
                return;
            }

            // Cancel any ongoing speech
            this.synthesis.cancel();

            const utterance = new SpeechSynthesisUtterance(text);
            utterance.lang = 'de-DE';
            utterance.rate = 0.95;
            utterance.onend = resolve;
            utterance.onerror = () => {
                console.warn('[Voice] Browser TTS error');
                resolve();
            };
            this.synthesis.speak(utterance);
        });
    }

    /**
     * Fetch synthesized audio from `/voice/synthesize` and play it. Falls back
     * to browser speech synthesis when the request or the playback fails.
     *
     * @param {string} text - Text to speak.
     * @returns {Promise<*>} Settles when playback (or the fallback) has finished.
     */
    async _speakAPI(text) {
        let audioUrl = null;
        try {
            const response = await api(`/voice/synthesize?text=${encodeURIComponent(text)}`, {
                method: 'POST',
            });

            if (response?.ok) {
                const audioBlob = await response.blob();
                audioUrl = URL.createObjectURL(audioBlob);
                const audio = new Audio(audioUrl);

                // Attach the handlers before playback starts so neither an
                // early `ended` nor a playback error can leave this pending.
                await new Promise((resolve, reject) => {
                    audio.onended = resolve;
                    audio.onerror = () => reject(new Error('Audio playback failed'));
                    Promise.resolve(audio.play()).catch(reject);
                });
                return;
            }
        } catch (e) {
            console.warn('[Voice] API TTS failed, falling back to browser', e);
        } finally {
            // Release the blob backing the object URL once it is no longer needed.
            if (audioUrl) URL.revokeObjectURL(audioUrl);
        }

        // Fallback to browser TTS
        return this._speakBrowser(text);
    }

    /**
     * Start recording when idle, stop it when active.
     *
     * @returns {Promise<void>|undefined} The startRecording() promise when a
     *     recording was started, otherwise undefined.
     */
    toggleRecording() {
        if (this.isRecording) {
            this.stopRecording();
            return undefined;
        }
        return this.startRecording();
    }
}