stt UI fix + more responsibility
This commit is contained in:
parent
55961c956a
commit
9977387fbc
@ -48,11 +48,11 @@ async function createSTT({ apiKey, language = 'en', callbacks = {}, usePortkey =
|
|||||||
turn_detection: {
|
turn_detection: {
|
||||||
type: 'server_vad',
|
type: 'server_vad',
|
||||||
threshold: 0.5,
|
threshold: 0.5,
|
||||||
prefix_padding_ms: 50,
|
prefix_padding_ms: 200,
|
||||||
silence_duration_ms: 25,
|
silence_duration_ms: 100,
|
||||||
},
|
},
|
||||||
input_audio_noise_reduction: {
|
input_audio_noise_reduction: {
|
||||||
type: 'near_field'
|
type: 'far_field'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -12,11 +12,6 @@ class SttService {
|
|||||||
this.myCurrentUtterance = '';
|
this.myCurrentUtterance = '';
|
||||||
this.theirCurrentUtterance = '';
|
this.theirCurrentUtterance = '';
|
||||||
|
|
||||||
this.myLastPartialText = '';
|
|
||||||
this.theirLastPartialText = '';
|
|
||||||
this.myInactivityTimer = null;
|
|
||||||
this.theirInactivityTimer = null;
|
|
||||||
|
|
||||||
// Turn-completion debouncing
|
// Turn-completion debouncing
|
||||||
this.myCompletionBuffer = '';
|
this.myCompletionBuffer = '';
|
||||||
this.theirCompletionBuffer = '';
|
this.theirCompletionBuffer = '';
|
||||||
@ -38,33 +33,6 @@ class SttService {
|
|||||||
this.onStatusUpdate = onStatusUpdate;
|
this.onStatusUpdate = onStatusUpdate;
|
||||||
}
|
}
|
||||||
|
|
||||||
// async getApiKey() {
|
|
||||||
// const storedKey = await getStoredApiKey();
|
|
||||||
// if (storedKey) {
|
|
||||||
// console.log('[SttService] Using stored API key');
|
|
||||||
// return storedKey;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// const envKey = process.env.OPENAI_API_KEY;
|
|
||||||
// if (envKey) {
|
|
||||||
// console.log('[SttService] Using environment API key');
|
|
||||||
// return envKey;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// console.error('[SttService] No API key found in storage or environment');
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// async getAiProvider() {
|
|
||||||
// try {
|
|
||||||
// const { ipcRenderer } = require('electron');
|
|
||||||
// const provider = await ipcRenderer.invoke('get-ai-provider');
|
|
||||||
// return provider || 'openai';
|
|
||||||
// } catch (error) {
|
|
||||||
// return getStoredProvider ? getStoredProvider() : 'openai';
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
sendToRenderer(channel, data) {
|
sendToRenderer(channel, data) {
|
||||||
BrowserWindow.getAllWindows().forEach(win => {
|
BrowserWindow.getAllWindows().forEach(win => {
|
||||||
if (!win.isDestroyed()) {
|
if (!win.isDestroyed()) {
|
||||||
@ -74,10 +42,9 @@ class SttService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
flushMyCompletion() {
|
flushMyCompletion() {
|
||||||
if (!this.modelInfo || !this.myCompletionBuffer.trim()) return;
|
const finalText = (this.myCompletionBuffer + this.myCurrentUtterance).trim();
|
||||||
|
if (!this.modelInfo || !finalText) return;
|
||||||
|
|
||||||
const finalText = this.myCompletionBuffer.trim();
|
|
||||||
|
|
||||||
// Notify completion callback
|
// Notify completion callback
|
||||||
if (this.onTranscriptionComplete) {
|
if (this.onTranscriptionComplete) {
|
||||||
this.onTranscriptionComplete('Me', finalText);
|
this.onTranscriptionComplete('Me', finalText);
|
||||||
@ -102,9 +69,8 @@ class SttService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
flushTheirCompletion() {
|
flushTheirCompletion() {
|
||||||
if (!this.modelInfo || !this.theirCompletionBuffer.trim()) return;
|
const finalText = (this.theirCompletionBuffer + this.theirCurrentUtterance).trim();
|
||||||
|
if (!this.modelInfo || !finalText) return;
|
||||||
const finalText = this.theirCompletionBuffer.trim();
|
|
||||||
|
|
||||||
// Notify completion callback
|
// Notify completion callback
|
||||||
if (this.onTranscriptionComplete) {
|
if (this.onTranscriptionComplete) {
|
||||||
@ -130,39 +96,29 @@ class SttService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
debounceMyCompletion(text) {
|
debounceMyCompletion(text) {
|
||||||
// 상대방이 말하고 있던 경우, 화자가 변경되었으므로 즉시 상대방의 말풍선을 완성합니다.
|
if (this.modelInfo?.provider === 'gemini') {
|
||||||
if (this.theirCompletionTimer) {
|
this.myCompletionBuffer += text;
|
||||||
clearTimeout(this.theirCompletionTimer);
|
} else {
|
||||||
this.flushTheirCompletion();
|
this.myCompletionBuffer += (this.myCompletionBuffer ? ' ' : '') + text;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.myCompletionBuffer += (this.myCompletionBuffer ? ' ' : '') + text;
|
|
||||||
|
|
||||||
if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer);
|
if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer);
|
||||||
this.myCompletionTimer = setTimeout(() => this.flushMyCompletion(), COMPLETION_DEBOUNCE_MS);
|
this.myCompletionTimer = setTimeout(() => this.flushMyCompletion(), COMPLETION_DEBOUNCE_MS);
|
||||||
}
|
}
|
||||||
|
|
||||||
debounceTheirCompletion(text) {
|
debounceTheirCompletion(text) {
|
||||||
// 내가 말하고 있던 경우, 화자가 변경되었으므로 즉시 내 말풍선을 완성합니다.
|
if (this.modelInfo?.provider === 'gemini') {
|
||||||
if (this.myCompletionTimer) {
|
this.theirCompletionBuffer += text;
|
||||||
clearTimeout(this.myCompletionTimer);
|
} else {
|
||||||
this.flushMyCompletion();
|
this.theirCompletionBuffer += (this.theirCompletionBuffer ? ' ' : '') + text;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.theirCompletionBuffer += (this.theirCompletionBuffer ? ' ' : '') + text;
|
|
||||||
|
|
||||||
if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer);
|
if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer);
|
||||||
this.theirCompletionTimer = setTimeout(() => this.flushTheirCompletion(), COMPLETION_DEBOUNCE_MS);
|
this.theirCompletionTimer = setTimeout(() => this.flushTheirCompletion(), COMPLETION_DEBOUNCE_MS);
|
||||||
}
|
}
|
||||||
|
|
||||||
async initializeSttSessions(language = 'en') {
|
async initializeSttSessions(language = 'en') {
|
||||||
const effectiveLanguage = process.env.OPENAI_TRANSCRIBE_LANG || language || 'en';
|
const effectiveLanguage = process.env.OPENAI_TRANSCRIBE_LANG || language || 'en';
|
||||||
|
|
||||||
// const API_KEY = await this.getApiKey();
|
|
||||||
// if (!API_KEY) {
|
|
||||||
// throw new Error('No API key available');
|
|
||||||
// }
|
|
||||||
// const provider = await this.getAiProvider();
|
|
||||||
|
|
||||||
const modelInfo = await getCurrentModelInfo(null, { type: 'stt' });
|
const modelInfo = await getCurrentModelInfo(null, { type: 'stt' });
|
||||||
if (!modelInfo || !modelInfo.apiKey) {
|
if (!modelInfo || !modelInfo.apiKey) {
|
||||||
@ -171,10 +127,6 @@ class SttService {
|
|||||||
this.modelInfo = modelInfo;
|
this.modelInfo = modelInfo;
|
||||||
console.log(`[SttService] Initializing STT for ${modelInfo.provider} using model ${modelInfo.model}`);
|
console.log(`[SttService] Initializing STT for ${modelInfo.provider} using model ${modelInfo.model}`);
|
||||||
|
|
||||||
|
|
||||||
// const isGemini = modelInfo.provider === 'gemini';
|
|
||||||
// console.log(`[SttService] Initializing STT for provider: ${modelInfo.provider}`);
|
|
||||||
|
|
||||||
const handleMyMessage = message => {
|
const handleMyMessage = message => {
|
||||||
if (!this.modelInfo) {
|
if (!this.modelInfo) {
|
||||||
console.log('[SttService] Ignoring message - session already closed');
|
console.log('[SttService] Ignoring message - session already closed');
|
||||||
@ -182,13 +134,35 @@ class SttService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.modelInfo.provider === 'gemini') {
|
if (this.modelInfo.provider === 'gemini') {
|
||||||
const text = message.serverContent?.inputTranscription?.text || '';
|
if (!message.serverContent?.modelTurn) {
|
||||||
if (text && text.trim()) {
|
console.log('[Gemini STT - Me]', JSON.stringify(message, null, 2));
|
||||||
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
|
|
||||||
if (finalUtteranceText && finalUtteranceText !== '.') {
|
|
||||||
this.debounceMyCompletion(finalUtteranceText);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (message.serverContent?.turnComplete) {
|
||||||
|
if (this.myCompletionTimer) {
|
||||||
|
clearTimeout(this.myCompletionTimer);
|
||||||
|
this.flushMyCompletion();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const transcription = message.serverContent?.inputTranscription;
|
||||||
|
if (!transcription || !transcription.text) return;
|
||||||
|
|
||||||
|
const textChunk = transcription.text;
|
||||||
|
if (!textChunk.trim() || textChunk.trim() === '<noise>') {
|
||||||
|
return; // 1. Ignore whitespace-only chunks or noise
|
||||||
|
}
|
||||||
|
|
||||||
|
this.debounceMyCompletion(textChunk);
|
||||||
|
|
||||||
|
this.sendToRenderer('stt-update', {
|
||||||
|
speaker: 'Me',
|
||||||
|
text: this.myCompletionBuffer,
|
||||||
|
isPartial: true,
|
||||||
|
isFinal: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
const type = message.type;
|
const type = message.type;
|
||||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||||
@ -230,13 +204,35 @@ class SttService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.modelInfo.provider === 'gemini') {
|
if (this.modelInfo.provider === 'gemini') {
|
||||||
const text = message.serverContent?.inputTranscription?.text || '';
|
if (!message.serverContent?.modelTurn) {
|
||||||
if (text && text.trim()) {
|
console.log('[Gemini STT - Them]', JSON.stringify(message, null, 2));
|
||||||
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
|
|
||||||
if (finalUtteranceText && finalUtteranceText !== '.') {
|
|
||||||
this.debounceTheirCompletion(finalUtteranceText);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (message.serverContent?.turnComplete) {
|
||||||
|
if (this.theirCompletionTimer) {
|
||||||
|
clearTimeout(this.theirCompletionTimer);
|
||||||
|
this.flushTheirCompletion();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const transcription = message.serverContent?.inputTranscription;
|
||||||
|
if (!transcription || !transcription.text) return;
|
||||||
|
|
||||||
|
const textChunk = transcription.text;
|
||||||
|
if (!textChunk.trim() || textChunk.trim() === '<noise>') {
|
||||||
|
return; // 1. Ignore whitespace-only chunks or noise
|
||||||
|
}
|
||||||
|
|
||||||
|
this.debounceTheirCompletion(textChunk);
|
||||||
|
|
||||||
|
this.sendToRenderer('stt-update', {
|
||||||
|
speaker: 'Them',
|
||||||
|
text: this.theirCompletionBuffer,
|
||||||
|
isPartial: true,
|
||||||
|
isFinal: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
const type = message.type;
|
const type = message.type;
|
||||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||||
@ -496,14 +492,6 @@ class SttService {
|
|||||||
this.stopMacOSAudioCapture();
|
this.stopMacOSAudioCapture();
|
||||||
|
|
||||||
// Clear timers
|
// Clear timers
|
||||||
if (this.myInactivityTimer) {
|
|
||||||
clearTimeout(this.myInactivityTimer);
|
|
||||||
this.myInactivityTimer = null;
|
|
||||||
}
|
|
||||||
if (this.theirInactivityTimer) {
|
|
||||||
clearTimeout(this.theirInactivityTimer);
|
|
||||||
this.theirInactivityTimer = null;
|
|
||||||
}
|
|
||||||
if (this.myCompletionTimer) {
|
if (this.myCompletionTimer) {
|
||||||
clearTimeout(this.myCompletionTimer);
|
clearTimeout(this.myCompletionTimer);
|
||||||
this.myCompletionTimer = null;
|
this.myCompletionTimer = null;
|
||||||
@ -529,8 +517,6 @@ class SttService {
|
|||||||
// Reset state
|
// Reset state
|
||||||
this.myCurrentUtterance = '';
|
this.myCurrentUtterance = '';
|
||||||
this.theirCurrentUtterance = '';
|
this.theirCurrentUtterance = '';
|
||||||
this.myLastPartialText = '';
|
|
||||||
this.theirLastPartialText = '';
|
|
||||||
this.myCompletionBuffer = '';
|
this.myCompletionBuffer = '';
|
||||||
this.theirCompletionBuffer = '';
|
this.theirCompletionBuffer = '';
|
||||||
this.modelInfo = null;
|
this.modelInfo = null;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user