stt UI fix + more responsibility

This commit is contained in:
samtiz 2025-07-09 00:19:46 +09:00
parent 55961c956a
commit 9977387fbc
2 changed files with 71 additions and 85 deletions

View File

@ -48,11 +48,11 @@ async function createSTT({ apiKey, language = 'en', callbacks = {}, usePortkey =
turn_detection: {
type: 'server_vad',
threshold: 0.5,
prefix_padding_ms: 50,
silence_duration_ms: 25,
prefix_padding_ms: 200,
silence_duration_ms: 100,
},
input_audio_noise_reduction: {
type: 'near_field'
type: 'far_field'
}
}
};

View File

@ -12,11 +12,6 @@ class SttService {
this.myCurrentUtterance = '';
this.theirCurrentUtterance = '';
this.myLastPartialText = '';
this.theirLastPartialText = '';
this.myInactivityTimer = null;
this.theirInactivityTimer = null;
// Turn-completion debouncing
this.myCompletionBuffer = '';
this.theirCompletionBuffer = '';
@ -38,33 +33,6 @@ class SttService {
this.onStatusUpdate = onStatusUpdate;
}
// async getApiKey() {
// const storedKey = await getStoredApiKey();
// if (storedKey) {
// console.log('[SttService] Using stored API key');
// return storedKey;
// }
// const envKey = process.env.OPENAI_API_KEY;
// if (envKey) {
// console.log('[SttService] Using environment API key');
// return envKey;
// }
// console.error('[SttService] No API key found in storage or environment');
// return null;
// }
// async getAiProvider() {
// try {
// const { ipcRenderer } = require('electron');
// const provider = await ipcRenderer.invoke('get-ai-provider');
// return provider || 'openai';
// } catch (error) {
// return getStoredProvider ? getStoredProvider() : 'openai';
// }
// }
sendToRenderer(channel, data) {
BrowserWindow.getAllWindows().forEach(win => {
if (!win.isDestroyed()) {
@ -74,9 +42,8 @@ class SttService {
}
flushMyCompletion() {
if (!this.modelInfo || !this.myCompletionBuffer.trim()) return;
const finalText = this.myCompletionBuffer.trim();
const finalText = (this.myCompletionBuffer + this.myCurrentUtterance).trim();
if (!this.modelInfo || !finalText) return;
// Notify completion callback
if (this.onTranscriptionComplete) {
@ -102,9 +69,8 @@ class SttService {
}
flushTheirCompletion() {
if (!this.modelInfo || !this.theirCompletionBuffer.trim()) return;
const finalText = this.theirCompletionBuffer.trim();
const finalText = (this.theirCompletionBuffer + this.theirCurrentUtterance).trim();
if (!this.modelInfo || !finalText) return;
// Notify completion callback
if (this.onTranscriptionComplete) {
@ -130,27 +96,23 @@ class SttService {
}
debounceMyCompletion(text) {
// 상대방이 말하고 있던 경우, 화자가 변경되었으므로 즉시 상대방의 말풍선을 완성합니다.
if (this.theirCompletionTimer) {
clearTimeout(this.theirCompletionTimer);
this.flushTheirCompletion();
if (this.modelInfo?.provider === 'gemini') {
this.myCompletionBuffer += text;
} else {
this.myCompletionBuffer += (this.myCompletionBuffer ? ' ' : '') + text;
}
this.myCompletionBuffer += (this.myCompletionBuffer ? ' ' : '') + text;
if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer);
this.myCompletionTimer = setTimeout(() => this.flushMyCompletion(), COMPLETION_DEBOUNCE_MS);
}
debounceTheirCompletion(text) {
// 내가 말하고 있던 경우, 화자가 변경되었으므로 즉시 내 말풍선을 완성합니다.
if (this.myCompletionTimer) {
clearTimeout(this.myCompletionTimer);
this.flushMyCompletion();
if (this.modelInfo?.provider === 'gemini') {
this.theirCompletionBuffer += text;
} else {
this.theirCompletionBuffer += (this.theirCompletionBuffer ? ' ' : '') + text;
}
this.theirCompletionBuffer += (this.theirCompletionBuffer ? ' ' : '') + text;
if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer);
this.theirCompletionTimer = setTimeout(() => this.flushTheirCompletion(), COMPLETION_DEBOUNCE_MS);
}
@ -158,12 +120,6 @@ class SttService {
async initializeSttSessions(language = 'en') {
const effectiveLanguage = process.env.OPENAI_TRANSCRIBE_LANG || language || 'en';
// const API_KEY = await this.getApiKey();
// if (!API_KEY) {
// throw new Error('No API key available');
// }
// const provider = await this.getAiProvider();
const modelInfo = await getCurrentModelInfo(null, { type: 'stt' });
if (!modelInfo || !modelInfo.apiKey) {
throw new Error('AI model or API key is not configured.');
@ -171,10 +127,6 @@ class SttService {
this.modelInfo = modelInfo;
console.log(`[SttService] Initializing STT for ${modelInfo.provider} using model ${modelInfo.model}`);
// const isGemini = modelInfo.provider === 'gemini';
// console.log(`[SttService] Initializing STT for provider: ${modelInfo.provider}`);
const handleMyMessage = message => {
if (!this.modelInfo) {
console.log('[SttService] Ignoring message - session already closed');
@ -182,13 +134,35 @@ class SttService {
}
if (this.modelInfo.provider === 'gemini') {
const text = message.serverContent?.inputTranscription?.text || '';
if (text && text.trim()) {
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
if (finalUtteranceText && finalUtteranceText !== '.') {
this.debounceMyCompletion(finalUtteranceText);
}
if (!message.serverContent?.modelTurn) {
console.log('[Gemini STT - Me]', JSON.stringify(message, null, 2));
}
if (message.serverContent?.turnComplete) {
if (this.myCompletionTimer) {
clearTimeout(this.myCompletionTimer);
this.flushMyCompletion();
}
return;
}
const transcription = message.serverContent?.inputTranscription;
if (!transcription || !transcription.text) return;
const textChunk = transcription.text;
if (!textChunk.trim() || textChunk.trim() === '<noise>') {
return; // 1. Ignore whitespace-only chunks or noise
}
this.debounceMyCompletion(textChunk);
this.sendToRenderer('stt-update', {
speaker: 'Me',
text: this.myCompletionBuffer,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
@ -230,13 +204,35 @@ class SttService {
}
if (this.modelInfo.provider === 'gemini') {
const text = message.serverContent?.inputTranscription?.text || '';
if (text && text.trim()) {
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
if (finalUtteranceText && finalUtteranceText !== '.') {
this.debounceTheirCompletion(finalUtteranceText);
}
if (!message.serverContent?.modelTurn) {
console.log('[Gemini STT - Them]', JSON.stringify(message, null, 2));
}
if (message.serverContent?.turnComplete) {
if (this.theirCompletionTimer) {
clearTimeout(this.theirCompletionTimer);
this.flushTheirCompletion();
}
return;
}
const transcription = message.serverContent?.inputTranscription;
if (!transcription || !transcription.text) return;
const textChunk = transcription.text;
if (!textChunk.trim() || textChunk.trim() === '<noise>') {
return; // 1. Ignore whitespace-only chunks or noise
}
this.debounceTheirCompletion(textChunk);
this.sendToRenderer('stt-update', {
speaker: 'Them',
text: this.theirCompletionBuffer,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
@ -496,14 +492,6 @@ class SttService {
this.stopMacOSAudioCapture();
// Clear timers
if (this.myInactivityTimer) {
clearTimeout(this.myInactivityTimer);
this.myInactivityTimer = null;
}
if (this.theirInactivityTimer) {
clearTimeout(this.theirInactivityTimer);
this.theirInactivityTimer = null;
}
if (this.myCompletionTimer) {
clearTimeout(this.myCompletionTimer);
this.myCompletionTimer = null;
@ -529,8 +517,6 @@ class SttService {
// Reset state
this.myCurrentUtterance = '';
this.theirCurrentUtterance = '';
this.myLastPartialText = '';
this.theirLastPartialText = '';
this.myCompletionBuffer = '';
this.theirCompletionBuffer = '';
this.modelInfo = null;