diff --git a/package.json b/package.json
index 2f9b5ec..68cb6e8 100644
--- a/package.json
+++ b/package.json
@@ -29,6 +29,7 @@
   },
   "license": "GPL-3.0",
   "dependencies": {
+    "@google/genai": "^1.8.0",
     "@google/generative-ai": "^0.24.1",
     "axios": "^1.10.0",
     "better-sqlite3": "^9.4.3",
diff --git a/src/app/ApiKeyHeader.js b/src/app/ApiKeyHeader.js
index e992df0..bf4ea41 100644
--- a/src/app/ApiKeyHeader.js
+++ b/src/app/ApiKeyHeader.js
@@ -268,6 +268,7 @@ export class ApiKeyHeader extends LitElement {
        this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
        this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
        this.handleProviderChange = this.handleProviderChange.bind(this);
+       this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
    }

    reset() {
@@ -404,12 +405,20 @@ export class ApiKeyHeader extends LitElement {
        let isValid = false;
        try {
            const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
-
+
            if (isValid) {
-               console.log('API key valid - starting slide out animation');
-               this.startSlideOutAnimation();
-               this.validatedApiKey = this.apiKey.trim();
-               this.validatedProvider = this.selectedProvider;
+               console.log('API key valid – checking system permissions…');
+               const permissionResult = await this.checkAndRequestPermissions();
+
+               if (permissionResult.success) {
+                   console.log('All permissions granted – starting slide-out animation');
+                   this.startSlideOutAnimation();
+                   this.validatedApiKey = this.apiKey.trim();
+                   this.validatedProvider = this.selectedProvider;
+               } else {
+                   this.errorMessage = permissionResult.error || 'Permission setup required';
+                   console.log('Permission setup incomplete:', permissionResult);
+               }
            } else {
                this.errorMessage = 'Invalid API key - please check and try again';
                console.log('API key validation failed');
@@ -488,6 +497,45 @@ export class ApiKeyHeader extends LitElement {
        return false;
    }

+   async checkAndRequestPermissions() {
+       if (!window.require) return { success: true };
+
+       const { ipcRenderer } = window.require('electron');
+
+       try {
+           const permissions = await ipcRenderer.invoke('check-system-permissions');
+           console.log('[Permissions] Current status:', permissions);
+
+           if (!permissions.needsSetup) return { success: true };
+
+           if (!permissions.microphone) {
+               console.log('[Permissions] Requesting microphone permission…');
+               const micResult = await ipcRenderer.invoke('request-microphone-permission');
+               if (!micResult.success) {
+                   await ipcRenderer.invoke('open-system-preferences', 'microphone');
+                   return {
+                       success: false,
+                       error: 'Please grant microphone access in System Preferences',
+                   };
+               }
+           }
+
+           if (!permissions.screen) {
+               console.log('[Permissions] Screen-recording permission needed');
+               await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
+               return {
+                   success: false,
+                   error: 'Please grant screen recording access in System Preferences',
+               };
+           }
+
+           return { success: true };
+       } catch (err) {
+           console.error('[Permissions] Error checking/requesting permissions:', err);
+           return { success: false, error: 'Failed to check permissions' };
+       }
+   }
+
    startSlideOutAnimation() {
        this.classList.add('sliding-out');
    }
diff --git a/src/common/services/googleGeminiClient.js b/src/common/services/googleGeminiClient.js
index 9078879..877c82e 100644
--- a/src/common/services/googleGeminiClient.js
+++ b/src/common/services/googleGeminiClient.js
@@ -1,4 +1,5 @@
 const { GoogleGenerativeAI } = require('@google/generative-ai');
+const { GoogleGenAI } = require('@google/genai');

 /**
  * Creates and returns a Google Gemini client instance for generative AI.
@@ -113,8 +114,58 @@ function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
    };
 }

+// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
+//     const liveClient = new GoogleGenAI({
+//         vertexai: false, // not using Vertex AI
+//         apiKey,
+//     });
+
+//     // open the live STT session
+//     const session = await liveClient.live.connect({
+//         model: 'gemini-live-2.5-flash-preview',
+//         callbacks,
+//         config: {
+//             inputAudioTranscription: {}, // required for realtime STT
+//             speechConfig: { languageCode: language },
+//         },
+//     });
+
+//     return {
+//         sendRealtimeInput: async data => session.send({
+//             audio: { data, mimeType: 'audio/pcm;rate=24000' }
+//         }),
+//         close: async () => session.close(),
+//     };
+// }
+
+async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
+    // ① Reuse the old-style helper
+    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });
+
+    // ② Coerce the language code into BCP-47 form
+    const lang = language.includes('-') ? language : `${language}-US`;
+
+    const session = await liveClient.live.connect({
+        model: 'gemini-live-2.5-flash-preview',
+        callbacks,
+        config: {
+            inputAudioTranscription: {},
+            speechConfig: { languageCode: lang },
+        },
+    });
+
+    // ③ SDK 0.5+: sendRealtimeInput is the official method name
+    return {
+        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
+        close: async () => session.close(),
+    };
+}
+
 module.exports = {
    createGeminiClient,
    getGeminiGenerativeModel,
-   createGeminiChat
+   createGeminiChat,
+   connectToGeminiSession,
 };
\ No newline at end of file
diff --git a/src/electron/windowManager.js b/src/electron/windowManager.js
index e5fee72..eccb995 100644
--- a/src/electron/windowManager.js
+++ b/src/electron/windowManager.js
@@ -1833,8 +1833,103 @@ function setupIpcHandlers(openaiSessionRef) {
            header.webContents.send('request-firebase-logout');
        }
    });
+
+   ipcMain.handle('check-system-permissions', async () => {
+       const { systemPreferences } = require('electron');
+       const permissions = {
+           microphone: false,
+           screen: false,
+           needsSetup: false
+       };
+
+       try {
+           if (process.platform === 'darwin') {
+               // Check microphone permission on macOS
+               const micStatus = systemPreferences.getMediaAccessStatus('microphone');
+               permissions.microphone = micStatus === 'granted';
+
+               try {
+                   const sources = await desktopCapturer.getSources({
+                       types: ['screen'],
+                       thumbnailSize: { width: 1, height: 1 }
+                   });
+                   permissions.screen = sources && sources.length > 0;
+               } catch (err) {
+                   console.log('[Permissions] Screen capture test failed:', err);
+                   permissions.screen = false;
+               }
+
+               permissions.needsSetup = !permissions.microphone || !permissions.screen;
+           } else {
+               permissions.microphone = true;
+               permissions.screen = true;
+               permissions.needsSetup = false;
+           }
+
+           console.log('[Permissions] System permissions status:', permissions);
+           return permissions;
+       } catch (error) {
+           console.error('[Permissions] Error checking permissions:', error);
+           return {
+               microphone: false,
+               screen: false,
+               needsSetup: true,
+               error: error.message
+           };
+       }
+   });
+
+   ipcMain.handle('request-microphone-permission', async () => {
+       if (process.platform !== 'darwin') {
+           return { success: true };
+       }
+
+       const { systemPreferences } = require('electron');
+       try {
+           const status = systemPreferences.getMediaAccessStatus('microphone');
+           if (status === 'granted') {
+               return { success: true, status: 'already-granted' };
+           }
+
+           // Request microphone access
+           const granted = await systemPreferences.askForMediaAccess('microphone');
+           return {
+               success: granted,
+               status: granted ? 'granted' : 'denied'
+           };
+       } catch (error) {
+           console.error('[Permissions] Error requesting microphone permission:', error);
+           return {
+               success: false,
+               error: error.message
+           };
+       }
+   });
+
+   ipcMain.handle('open-system-preferences', async (event, section) => {
+       if (process.platform !== 'darwin') {
+           return { success: false, error: 'Not supported on this platform' };
+       }
+
+       try {
+           // Open System Preferences to the relevant Privacy & Security pane
+           if (section === 'screen-recording') {
+               await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
+           } else if (section === 'microphone') {
+               await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone');
+           } else {
+               await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy');
+           }
+           return { success: true };
+       } catch (error) {
+           console.error('[Permissions] Error opening system preferences:', error);
+           return { success: false, error: error.message };
+       }
+   });
 }
+
+
 let storedApiKey = null;
 let storedProvider = 'openai';
diff --git a/src/features/listen/liveSummaryService.js b/src/features/listen/liveSummaryService.js
index 1f367e3..4ea1a1a 100644
--- a/src/features/listen/liveSummaryService.js
+++ b/src/features/listen/liveSummaryService.js
@@ -3,6 +3,7 @@ const { BrowserWindow, ipcMain } = require('electron');
 const { spawn } = require('child_process');
 const { saveDebugAudio } = require('./audioUtils.js');
 const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
+const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
 const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
 const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
 const sqliteClient = require('../../common/services/sqliteClient');
@@ -538,7 +539,6 @@ async function initializeLiveSummarySession(language = 'en') {
    sendToRenderer('session-initializing', true);
    sendToRenderer('update-status', 'Initializing sessions...');

-   // Merged block
    const API_KEY = getApiKey();
    if (!API_KEY) {
        console.error('FATAL ERROR: API Key is not defined.');
@@ -550,73 +550,90 @@ async function initializeLiveSummarySession(language = 'en') {

    initializeNewSession();

+   const provider = await getAiProvider();
+   const isGemini = provider === 'gemini';
+   console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);
+
    try {
        const handleMyMessage = message => {
-           const type = message.type;
-           const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
-
-           if (type === 'conversation.item.input_audio_transcription.delta') {
-               if (myCompletionTimer) {
-                   clearTimeout(myCompletionTimer);
-                   myCompletionTimer = null;
-               }
-
-               myCurrentUtterance += text;
-
-               const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
-
-               if (text && !text.includes('vq_lbr_audio_')) {
-                   sendToRenderer('stt-update', {
-                       speaker: 'Me',
-                       text: continuousText,
-                       isPartial: true,
-                       isFinal: false,
-                       timestamp: Date.now(),
-                   });
-               }
-           } else if (type === 'conversation.item.input_audio_transcription.completed') {
+           if (isGemini) {
+               // console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
+               const text = message.serverContent?.inputTranscription?.text || '';
                if (text && text.trim()) {
-                   const finalUtteranceText = text.trim();
-                   myCurrentUtterance = '';
-
-                   debounceMyCompletion(finalUtteranceText);
+                   const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
+                   if (finalUtteranceText && finalUtteranceText !== '.') {
+                       debounceMyCompletion(finalUtteranceText);
+                   }
                }
-           } else if (message.error) {
+           } else {
+               const type = message.type;
+               const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
+
+               if (type === 'conversation.item.input_audio_transcription.delta') {
+                   if (myCompletionTimer) clearTimeout(myCompletionTimer);
+                   myCompletionTimer = null;
+                   myCurrentUtterance += text;
+                   const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
+                   if (text && !text.includes('vq_lbr_audio_')) {
+                       sendToRenderer('stt-update', {
+                           speaker: 'Me',
+                           text: continuousText,
+                           isPartial: true,
+                           isFinal: false,
+                           timestamp: Date.now(),
+                       });
+                   }
+               } else if (type === 'conversation.item.input_audio_transcription.completed') {
+                   if (text && text.trim()) {
+                       const finalUtteranceText = text.trim();
+                       myCurrentUtterance = '';
+                       debounceMyCompletion(finalUtteranceText);
+                   }
+               }
+           }
+
+           if (message.error) {
                console.error('[Me] STT Session Error:', message.error);
            }
        };

        const handleTheirMessage = message => {
-           const type = message.type;
-           const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
-
-           if (type === 'conversation.item.input_audio_transcription.delta') {
-               if (theirCompletionTimer) {
-                   clearTimeout(theirCompletionTimer);
-                   theirCompletionTimer = null;
-               }
-
-               theirCurrentUtterance += text;
-
-               const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
-
-               if (text && !text.includes('vq_lbr_audio_')) {
-                   sendToRenderer('stt-update', {
-                       speaker: 'Them',
-                       text: continuousText,
-                       isPartial: true,
-                       isFinal: false,
-                       timestamp: Date.now(),
-                   });
-               }
-           } else if (type === 'conversation.item.input_audio_transcription.completed') {
+           if (isGemini) {
+               // console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
+               const text = message.serverContent?.inputTranscription?.text || '';
                if (text && text.trim()) {
-                   const finalUtteranceText = text.trim();
-                   theirCurrentUtterance = '';
-
-                   debounceTheirCompletion(finalUtteranceText);
+                   const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
+                   if (finalUtteranceText && finalUtteranceText !== '.') {
+                       debounceTheirCompletion(finalUtteranceText);
+                   }
                }
-           } else if (message.error) {
+           } else {
+               const type = message.type;
+               const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
+               if (type === 'conversation.item.input_audio_transcription.delta') {
+                   if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
+                   theirCompletionTimer = null;
+                   theirCurrentUtterance += text;
+                   const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
+                   if (text && !text.includes('vq_lbr_audio_')) {
+                       sendToRenderer('stt-update', {
+                           speaker: 'Them',
+                           text: continuousText,
+                           isPartial: true,
+                           isFinal: false,
+                           timestamp: Date.now(),
+                       });
+                   }
+               } else if (type === 'conversation.item.input_audio_transcription.completed') {
+                   if (text && text.trim()) {
+                       const finalUtteranceText = text.trim();
+                       theirCurrentUtterance = '';
+                       debounceTheirCompletion(finalUtteranceText);
+                   }
+               }
+           }
+
+           if (message.error) {
                console.error('[Them] STT Session Error:', message.error);
            }
        };
@@ -638,10 +655,17 @@ async function initializeLiveSummarySession(language = 'en') {
            },
        };

-       [mySttSession, theirSttSession] = await Promise.all([
-           connectToOpenAiSession(API_KEY, mySttConfig, keyType),
-           connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
-       ]);
+       if (isGemini) {
+           [mySttSession, theirSttSession] = await Promise.all([
+               connectToGeminiSession(API_KEY, mySttConfig),
+               connectToGeminiSession(API_KEY, theirSttConfig),
+           ]);
+       } else {
+           [mySttSession, theirSttSession] = await Promise.all([
+               connectToOpenAiSession(API_KEY, mySttConfig, keyType),
+               connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
+           ]);
+       }

        console.log('✅ Both STT sessions initialized successfully.');
        triggerAnalysisIfNeeded();
@@ -653,7 +677,7 @@
        sendToRenderer('update-status', 'Connected. Ready to listen.');
        return true;
    } catch (error) {
-       console.error('❌ Failed to initialize OpenAI STT sessions:', error);
+       console.error('❌ Failed to initialize STT sessions:', error);
        isInitializingSession = false;
        sendToRenderer('session-initializing', false);
        sendToRenderer('update-status', 'Initialization failed.');
@@ -725,6 +749,9 @@ async function startMacOSAudioCapture() {

    let audioBuffer = Buffer.alloc(0);

+   const provider = await getAiProvider();
+   const isGemini = provider === 'gemini';
+
    systemAudioProc.stdout.on('data', async data => {
        audioBuffer = Buffer.concat([audioBuffer, data]);

@@ -739,10 +766,11 @@

            if (theirSttSession) {
                try {
-                   // await theirSttSession.sendRealtimeInput({
-                   //     audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' },
-                   // });
-                   await theirSttSession.sendRealtimeInput(base64Data);
+                   // await theirSttSession.sendRealtimeInput(base64Data);
+                   const payload = isGemini
+                       ? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
+                       : base64Data;
+                   await theirSttSession.sendRealtimeInput(payload);
                } catch (err) {
                    console.error('Error sending system audio:', err.message);
                }
@@ -861,9 +889,17 @@ function setupLiveSummaryIpcHandlers() {
    });

    ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
+       const provider = await getAiProvider();
+       const isGemini = provider === 'gemini';
        if (!mySttSession) return { success: false, error: 'User STT session not active' };
        try {
-           await mySttSession.sendRealtimeInput(data);
+           // await mySttSession.sendRealtimeInput(data);
+           // Wrap the payload in the format the active provider expects
+           const payload = isGemini
+               ? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
+               : data; // OpenAI takes the raw base64 string
+
+           await mySttSession.sendRealtimeInput(payload);
            return { success: true };
        } catch (error) {
            console.error('Error sending user audio:', error);
diff --git a/src/features/listen/renderer.js b/src/features/listen/renderer.js
index 2de3399..64f321d 100644
--- a/src/features/listen/renderer.js
+++ b/src/features/listen/renderer.js
@@ -233,7 +233,11 @@ class SimpleAEC {
        this.echoGain = 0.5;
        this.noiseFloor = 0.01;

-       console.log('🎯 Weakened AEC initialized');
+       // 🔧 Adaptive-gain parameters (user-tuned, very aggressive)
+       this.targetErr = 0.002;
+       this.adaptRate = 0.1;
+
+       console.log('🎯 AEC initialized (hyper-aggressive)');
    }

    process(micData, systemData) {
@@ -241,6 +245,19 @@ class SimpleAEC {
            return micData;
        }

+       for (let i = 0; i < systemData.length; i++) {
+           if (systemData[i] > 0.98) systemData[i] = 0.98;
+           else if (systemData[i] < -0.98) systemData[i] = -0.98;
+
+           systemData[i] = Math.tanh(systemData[i] * 4);
+       }
+
+       let sum2 = 0;
+       for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
+       const rms = Math.sqrt(sum2 / systemData.length);
+       const targetRms = 0.08; // 🔧 reference RMS (was 0.1)
+       const scale = targetRms / (rms + 1e-6); // 1e-6 guards against division by zero
+
        const output = new Float32Array(micData.length);

        const optimalDelay = this.findOptimalDelay(micData, systemData);
@@ -252,23 +269,32 @@ class SimpleAEC {
                const delayIndex = i - optimalDelay - d;
                if (delayIndex >= 0 && delayIndex < systemData.length) {
                    const weight = Math.exp(-Math.abs(d) / 1000);
-                   echoEstimate += systemData[delayIndex] * this.echoGain * weight;
+                   echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
                }
            }

-           output[i] = micData[i] - echoEstimate * 0.5;
+           output[i] = micData[i] - echoEstimate * 0.9;

            if (Math.abs(output[i]) < this.noiseFloor) {
                output[i] *= 0.5;
            }

            if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
-               output[i] *= 0.5;
+               output[i] *= 0.25;
            }

            output[i] = Math.max(-1, Math.min(1, output[i]));
        }
+
+       let errSum = 0;
+       for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
+       const errRms = Math.sqrt(errSum / output.length);
+
+       const err = errRms - this.targetErr;
+       this.echoGain += this.adaptRate * err; // proportional control
+       this.echoGain = Math.max(0, Math.min(1, this.echoGain));
+
        return output;
    }
@@ -310,7 +336,7 @@ class SimpleAEC {
            }
        }

-       return similarity / (2 * windowSize + 1) < 0.2;
+       return similarity / (2 * windowSize + 1) < 0.15;
    }
 }
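
Note (not part of the patch): a minimal usage sketch of the new connectToGeminiSession helper, assuming the @google/genai live callbacks follow the SDK's onmessage/onerror/onclose shape; the transcript field read here mirrors the serverContent.inputTranscription.text access used in liveSummaryService.js above, and the base64 PCM chunk and demo function name are hypothetical placeholders.

const { connectToGeminiSession } = require('./src/common/services/googleGeminiClient.js');

async function demoGeminiStt(apiKey, pcmChunkBase64 /* hypothetical: 24 kHz 16-bit mono PCM, base64-encoded */) {
    const session = await connectToGeminiSession(apiKey, {
        language: 'en-US',
        callbacks: {
            // Assumed @google/genai live callback names (onopen/onmessage/onerror/onclose).
            onmessage: message => {
                const text = message.serverContent?.inputTranscription?.text;
                if (text) console.log('[STT]', text);
            },
            onerror: err => console.error('[STT] error:', err),
            onclose: () => console.log('[STT] session closed'),
        },
    });

    // The helper forwards payloads to session.sendRealtimeInput, so audio is
    // passed in the same wrapped shape liveSummaryService.js builds for Gemini.
    await session.sendRealtimeInput({
        audio: { data: pcmChunkBase64, mimeType: 'audio/pcm;rate=24000' },
    });

    await session.close();
}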