enable gemini stt, solve windowManager conflict
This commit is contained in:
		
							parent
							
								
									413ff96966
								
							
						
					
					
						commit
						8af7aae2b5
					
				@ -29,6 +29,7 @@
 | 
			
		||||
    },
 | 
			
		||||
    "license": "GPL-3.0",
 | 
			
		||||
    "dependencies": {
 | 
			
		||||
        "@google/genai": "^1.8.0",
 | 
			
		||||
        "@google/generative-ai": "^0.24.1",
 | 
			
		||||
        "axios": "^1.10.0",
 | 
			
		||||
        "better-sqlite3": "^9.4.3",
 | 
			
		||||
 | 
			
		||||
@ -268,6 +268,7 @@ export class ApiKeyHeader extends LitElement {
 | 
			
		||||
        this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
 | 
			
		||||
        this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
 | 
			
		||||
        this.handleProviderChange = this.handleProviderChange.bind(this);
 | 
			
		||||
        this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    reset() {
 | 
			
		||||
@ -404,12 +405,20 @@ export class ApiKeyHeader extends LitElement {
 | 
			
		||||
        let isValid = false;
 | 
			
		||||
        try {
 | 
			
		||||
            const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
 | 
			
		||||
 | 
			
		||||
            
 | 
			
		||||
            if (isValid) {
 | 
			
		||||
                console.log('API key valid - starting slide out animation');
 | 
			
		||||
                this.startSlideOutAnimation();
 | 
			
		||||
                this.validatedApiKey = this.apiKey.trim();
 | 
			
		||||
                this.validatedProvider = this.selectedProvider;
 | 
			
		||||
                console.log('API key valid – checking system permissions…');
 | 
			
		||||
                const permissionResult = await this.checkAndRequestPermissions();
 | 
			
		||||
 | 
			
		||||
                if (permissionResult.success) {
 | 
			
		||||
                    console.log('All permissions granted – starting slide-out animation');
 | 
			
		||||
                    this.startSlideOutAnimation();
 | 
			
		||||
                    this.validatedApiKey = this.apiKey.trim();
 | 
			
		||||
                    this.validatedProvider = this.selectedProvider;
 | 
			
		||||
                } else {
 | 
			
		||||
                    this.errorMessage = permissionResult.error || 'Permission setup required';
 | 
			
		||||
                    console.log('Permission setup incomplete:', permissionResult);
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                this.errorMessage = 'Invalid API key - please check and try again';
 | 
			
		||||
                console.log('API key validation failed');
 | 
			
		||||
@ -488,6 +497,45 @@ export class ApiKeyHeader extends LitElement {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async checkAndRequestPermissions() {
 | 
			
		||||
        if (!window.require) return { success: true };
 | 
			
		||||
    
 | 
			
		||||
        const { ipcRenderer } = window.require('electron');
 | 
			
		||||
    
 | 
			
		||||
        try {
 | 
			
		||||
            const permissions = await ipcRenderer.invoke('check-system-permissions');
 | 
			
		||||
            console.log('[Permissions] Current status:', permissions);
 | 
			
		||||
    
 | 
			
		||||
            if (!permissions.needsSetup) return { success: true };
 | 
			
		||||
    
 | 
			
		||||
            if (!permissions.microphone) {
 | 
			
		||||
                console.log('[Permissions] Requesting microphone permission…');
 | 
			
		||||
                const micResult = await ipcRenderer.invoke('request-microphone-permission');
 | 
			
		||||
                if (!micResult.success) {
 | 
			
		||||
                    await ipcRenderer.invoke('open-system-preferences', 'microphone');
 | 
			
		||||
                    return {
 | 
			
		||||
                        success: false,
 | 
			
		||||
                        error: 'Please grant microphone access in System Preferences',
 | 
			
		||||
                    };
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
    
 | 
			
		||||
            if (!permissions.screen) {
 | 
			
		||||
                console.log('[Permissions] Screen-recording permission needed');
 | 
			
		||||
                await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
 | 
			
		||||
                return {
 | 
			
		||||
                    success: false,
 | 
			
		||||
                    error: 'Please grant screen recording access in System Preferences',
 | 
			
		||||
                };
 | 
			
		||||
            }
 | 
			
		||||
    
 | 
			
		||||
            return { success: true };
 | 
			
		||||
        } catch (err) {
 | 
			
		||||
            console.error('[Permissions] Error checking/requesting permissions:', err);
 | 
			
		||||
            return { success: false, error: 'Failed to check permissions' };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    startSlideOutAnimation() {
 | 
			
		||||
        this.classList.add('sliding-out');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
const { GoogleGenerativeAI } = require('@google/generative-ai');
 | 
			
		||||
const { GoogleGenAI } = require('@google/genai');
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Creates and returns a Google Gemini client instance for generative AI.
 | 
			
		||||
@ -113,8 +114,58 @@ function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
 | 
			
		||||
//         const liveClient = new GoogleGenAI({
 | 
			
		||||
//                 vertexai: false,   // Vertex AI 사용 안함
 | 
			
		||||
//                 apiKey,
 | 
			
		||||
//             });
 | 
			
		||||
        
 | 
			
		||||
//             // 라이브 STT 세션 열기
 | 
			
		||||
//             const session = await liveClient.live.connect({
 | 
			
		||||
//                 model: 'gemini-live-2.5-flash-preview',
 | 
			
		||||
//                 callbacks,
 | 
			
		||||
//                 config: {
 | 
			
		||||
//                     inputAudioTranscription: {},                 // 실시간 STT 필수
 | 
			
		||||
//                     speechConfig: { languageCode: language },
 | 
			
		||||
//                 },
 | 
			
		||||
//             });
 | 
			
		||||
 | 
			
		||||
//         return {
 | 
			
		||||
//             sendRealtimeInput: async data => session.send({
 | 
			
		||||
//                 audio: { data, mimeType: 'audio/pcm;rate=24000' }
 | 
			
		||||
//             }),
 | 
			
		||||
//             close: async () => session.close(),
 | 
			
		||||
//         };
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
/**
 * Opens a Gemini Live session configured for input-audio transcription.
 *
 * @param {string} apiKey - API key handed to the `GoogleGenAI` client.
 * @param {object} [options]
 * @param {string} [options.language='en-US'] - Language code. A value without
 *   a '-' (e.g. 'en') gets '-US' appended; a null, empty or non-string value
 *   falls back to 'en-US'.
 * @param {object} [options.callbacks={}] - Forwarded unchanged to `live.connect`.
 * @returns {Promise<{sendRealtimeInput: function(*): Promise<*>, close: function(): Promise<*>}>}
 *   Thin wrapper around the connected session.
 */
async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });

    // Default parameters only cover `undefined`, so guard null / empty /
    // non-string values here instead of letting `.includes` throw or
    // producing a malformed tag such as '-US'.
    const requested = typeof language === 'string' && language.trim() !== '' ? language.trim() : 'en-US';

    // Force a language-region form by appending a default region when missing.
    const lang = requested.includes('-') ? requested : `${requested}-US`;

    const session = await liveClient.live.connect({
        model: 'gemini-live-2.5-flash-preview',
        callbacks,
        config: {
            inputAudioTranscription: {},
            speechConfig: { languageCode: lang },
        },
    });

    // Expose only the two operations callers use on the session.
    return {
        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
        close: async () => session.close(),
    };
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
    createGeminiClient,
 | 
			
		||||
    getGeminiGenerativeModel,
 | 
			
		||||
    createGeminiChat
 | 
			
		||||
    createGeminiChat,
 | 
			
		||||
    connectToGeminiSession,
 | 
			
		||||
};
 | 
			
		||||
@ -1833,8 +1833,103 @@ function setupIpcHandlers(openaiSessionRef) {
 | 
			
		||||
            header.webContents.send('request-firebase-logout');
 | 
			
		||||
        }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    // IPC 'check-system-permissions': reports microphone / screen permission state.
    // Resolves to { microphone, screen, needsSetup }; if the check itself throws,
    // resolves to all-false with needsSetup: true plus the error message.
    ipcMain.handle('check-system-permissions', async () => {
        const { systemPreferences } = require('electron');
        const report = { microphone: false, screen: false, needsSetup: false };

        try {
            if (process.platform !== 'darwin') {
                // Non-macOS platforms are reported as fully granted.
                report.microphone = true;
                report.screen = true;
                report.needsSetup = false;
            } else {
                report.microphone = systemPreferences.getMediaAccessStatus('microphone') === 'granted';

                // Screen access is probed by asking for a 1x1 screen thumbnail.
                try {
                    const captured = await desktopCapturer.getSources({
                        types: ['screen'],
                        thumbnailSize: { width: 1, height: 1 },
                    });
                    report.screen = captured && captured.length > 0;
                } catch (probeError) {
                    console.log('[Permissions] Screen capture test failed:', probeError);
                    report.screen = false;
                }

                report.needsSetup = !(report.microphone && report.screen);
            }

            console.log('[Permissions] System permissions status:', report);
            return report;
        } catch (error) {
            console.error('[Permissions] Error checking permissions:', error);
            return {
                microphone: false,
                screen: false,
                needsSetup: true,
                error: error.message,
            };
        }
    });
 | 
			
		||||
 | 
			
		||||
    // IPC 'request-microphone-permission': asks for microphone access on macOS.
    // On other platforms it resolves to { success: true } without asking.
    ipcMain.handle('request-microphone-permission', async () => {
        const onMac = process.platform === 'darwin';
        if (!onMac) {
            return { success: true };
        }

        const { systemPreferences } = require('electron');

        try {
            const alreadyGranted = systemPreferences.getMediaAccessStatus('microphone') === 'granted';
            if (alreadyGranted) {
                return { success: true, status: 'already-granted' };
            }

            // Not granted yet: ask for access and report the outcome.
            const granted = await systemPreferences.askForMediaAccess('microphone');
            let status = 'denied';
            if (granted) {
                status = 'granted';
            }
            return { success: granted, status };
        } catch (error) {
            console.error('[Permissions] Error requesting microphone permission:', error);
            return { success: false, error: error.message };
        }
    });
 | 
			
		||||
 | 
			
		||||
    // IPC 'open-system-preferences': opens a macOS privacy pane.
    // `section` picks the pane ('screen-recording' or 'microphone'); any other
    // value opens the general Privacy pane. Unsupported on non-macOS platforms.
    ipcMain.handle('open-system-preferences', async (event, section) => {
        if (process.platform !== 'darwin') {
            return { success: false, error: 'Not supported on this platform' };
        }

        // A Map avoids accidental hits on Object.prototype keys for arbitrary input.
        const paneUrls = new Map([
            ['screen-recording', 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'],
            ['microphone', 'x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone'],
        ]);

        try {
            const target = paneUrls.has(section)
                ? paneUrls.get(section)
                : 'x-apple.systempreferences:com.apple.preference.security?Privacy';
            await shell.openExternal(target);
            return { success: true };
        } catch (error) {
            console.error('[Permissions] Error opening system preferences:', error);
            return { success: false, error: error.message };
        }
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
let storedApiKey = null;
 | 
			
		||||
let storedProvider = 'openai';
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -3,6 +3,7 @@ const { BrowserWindow, ipcMain } = require('electron');
 | 
			
		||||
const { spawn } = require('child_process');
 | 
			
		||||
const { saveDebugAudio } = require('./audioUtils.js');
 | 
			
		||||
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
 | 
			
		||||
const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
 | 
			
		||||
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
 | 
			
		||||
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
 | 
			
		||||
const sqliteClient = require('../../common/services/sqliteClient');
 | 
			
		||||
@ -538,7 +539,6 @@ async function initializeLiveSummarySession(language = 'en') {
 | 
			
		||||
    sendToRenderer('session-initializing', true);
 | 
			
		||||
    sendToRenderer('update-status', 'Initializing sessions...');
 | 
			
		||||
 | 
			
		||||
    // Merged block
 | 
			
		||||
    const API_KEY = getApiKey();
 | 
			
		||||
    if (!API_KEY) {
 | 
			
		||||
        console.error('FATAL ERROR: API Key is not defined.');
 | 
			
		||||
@ -550,73 +550,90 @@ async function initializeLiveSummarySession(language = 'en') {
 | 
			
		||||
 | 
			
		||||
    initializeNewSession();
 | 
			
		||||
 | 
			
		||||
    const provider = await getAiProvider();
 | 
			
		||||
    const isGemini  = provider === 'gemini';
 | 
			
		||||
    console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
        const handleMyMessage = message => {
 | 
			
		||||
            const type = message.type;
 | 
			
		||||
            const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
 | 
			
		||||
 | 
			
		||||
            if (type === 'conversation.item.input_audio_transcription.delta') {
 | 
			
		||||
                if (myCompletionTimer) {
 | 
			
		||||
                    clearTimeout(myCompletionTimer);
 | 
			
		||||
                    myCompletionTimer = null;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                myCurrentUtterance += text;
 | 
			
		||||
 | 
			
		||||
                const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
 | 
			
		||||
 | 
			
		||||
                if (text && !text.includes('vq_lbr_audio_')) {
 | 
			
		||||
                    sendToRenderer('stt-update', {
 | 
			
		||||
                        speaker: 'Me',
 | 
			
		||||
                        text: continuousText,
 | 
			
		||||
                        isPartial: true,
 | 
			
		||||
                        isFinal: false,
 | 
			
		||||
                        timestamp: Date.now(),
 | 
			
		||||
                    });
 | 
			
		||||
                }
 | 
			
		||||
            } else if (type === 'conversation.item.input_audio_transcription.completed') {
 | 
			
		||||
            if (isGemini) {
 | 
			
		||||
                // console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
 | 
			
		||||
                const text = message.serverContent?.inputTranscription?.text || '';
 | 
			
		||||
                if (text && text.trim()) {
 | 
			
		||||
                    const finalUtteranceText = text.trim();
 | 
			
		||||
                    myCurrentUtterance = '';
 | 
			
		||||
 | 
			
		||||
                    debounceMyCompletion(finalUtteranceText);
 | 
			
		||||
                    const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
 | 
			
		||||
                    if (finalUtteranceText && finalUtteranceText !== '.') {
 | 
			
		||||
                        debounceMyCompletion(finalUtteranceText);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            } else if (message.error) {
 | 
			
		||||
            } else {
 | 
			
		||||
                const type = message.type;
 | 
			
		||||
                const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
 | 
			
		||||
 | 
			
		||||
                if (type === 'conversation.item.input_audio_transcription.delta') {
 | 
			
		||||
                    if (myCompletionTimer) clearTimeout(myCompletionTimer);
 | 
			
		||||
                    myCompletionTimer = null;
 | 
			
		||||
                    myCurrentUtterance += text;
 | 
			
		||||
                    const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
 | 
			
		||||
                    if (text && !text.includes('vq_lbr_audio_')) {
 | 
			
		||||
                        sendToRenderer('stt-update', {
 | 
			
		||||
                            speaker: 'Me',
 | 
			
		||||
                            text: continuousText,
 | 
			
		||||
                            isPartial: true,
 | 
			
		||||
                            isFinal: false,
 | 
			
		||||
                            timestamp: Date.now(),
 | 
			
		||||
                        });
 | 
			
		||||
                    }
 | 
			
		||||
                } else if (type === 'conversation.item.input_audio_transcription.completed') {
 | 
			
		||||
                    if (text && text.trim()) {
 | 
			
		||||
                        const finalUtteranceText = text.trim();
 | 
			
		||||
                        myCurrentUtterance = '';
 | 
			
		||||
                        debounceMyCompletion(finalUtteranceText);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (message.error) {
 | 
			
		||||
                console.error('[Me] STT Session Error:', message.error);
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        const handleTheirMessage = message => {
 | 
			
		||||
            const type = message.type;
 | 
			
		||||
            const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
 | 
			
		||||
 | 
			
		||||
            if (type === 'conversation.item.input_audio_transcription.delta') {
 | 
			
		||||
                if (theirCompletionTimer) {
 | 
			
		||||
                    clearTimeout(theirCompletionTimer);
 | 
			
		||||
                    theirCompletionTimer = null;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                theirCurrentUtterance += text;
 | 
			
		||||
 | 
			
		||||
                const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
 | 
			
		||||
 | 
			
		||||
                if (text && !text.includes('vq_lbr_audio_')) {
 | 
			
		||||
                    sendToRenderer('stt-update', {
 | 
			
		||||
                        speaker: 'Them',
 | 
			
		||||
                        text: continuousText,
 | 
			
		||||
                        isPartial: true,
 | 
			
		||||
                        isFinal: false,
 | 
			
		||||
                        timestamp: Date.now(),
 | 
			
		||||
                    });
 | 
			
		||||
                }
 | 
			
		||||
            } else if (type === 'conversation.item.input_audio_transcription.completed') {
 | 
			
		||||
            if (isGemini) {
 | 
			
		||||
                // console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
 | 
			
		||||
                const text = message.serverContent?.inputTranscription?.text || '';
 | 
			
		||||
                if (text && text.trim()) {
 | 
			
		||||
                    const finalUtteranceText = text.trim();
 | 
			
		||||
                    theirCurrentUtterance = '';
 | 
			
		||||
 | 
			
		||||
                    debounceTheirCompletion(finalUtteranceText);
 | 
			
		||||
                    const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
 | 
			
		||||
                    if (finalUtteranceText && finalUtteranceText !== '.') {
 | 
			
		||||
                        debounceTheirCompletion(finalUtteranceText);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            } else if (message.error) {
 | 
			
		||||
            } else {
 | 
			
		||||
                const type = message.type;
 | 
			
		||||
                const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
 | 
			
		||||
                if (type === 'conversation.item.input_audio_transcription.delta') {
 | 
			
		||||
                    if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
 | 
			
		||||
                    theirCompletionTimer = null;
 | 
			
		||||
                    theirCurrentUtterance += text;
 | 
			
		||||
                    const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
 | 
			
		||||
                    if (text && !text.includes('vq_lbr_audio_')) {
 | 
			
		||||
                        sendToRenderer('stt-update', {
 | 
			
		||||
                            speaker: 'Them',
 | 
			
		||||
                            text: continuousText,
 | 
			
		||||
                            isPartial: true,
 | 
			
		||||
                            isFinal: false,
 | 
			
		||||
                            timestamp: Date.now(),
 | 
			
		||||
                        });
 | 
			
		||||
                    }
 | 
			
		||||
                } else if (type === 'conversation.item.input_audio_transcription.completed') {
 | 
			
		||||
                    if (text && text.trim()) {
 | 
			
		||||
                        const finalUtteranceText = text.trim();
 | 
			
		||||
                        theirCurrentUtterance = '';
 | 
			
		||||
                        debounceTheirCompletion(finalUtteranceText);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            
 | 
			
		||||
            if (message.error) {
 | 
			
		||||
                console.error('[Them] STT Session Error:', message.error);
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
@ -638,10 +655,17 @@ async function initializeLiveSummarySession(language = 'en') {
 | 
			
		||||
            },
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        [mySttSession, theirSttSession] = await Promise.all([
 | 
			
		||||
            connectToOpenAiSession(API_KEY, mySttConfig, keyType),
 | 
			
		||||
            connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
 | 
			
		||||
        ]);
 | 
			
		||||
        if (isGemini) {
 | 
			
		||||
            [mySttSession, theirSttSession] = await Promise.all([
 | 
			
		||||
                connectToGeminiSession(API_KEY, mySttConfig),
 | 
			
		||||
                connectToGeminiSession(API_KEY, theirSttConfig),
 | 
			
		||||
            ]);
 | 
			
		||||
        } else {
 | 
			
		||||
            [mySttSession, theirSttSession] = await Promise.all([
 | 
			
		||||
                connectToOpenAiSession(API_KEY, mySttConfig, keyType),
 | 
			
		||||
                connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
 | 
			
		||||
            ]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        console.log('✅ Both STT sessions initialized successfully.');
 | 
			
		||||
        triggerAnalysisIfNeeded();
 | 
			
		||||
@ -653,7 +677,7 @@ async function initializeLiveSummarySession(language = 'en') {
 | 
			
		||||
        sendToRenderer('update-status', 'Connected. Ready to listen.');
 | 
			
		||||
        return true;
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
        console.error('❌ Failed to initialize OpenAI STT sessions:', error);
 | 
			
		||||
        console.error('❌ Failed to initialize STT sessions:', error);
 | 
			
		||||
        isInitializingSession = false;
 | 
			
		||||
        sendToRenderer('session-initializing', false);
 | 
			
		||||
        sendToRenderer('update-status', 'Initialization failed.');
 | 
			
		||||
@ -725,6 +749,9 @@ async function startMacOSAudioCapture() {
 | 
			
		||||
 | 
			
		||||
    let audioBuffer = Buffer.alloc(0);
 | 
			
		||||
 | 
			
		||||
    const provider = await getAiProvider();
 | 
			
		||||
    const isGemini  = provider === 'gemini';
 | 
			
		||||
 | 
			
		||||
    systemAudioProc.stdout.on('data', async data => {
 | 
			
		||||
        audioBuffer = Buffer.concat([audioBuffer, data]);
 | 
			
		||||
 | 
			
		||||
@ -739,10 +766,11 @@ async function startMacOSAudioCapture() {
 | 
			
		||||
 | 
			
		||||
            if (theirSttSession) {
 | 
			
		||||
                try {
 | 
			
		||||
                    // await theirSttSession.sendRealtimeInput({
 | 
			
		||||
                    //     audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' },
 | 
			
		||||
                    // });
 | 
			
		||||
                    await theirSttSession.sendRealtimeInput(base64Data);
 | 
			
		||||
                    // await theirSttSession.sendRealtimeInput(base64Data);
 | 
			
		||||
                    const payload = isGemini
 | 
			
		||||
                        ? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
 | 
			
		||||
                        : base64Data;
 | 
			
		||||
                    await theirSttSession.sendRealtimeInput(payload);
 | 
			
		||||
                } catch (err) {
 | 
			
		||||
                    console.error('Error sending system audio:', err.message);
 | 
			
		||||
                }
 | 
			
		||||
@ -861,9 +889,17 @@ function setupLiveSummaryIpcHandlers() {
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
 | 
			
		||||
    const provider = await getAiProvider();
 | 
			
		||||
    const isGemini  = provider === 'gemini';
 | 
			
		||||
        if (!mySttSession) return { success: false, error: 'User STT session not active' };
 | 
			
		||||
        try {
 | 
			
		||||
            await mySttSession.sendRealtimeInput(data);
 | 
			
		||||
            // await mySttSession.sendRealtimeInput(data);
 | 
			
		||||
                   // provider에 맞는 형식으로 래핑
 | 
			
		||||
       const payload = isGemini
 | 
			
		||||
           ? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
 | 
			
		||||
           : data;   // OpenAI는 base64 string 그대로
 | 
			
		||||
 | 
			
		||||
       await mySttSession.sendRealtimeInput(payload);
 | 
			
		||||
            return { success: true };
 | 
			
		||||
        } catch (error) {
 | 
			
		||||
            console.error('Error sending user audio:', error);
 | 
			
		||||
 | 
			
		||||
@ -233,7 +233,11 @@ class SimpleAEC {
 | 
			
		||||
        this.echoGain = 0.5;
 | 
			
		||||
        this.noiseFloor = 0.01;
 | 
			
		||||
 | 
			
		||||
        console.log('🎯 Weakened AEC initialized');
 | 
			
		||||
        // 🔧 Adaptive-gain parameters (User-tuned, very aggressive)
 | 
			
		||||
        this.targetErr = 0.002;
 | 
			
		||||
        this.adaptRate  = 0.1;
 | 
			
		||||
 | 
			
		||||
        console.log('🎯 AEC initialized (hyper-aggressive)');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    process(micData, systemData) {
 | 
			
		||||
@ -241,6 +245,19 @@ class SimpleAEC {
 | 
			
		||||
            return micData;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for (let i = 0; i < systemData.length; i++) {
 | 
			
		||||
            if (systemData[i] > 0.98) systemData[i] = 0.98;
 | 
			
		||||
            else if (systemData[i] < -0.98) systemData[i] = -0.98;
 | 
			
		||||
 | 
			
		||||
            systemData[i] = Math.tanh(systemData[i] * 4);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let sum2 = 0;
 | 
			
		||||
        for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
 | 
			
		||||
        const rms = Math.sqrt(sum2 / systemData.length);
 | 
			
		||||
        const targetRms = 0.08;                   // 🔧 기준 RMS (기존 0.1)
 | 
			
		||||
        const scale = targetRms / (rms + 1e-6);   // 1e-6: 0-division 방지
 | 
			
		||||
 | 
			
		||||
        const output = new Float32Array(micData.length);
 | 
			
		||||
 | 
			
		||||
        const optimalDelay = this.findOptimalDelay(micData, systemData);
 | 
			
		||||
@ -252,23 +269,32 @@ class SimpleAEC {
 | 
			
		||||
                const delayIndex = i - optimalDelay - d;
 | 
			
		||||
                if (delayIndex >= 0 && delayIndex < systemData.length) {
 | 
			
		||||
                    const weight = Math.exp(-Math.abs(d) / 1000);
 | 
			
		||||
                    echoEstimate += systemData[delayIndex] * this.echoGain * weight;
 | 
			
		||||
                    echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            output[i] = micData[i] - echoEstimate * 0.5;
 | 
			
		||||
            output[i] = micData[i] - echoEstimate * 0.9;
 | 
			
		||||
 | 
			
		||||
            if (Math.abs(output[i]) < this.noiseFloor) {
 | 
			
		||||
                output[i] *= 0.5;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
 | 
			
		||||
                output[i] *= 0.5;
 | 
			
		||||
                output[i] *= 0.25;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            output[i] = Math.max(-1, Math.min(1, output[i]));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        let errSum = 0;
 | 
			
		||||
        for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
 | 
			
		||||
        const errRms = Math.sqrt(errSum / output.length);
 | 
			
		||||
 | 
			
		||||
        const err = errRms - this.targetErr;
 | 
			
		||||
        this.echoGain += this.adaptRate * err;      // 비례 제어
 | 
			
		||||
        this.echoGain  = Math.max(0, Math.min(1, this.echoGain));
 | 
			
		||||
 | 
			
		||||
        return output;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -310,7 +336,7 @@ class SimpleAEC {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return similarity / (2 * windowSize + 1) < 0.2;
 | 
			
		||||
        return similarity / (2 * windowSize + 1) < 0.15;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user