enable gemini stt, solve windowmanager conflict

This commit is contained in:
sanio 2025-07-05 22:18:19 +09:00
parent 413ff96966
commit 8af7aae2b5
6 changed files with 335 additions and 78 deletions

View File

@ -29,6 +29,7 @@
}, },
"license": "GPL-3.0", "license": "GPL-3.0",
"dependencies": { "dependencies": {
"@google/genai": "^1.8.0",
"@google/generative-ai": "^0.24.1", "@google/generative-ai": "^0.24.1",
"axios": "^1.10.0", "axios": "^1.10.0",
"better-sqlite3": "^9.4.3", "better-sqlite3": "^9.4.3",

View File

@ -268,6 +268,7 @@ export class ApiKeyHeader extends LitElement {
this.handleAnimationEnd = this.handleAnimationEnd.bind(this); this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this); this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
this.handleProviderChange = this.handleProviderChange.bind(this); this.handleProviderChange = this.handleProviderChange.bind(this);
this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
} }
reset() { reset() {
@ -404,12 +405,20 @@ export class ApiKeyHeader extends LitElement {
let isValid = false; let isValid = false;
try { try {
const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider); const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
if (isValid) { if (isValid) {
console.log('API key valid - starting slide out animation'); console.log('API key valid checking system permissions…');
this.startSlideOutAnimation(); const permissionResult = await this.checkAndRequestPermissions();
this.validatedApiKey = this.apiKey.trim();
this.validatedProvider = this.selectedProvider; if (permissionResult.success) {
console.log('All permissions granted starting slide-out animation');
this.startSlideOutAnimation();
this.validatedApiKey = this.apiKey.trim();
this.validatedProvider = this.selectedProvider;
} else {
this.errorMessage = permissionResult.error || 'Permission setup required';
console.log('Permission setup incomplete:', permissionResult);
}
} else { } else {
this.errorMessage = 'Invalid API key - please check and try again'; this.errorMessage = 'Invalid API key - please check and try again';
console.log('API key validation failed'); console.log('API key validation failed');
@ -488,6 +497,45 @@ export class ApiKeyHeader extends LitElement {
return false; return false;
} }
async checkAndRequestPermissions() {
if (!window.require) return { success: true };
const { ipcRenderer } = window.require('electron');
try {
const permissions = await ipcRenderer.invoke('check-system-permissions');
console.log('[Permissions] Current status:', permissions);
if (!permissions.needsSetup) return { success: true };
if (!permissions.microphone) {
console.log('[Permissions] Requesting microphone permission…');
const micResult = await ipcRenderer.invoke('request-microphone-permission');
if (!micResult.success) {
await ipcRenderer.invoke('open-system-preferences', 'microphone');
return {
success: false,
error: 'Please grant microphone access in System Preferences',
};
}
}
if (!permissions.screen) {
console.log('[Permissions] Screen-recording permission needed');
await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
return {
success: false,
error: 'Please grant screen recording access in System Preferences',
};
}
return { success: true };
} catch (err) {
console.error('[Permissions] Error checking/requesting permissions:', err);
return { success: false, error: 'Failed to check permissions' };
}
}
startSlideOutAnimation() { startSlideOutAnimation() {
this.classList.add('sliding-out'); this.classList.add('sliding-out');
} }

View File

@ -1,4 +1,5 @@
const { GoogleGenerativeAI } = require('@google/generative-ai'); const { GoogleGenerativeAI } = require('@google/generative-ai');
const { GoogleGenAI } = require('@google/genai');
/** /**
* Creates and returns a Google Gemini client instance for generative AI. * Creates and returns a Google Gemini client instance for generative AI.
@ -113,8 +114,58 @@ function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
}; };
} }
// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
// const liveClient = new GoogleGenAI({
// vertexai: false, // Vertex AI is not used
// apiKey,
// });
// // Open the live STT session
// const session = await liveClient.live.connect({
// model: 'gemini-live-2.5-flash-preview',
// callbacks,
// config: {
// inputAudioTranscription: {}, // required for real-time STT
// speechConfig: { languageCode: language },
// },
// });
// return {
// sendRealtimeInput: async data => session.send({
// audio: { data, mimeType: 'audio/pcm;rate=24000' }
// }),
// close: async () => session.close(),
// };
// }
/**
 * Opens a Gemini Live session configured for input-audio transcription (STT).
 *
 * @param {string} apiKey - API key passed to the GoogleGenAI client.
 * @param {object} [options]
 * @param {string} [options.language='en-US'] - Language code; a value that
 *   contains no '-' gets '-US' appended before it is sent.
 * @param {object} [options.callbacks={}] - Forwarded unchanged to `live.connect`.
 * @returns {Promise<{sendRealtimeInput: Function, close: Function}>} Thin
 *   async wrappers around the session's `sendRealtimeInput` and `close`.
 */
async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
    const genAi = new GoogleGenAI({ vertexai: false, apiKey });

    // Force a region-qualified code: anything without a '-' is suffixed with '-US'.
    // NOTE(review): a bare code such as 'ko' therefore becomes 'ko-US' — confirm
    // the service accepts that combination.
    let languageCode = language;
    if (!language.includes('-')) {
        languageCode = `${language}-US`;
    }

    const liveSession = await genAi.live.connect({
        model: 'gemini-live-2.5-flash-preview',
        callbacks,
        config: {
            inputAudioTranscription: {},
            speechConfig: { languageCode },
        },
    });

    // Original author's note: `sendRealtimeInput` is the official method name in SDK 0.5+.
    return {
        async sendRealtimeInput(payload) {
            return liveSession.sendRealtimeInput(payload);
        },
        async close() {
            return liveSession.close();
        },
    };
}
module.exports = { module.exports = {
createGeminiClient, createGeminiClient,
getGeminiGenerativeModel, getGeminiGenerativeModel,
createGeminiChat createGeminiChat,
connectToGeminiSession,
}; };

View File

@ -1833,8 +1833,103 @@ function setupIpcHandlers(openaiSessionRef) {
header.webContents.send('request-firebase-logout'); header.webContents.send('request-firebase-logout');
} }
}); });
// IPC 'check-system-permissions': reports whether microphone and screen capture
// are usable. Resolves to { microphone, screen, needsSetup }; if the check itself
// throws, resolves to the same shape with needsSetup: true plus an `error` message.
ipcMain.handle('check-system-permissions', async () => {
    const { systemPreferences } = require('electron');
    const report = { microphone: false, screen: false, needsSetup: false };

    try {
        if (process.platform !== 'darwin') {
            // Non-macOS platforms are reported as fully granted without probing.
            Object.assign(report, { microphone: true, screen: true, needsSetup: false });
        } else {
            report.microphone = systemPreferences.getMediaAccessStatus('microphone') === 'granted';

            // Probe screen capture by listing screen sources with a 1x1 thumbnail;
            // a thrown error or an empty list is treated as "not available".
            try {
                const screens = await desktopCapturer.getSources({
                    types: ['screen'],
                    thumbnailSize: { width: 1, height: 1 },
                });
                report.screen = screens && screens.length > 0;
            } catch (probeError) {
                console.log('[Permissions] Screen capture test failed:', probeError);
                report.screen = false;
            }

            report.needsSetup = !(report.microphone && report.screen);
        }

        console.log('[Permissions] System permissions status:', report);
        return report;
    } catch (error) {
        console.error('[Permissions] Error checking permissions:', error);
        return {
            microphone: false,
            screen: false,
            needsSetup: true,
            error: error.message,
        };
    }
});
// IPC 'request-microphone-permission': on macOS, asks for microphone access
// unless it is already granted. Resolves to { success, status } or, when the
// request throws, to { success: false, error }. Other platforms resolve to
// { success: true } immediately.
ipcMain.handle('request-microphone-permission', async () => {
    const onMac = process.platform === 'darwin';
    if (!onMac) {
        return { success: true };
    }

    const { systemPreferences } = require('electron');

    try {
        const current = systemPreferences.getMediaAccessStatus('microphone');
        if (current === 'granted') {
            return { success: true, status: 'already-granted' };
        }

        // Not granted yet: ask for access and report the answer.
        const allowed = await systemPreferences.askForMediaAccess('microphone');
        let outcome = 'denied';
        if (allowed) {
            outcome = 'granted';
        }
        return { success: allowed, status: outcome };
    } catch (error) {
        console.error('[Permissions] Error requesting microphone permission:', error);
        return { success: false, error: error.message };
    }
});
// IPC 'open-system-preferences': on macOS, opens the privacy settings pane that
// matches `section` ('screen-recording', 'microphone', anything else -> the
// general Privacy pane). Resolves to { success: true }, or { success: false, error }
// when opening fails or the platform is not macOS.
ipcMain.handle('open-system-preferences', async (event, section) => {
    const onMac = process.platform === 'darwin';
    if (!onMac) {
        return { success: false, error: 'Not supported on this platform' };
    }

    try {
        let paneUrl;
        switch (section) {
            case 'screen-recording':
                paneUrl = 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture';
                break;
            case 'microphone':
                paneUrl = 'x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone';
                break;
            default:
                paneUrl = 'x-apple.systempreferences:com.apple.preference.security?Privacy';
        }

        await shell.openExternal(paneUrl);
        return { success: true };
    } catch (error) {
        console.error('[Permissions] Error opening system preferences:', error);
        return { success: false, error: error.message };
    }
});
} }
let storedApiKey = null; let storedApiKey = null;
let storedProvider = 'openai'; let storedProvider = 'openai';

View File

@ -3,6 +3,7 @@ const { BrowserWindow, ipcMain } = require('electron');
const { spawn } = require('child_process'); const { spawn } = require('child_process');
const { saveDebugAudio } = require('./audioUtils.js'); const { saveDebugAudio } = require('./audioUtils.js');
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js'); const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js'); const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js'); const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
const sqliteClient = require('../../common/services/sqliteClient'); const sqliteClient = require('../../common/services/sqliteClient');
@ -538,7 +539,6 @@ async function initializeLiveSummarySession(language = 'en') {
sendToRenderer('session-initializing', true); sendToRenderer('session-initializing', true);
sendToRenderer('update-status', 'Initializing sessions...'); sendToRenderer('update-status', 'Initializing sessions...');
// Merged block
const API_KEY = getApiKey(); const API_KEY = getApiKey();
if (!API_KEY) { if (!API_KEY) {
console.error('FATAL ERROR: API Key is not defined.'); console.error('FATAL ERROR: API Key is not defined.');
@ -550,73 +550,90 @@ async function initializeLiveSummarySession(language = 'en') {
initializeNewSession(); initializeNewSession();
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);
try { try {
const handleMyMessage = message => { const handleMyMessage = message => {
const type = message.type; if (isGemini) {
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || ''; // console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
const text = message.serverContent?.inputTranscription?.text || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (myCompletionTimer) {
clearTimeout(myCompletionTimer);
myCompletionTimer = null;
}
myCurrentUtterance += text;
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Me',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) { if (text && text.trim()) {
const finalUtteranceText = text.trim(); const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
myCurrentUtterance = ''; if (finalUtteranceText && finalUtteranceText !== '.') {
debounceMyCompletion(finalUtteranceText);
debounceMyCompletion(finalUtteranceText); }
} }
} else if (message.error) { } else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (myCompletionTimer) clearTimeout(myCompletionTimer);
myCompletionTimer = null;
myCurrentUtterance += text;
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Me',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) {
const finalUtteranceText = text.trim();
myCurrentUtterance = '';
debounceMyCompletion(finalUtteranceText);
}
}
}
if (message.error) {
console.error('[Me] STT Session Error:', message.error); console.error('[Me] STT Session Error:', message.error);
} }
}; };
const handleTheirMessage = message => { const handleTheirMessage = message => {
const type = message.type; if (isGemini) {
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || ''; // console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
const text = message.serverContent?.inputTranscription?.text || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (theirCompletionTimer) {
clearTimeout(theirCompletionTimer);
theirCompletionTimer = null;
}
theirCurrentUtterance += text;
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Them',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) { if (text && text.trim()) {
const finalUtteranceText = text.trim(); const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
theirCurrentUtterance = ''; if (finalUtteranceText && finalUtteranceText !== '.') {
debounceTheirCompletion(finalUtteranceText);
debounceTheirCompletion(finalUtteranceText); }
} }
} else if (message.error) { } else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
theirCompletionTimer = null;
theirCurrentUtterance += text;
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Them',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) {
const finalUtteranceText = text.trim();
theirCurrentUtterance = '';
debounceTheirCompletion(finalUtteranceText);
}
}
}
if (message.error) {
console.error('[Them] STT Session Error:', message.error); console.error('[Them] STT Session Error:', message.error);
} }
}; };
@ -638,10 +655,17 @@ async function initializeLiveSummarySession(language = 'en') {
}, },
}; };
[mySttSession, theirSttSession] = await Promise.all([ if (isGemini) {
connectToOpenAiSession(API_KEY, mySttConfig, keyType), [mySttSession, theirSttSession] = await Promise.all([
connectToOpenAiSession(API_KEY, theirSttConfig, keyType), connectToGeminiSession(API_KEY, mySttConfig),
]); connectToGeminiSession(API_KEY, theirSttConfig),
]);
} else {
[mySttSession, theirSttSession] = await Promise.all([
connectToOpenAiSession(API_KEY, mySttConfig, keyType),
connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
]);
}
console.log('✅ Both STT sessions initialized successfully.'); console.log('✅ Both STT sessions initialized successfully.');
triggerAnalysisIfNeeded(); triggerAnalysisIfNeeded();
@ -653,7 +677,7 @@ async function initializeLiveSummarySession(language = 'en') {
sendToRenderer('update-status', 'Connected. Ready to listen.'); sendToRenderer('update-status', 'Connected. Ready to listen.');
return true; return true;
} catch (error) { } catch (error) {
console.error('❌ Failed to initialize OpenAI STT sessions:', error); console.error('❌ Failed to initialize STT sessions:', error);
isInitializingSession = false; isInitializingSession = false;
sendToRenderer('session-initializing', false); sendToRenderer('session-initializing', false);
sendToRenderer('update-status', 'Initialization failed.'); sendToRenderer('update-status', 'Initialization failed.');
@ -725,6 +749,9 @@ async function startMacOSAudioCapture() {
let audioBuffer = Buffer.alloc(0); let audioBuffer = Buffer.alloc(0);
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
systemAudioProc.stdout.on('data', async data => { systemAudioProc.stdout.on('data', async data => {
audioBuffer = Buffer.concat([audioBuffer, data]); audioBuffer = Buffer.concat([audioBuffer, data]);
@ -739,10 +766,11 @@ async function startMacOSAudioCapture() {
if (theirSttSession) { if (theirSttSession) {
try { try {
// await theirSttSession.sendRealtimeInput({ // await theirSttSession.sendRealtimeInput(base64Data);
// audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' }, const payload = isGemini
// }); ? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
await theirSttSession.sendRealtimeInput(base64Data); : base64Data;
await theirSttSession.sendRealtimeInput(payload);
} catch (err) { } catch (err) {
console.error('Error sending system audio:', err.message); console.error('Error sending system audio:', err.message);
} }
@ -861,9 +889,17 @@ function setupLiveSummaryIpcHandlers() {
}); });
ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => { ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
if (!mySttSession) return { success: false, error: 'User STT session not active' }; if (!mySttSession) return { success: false, error: 'User STT session not active' };
try { try {
await mySttSession.sendRealtimeInput(data); // await mySttSession.sendRealtimeInput(data);
// provider에 맞는 형식으로 래핑
const payload = isGemini
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
: data; // OpenAI는 base64 string 그대로
await mySttSession.sendRealtimeInput(payload);
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
console.error('Error sending user audio:', error); console.error('Error sending user audio:', error);

View File

@ -233,7 +233,11 @@ class SimpleAEC {
this.echoGain = 0.5; this.echoGain = 0.5;
this.noiseFloor = 0.01; this.noiseFloor = 0.01;
console.log('🎯 Weakened AEC initialized'); // 🔧 Adaptive-gain parameters (User-tuned, very aggressive)
this.targetErr = 0.002;
this.adaptRate = 0.1;
console.log('🎯 AEC initialized (hyper-aggressive)');
} }
process(micData, systemData) { process(micData, systemData) {
@ -241,6 +245,19 @@ class SimpleAEC {
return micData; return micData;
} }
for (let i = 0; i < systemData.length; i++) {
if (systemData[i] > 0.98) systemData[i] = 0.98;
else if (systemData[i] < -0.98) systemData[i] = -0.98;
systemData[i] = Math.tanh(systemData[i] * 4);
}
let sum2 = 0;
for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
const rms = Math.sqrt(sum2 / systemData.length);
const targetRms = 0.08; // 🔧 기준 RMS (기존 0.1)
const scale = targetRms / (rms + 1e-6); // 1e-6: 0-division 방지
const output = new Float32Array(micData.length); const output = new Float32Array(micData.length);
const optimalDelay = this.findOptimalDelay(micData, systemData); const optimalDelay = this.findOptimalDelay(micData, systemData);
@ -252,23 +269,32 @@ class SimpleAEC {
const delayIndex = i - optimalDelay - d; const delayIndex = i - optimalDelay - d;
if (delayIndex >= 0 && delayIndex < systemData.length) { if (delayIndex >= 0 && delayIndex < systemData.length) {
const weight = Math.exp(-Math.abs(d) / 1000); const weight = Math.exp(-Math.abs(d) / 1000);
echoEstimate += systemData[delayIndex] * this.echoGain * weight; echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
} }
} }
output[i] = micData[i] - echoEstimate * 0.5; output[i] = micData[i] - echoEstimate * 0.9;
if (Math.abs(output[i]) < this.noiseFloor) { if (Math.abs(output[i]) < this.noiseFloor) {
output[i] *= 0.5; output[i] *= 0.5;
} }
if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) { if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
output[i] *= 0.5; output[i] *= 0.25;
} }
output[i] = Math.max(-1, Math.min(1, output[i])); output[i] = Math.max(-1, Math.min(1, output[i]));
} }
let errSum = 0;
for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
const errRms = Math.sqrt(errSum / output.length);
const err = errRms - this.targetErr;
this.echoGain += this.adaptRate * err; // 비례 제어
this.echoGain = Math.max(0, Math.min(1, this.echoGain));
return output; return output;
} }
@ -310,7 +336,7 @@ class SimpleAEC {
} }
} }
return similarity / (2 * windowSize + 1) < 0.2; return similarity / (2 * windowSize + 1) < 0.15;
} }
} }