enable gemini stt, resolve windowmanager conflict
parent 413ff96966
commit 8af7aae2b5
@@ -29,6 +29,7 @@
  },
  "license": "GPL-3.0",
  "dependencies": {
    "@google/genai": "^1.8.0",
    "@google/generative-ai": "^0.24.1",
    "axios": "^1.10.0",
    "better-sqlite3": "^9.4.3",
@@ -268,6 +268,7 @@ export class ApiKeyHeader extends LitElement {
        this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
        this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
        this.handleProviderChange = this.handleProviderChange.bind(this);
        this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
    }

    reset() {

@@ -406,10 +407,18 @@ export class ApiKeyHeader extends LitElement {
        const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);

        if (isValid) {
            console.log('API key valid - starting slide out animation');
            console.log('API key valid – checking system permissions…');
            const permissionResult = await this.checkAndRequestPermissions();

            if (permissionResult.success) {
                console.log('All permissions granted – starting slide-out animation');
                this.startSlideOutAnimation();
                this.validatedApiKey = this.apiKey.trim();
                this.validatedProvider = this.selectedProvider;
            } else {
                this.errorMessage = permissionResult.error || 'Permission setup required';
                console.log('Permission setup incomplete:', permissionResult);
            }
        } else {
            this.errorMessage = 'Invalid API key - please check and try again';
            console.log('API key validation failed');

@@ -488,6 +497,45 @@ export class ApiKeyHeader extends LitElement {
        return false;
    }

    async checkAndRequestPermissions() {
        if (!window.require) return { success: true };

        const { ipcRenderer } = window.require('electron');

        try {
            const permissions = await ipcRenderer.invoke('check-system-permissions');
            console.log('[Permissions] Current status:', permissions);

            if (!permissions.needsSetup) return { success: true };

            if (!permissions.microphone) {
                console.log('[Permissions] Requesting microphone permission…');
                const micResult = await ipcRenderer.invoke('request-microphone-permission');
                if (!micResult.success) {
                    await ipcRenderer.invoke('open-system-preferences', 'microphone');
                    return {
                        success: false,
                        error: 'Please grant microphone access in System Preferences',
                    };
                }
            }

            if (!permissions.screen) {
                console.log('[Permissions] Screen-recording permission needed');
                await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
                return {
                    success: false,
                    error: 'Please grant screen recording access in System Preferences',
                };
            }

            return { success: true };
        } catch (err) {
            console.error('[Permissions] Error checking/requesting permissions:', err);
            return { success: false, error: 'Failed to check permissions' };
        }
    }

    startSlideOutAnimation() {
        this.classList.add('sliding-out');
    }
@@ -1,4 +1,5 @@
const { GoogleGenerativeAI } = require('@google/generative-ai');
const { GoogleGenAI } = require('@google/genai');

/**
 * Creates and returns a Google Gemini client instance for generative AI.
@@ -113,8 +114,58 @@ function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
    };
}

// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
//     const liveClient = new GoogleGenAI({
//         vertexai: false, // do not use Vertex AI
//         apiKey,
//     });
//
//     // open a live STT session
//     const session = await liveClient.live.connect({
//         model: 'gemini-live-2.5-flash-preview',
//         callbacks,
//         config: {
//             inputAudioTranscription: {}, // required for real-time STT
//             speechConfig: { languageCode: language },
//         },
//     });
//
//     return {
//         sendRealtimeInput: async data => session.send({
//             audio: { data, mimeType: 'audio/pcm;rate=24000' }
//         }),
//         close: async () => session.close(),
//     };
// }

async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
    // ① reuse the old-style helper
    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });

    // ② force the language code into BCP-47 form
    const lang = language.includes('-') ? language : `${language}-US`;

    const session = await liveClient.live.connect({
        model: 'gemini-live-2.5-flash-preview',
        callbacks,
        config: {
            inputAudioTranscription: {},
            speechConfig: { languageCode: lang },
        },
    });

    // ③ SDK 0.5+: sendRealtimeInput is the official name
    return {
        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
        close: async () => session.close(),
    };
}


module.exports = {
    createGeminiClient,
    getGeminiGenerativeModel,
    createGeminiChat
    createGeminiChat,
    connectToGeminiSession,
};
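A minimal usage sketch (not part of this commit) of how the new connectToGeminiSession helper can be driven end to end. It assumes the @google/genai live callbacks include an onmessage/onerror handler pair, and it reuses the transcript path (serverContent.inputTranscription.text) and the 24 kHz PCM payload shape that the rest of this commit relies on; the demo function and variable names are illustrative only.

const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');

async function demoGeminiStt(apiKey, base64Pcm) {
    const session = await connectToGeminiSession(apiKey, {
        language: 'en-US',
        callbacks: {
            // Live transcription arrives under serverContent.inputTranscription
            onmessage: message => {
                const text = message.serverContent?.inputTranscription?.text || '';
                if (text.trim()) console.log('[Gemini STT]', text.trim());
            },
            onerror: err => console.error('Gemini live session error:', err),
        },
    });

    // Gemini expects the audio wrapped with an explicit mimeType
    await session.sendRealtimeInput({
        audio: { data: base64Pcm, mimeType: 'audio/pcm;rate=24000' },
    });

    await session.close();
}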
@@ -1833,8 +1833,103 @@ function setupIpcHandlers(openaiSessionRef) {
            header.webContents.send('request-firebase-logout');
        }
    });

    ipcMain.handle('check-system-permissions', async () => {
        const { systemPreferences } = require('electron');
        const permissions = {
            microphone: false,
            screen: false,
            needsSetup: false
        };

        try {
            if (process.platform === 'darwin') {
                // Check microphone permission on macOS
                const micStatus = systemPreferences.getMediaAccessStatus('microphone');
                permissions.microphone = micStatus === 'granted';

                try {
                    const sources = await desktopCapturer.getSources({
                        types: ['screen'],
                        thumbnailSize: { width: 1, height: 1 }
                    });
                    permissions.screen = sources && sources.length > 0;
                } catch (err) {
                    console.log('[Permissions] Screen capture test failed:', err);
                    permissions.screen = false;
                }

                permissions.needsSetup = !permissions.microphone || !permissions.screen;
            } else {
                permissions.microphone = true;
                permissions.screen = true;
                permissions.needsSetup = false;
            }

            console.log('[Permissions] System permissions status:', permissions);
            return permissions;
        } catch (error) {
            console.error('[Permissions] Error checking permissions:', error);
            return {
                microphone: false,
                screen: false,
                needsSetup: true,
                error: error.message
            };
        }
    });

    ipcMain.handle('request-microphone-permission', async () => {
        if (process.platform !== 'darwin') {
            return { success: true };
        }

        const { systemPreferences } = require('electron');
        try {
            const status = systemPreferences.getMediaAccessStatus('microphone');
            if (status === 'granted') {
                return { success: true, status: 'already-granted' };
            }

            // Request microphone access from the user
            const granted = await systemPreferences.askForMediaAccess('microphone');
            return {
                success: granted,
                status: granted ? 'granted' : 'denied'
            };
        } catch (error) {
            console.error('[Permissions] Error requesting microphone permission:', error);
            return {
                success: false,
                error: error.message
            };
        }
    });

    ipcMain.handle('open-system-preferences', async (event, section) => {
        if (process.platform !== 'darwin') {
            return { success: false, error: 'Not supported on this platform' };
        }

        try {
            // Deep-link into the relevant Privacy & Security pane
            if (section === 'screen-recording') {
                await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
            } else if (section === 'microphone') {
                await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone');
            } else {
                await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy');
            }
            return { success: true };
        } catch (error) {
            console.error('[Permissions] Error opening system preferences:', error);
            return { success: false, error: error.message };
        }
    });
}


let storedApiKey = null;
let storedProvider = 'openai';
@@ -3,6 +3,7 @@ const { BrowserWindow, ipcMain } = require('electron');
const { spawn } = require('child_process');
const { saveDebugAudio } = require('./audioUtils.js');
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
const sqliteClient = require('../../common/services/sqliteClient');

@@ -538,7 +539,6 @@ async function initializeLiveSummarySession(language = 'en') {
    sendToRenderer('session-initializing', true);
    sendToRenderer('update-status', 'Initializing sessions...');

    // Merged block
    const API_KEY = getApiKey();
    if (!API_KEY) {
        console.error('FATAL ERROR: API Key is not defined.');
@@ -550,21 +550,30 @@ async function initializeLiveSummarySession(language = 'en') {

    initializeNewSession();

    const provider = await getAiProvider();
    const isGemini = provider === 'gemini';
    console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);

    try {
        const handleMyMessage = message => {
            if (isGemini) {
                // console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
                const text = message.serverContent?.inputTranscription?.text || '';
                if (text && text.trim()) {
                    const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
                    if (finalUtteranceText && finalUtteranceText !== '.') {
                        debounceMyCompletion(finalUtteranceText);
                    }
                }
            } else {
                const type = message.type;
                const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';

                if (type === 'conversation.item.input_audio_transcription.delta') {
                    if (myCompletionTimer) {
                        clearTimeout(myCompletionTimer);
                    if (myCompletionTimer) clearTimeout(myCompletionTimer);
                    myCompletionTimer = null;
                    }

                    myCurrentUtterance += text;

                    const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;

                    if (text && !text.includes('vq_lbr_audio_')) {
                        sendToRenderer('stt-update', {
                            speaker: 'Me',

@@ -578,28 +587,34 @@ async function initializeLiveSummarySession(language = 'en') {
                if (text && text.trim()) {
                    const finalUtteranceText = text.trim();
                    myCurrentUtterance = '';

                    debounceMyCompletion(finalUtteranceText);
                }
            } else if (message.error) {
            }
            }

            if (message.error) {
                console.error('[Me] STT Session Error:', message.error);
            }
        };

        const handleTheirMessage = message => {
            if (isGemini) {
                // console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
                const text = message.serverContent?.inputTranscription?.text || '';
                if (text && text.trim()) {
                    const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
                    if (finalUtteranceText && finalUtteranceText !== '.') {
                        debounceTheirCompletion(finalUtteranceText);
                    }
                }
            } else {
                const type = message.type;
                const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';

                if (type === 'conversation.item.input_audio_transcription.delta') {
                    if (theirCompletionTimer) {
                        clearTimeout(theirCompletionTimer);
                    if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
                    theirCompletionTimer = null;
                    }

                    theirCurrentUtterance += text;

                    const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;

                    if (text && !text.includes('vq_lbr_audio_')) {
                        sendToRenderer('stt-update', {
                            speaker: 'Them',

@@ -613,10 +628,12 @@ async function initializeLiveSummarySession(language = 'en') {
                if (text && text.trim()) {
                    const finalUtteranceText = text.trim();
                    theirCurrentUtterance = '';

                    debounceTheirCompletion(finalUtteranceText);
                }
            } else if (message.error) {
            }
            }

            if (message.error) {
                console.error('[Them] STT Session Error:', message.error);
            }
        };

@@ -638,10 +655,17 @@ async function initializeLiveSummarySession(language = 'en') {
        },
    };

    if (isGemini) {
        [mySttSession, theirSttSession] = await Promise.all([
            connectToGeminiSession(API_KEY, mySttConfig),
            connectToGeminiSession(API_KEY, theirSttConfig),
        ]);
    } else {
        [mySttSession, theirSttSession] = await Promise.all([
            connectToOpenAiSession(API_KEY, mySttConfig, keyType),
            connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
        ]);
    }

    console.log('✅ Both STT sessions initialized successfully.');
    triggerAnalysisIfNeeded();

@@ -653,7 +677,7 @@ async function initializeLiveSummarySession(language = 'en') {
        sendToRenderer('update-status', 'Connected. Ready to listen.');
        return true;
    } catch (error) {
        console.error('❌ Failed to initialize OpenAI STT sessions:', error);
        console.error('❌ Failed to initialize STT sessions:', error);
        isInitializingSession = false;
        sendToRenderer('session-initializing', false);
        sendToRenderer('update-status', 'Initialization failed.');

@@ -725,6 +749,9 @@ async function startMacOSAudioCapture() {

    let audioBuffer = Buffer.alloc(0);

    const provider = await getAiProvider();
    const isGemini = provider === 'gemini';

    systemAudioProc.stdout.on('data', async data => {
        audioBuffer = Buffer.concat([audioBuffer, data]);

@@ -739,10 +766,11 @@ async function startMacOSAudioCapture() {

        if (theirSttSession) {
            try {
                // await theirSttSession.sendRealtimeInput({
                //     audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' },
                // });
                await theirSttSession.sendRealtimeInput(base64Data);
                // await theirSttSession.sendRealtimeInput(base64Data);
                const payload = isGemini
                    ? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
                    : base64Data;
                await theirSttSession.sendRealtimeInput(payload);
            } catch (err) {
                console.error('Error sending system audio:', err.message);
            }

@@ -861,9 +889,17 @@ function setupLiveSummaryIpcHandlers() {
    });

    ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
        const provider = await getAiProvider();
        const isGemini = provider === 'gemini';
        if (!mySttSession) return { success: false, error: 'User STT session not active' };
        try {
            await mySttSession.sendRealtimeInput(data);
            // await mySttSession.sendRealtimeInput(data);
            // Wrap the audio in the format the active provider expects
            const payload = isGemini
                ? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
                : data; // OpenAI takes the raw base64 string

            await mySttSession.sendRealtimeInput(payload);
            return { success: true };
        } catch (error) {
            console.error('Error sending user audio:', error);
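The same provider-dependent wrapping appears twice in this commit (the macOS system-audio capture loop and the send-audio-content handler). A hypothetical helper, not part of the commit, that captures that convention in one place; the function name is illustrative only:

function buildSttPayload(isGemini, base64Data, mimeType = 'audio/pcm;rate=24000') {
    // Gemini's live session wants { audio: { data, mimeType } };
    // OpenAI's realtime session takes the raw base64 string.
    return isGemini
        ? { audio: { data: base64Data, mimeType } }
        : base64Data;
}

// e.g. await mySttSession.sendRealtimeInput(buildSttPayload(isGemini, data, mimeType));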
@@ -233,7 +233,11 @@ class SimpleAEC {
        this.echoGain = 0.5;
        this.noiseFloor = 0.01;

        console.log('🎯 Weakened AEC initialized');
        // 🔧 Adaptive-gain parameters (user-tuned, very aggressive)
        this.targetErr = 0.002;
        this.adaptRate = 0.1;

        console.log('🎯 AEC initialized (hyper-aggressive)');
    }

    process(micData, systemData) {

@@ -241,6 +245,19 @@ class SimpleAEC {
            return micData;
        }

        for (let i = 0; i < systemData.length; i++) {
            if (systemData[i] > 0.98) systemData[i] = 0.98;
            else if (systemData[i] < -0.98) systemData[i] = -0.98;

            systemData[i] = Math.tanh(systemData[i] * 4);
        }

        let sum2 = 0;
        for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
        const rms = Math.sqrt(sum2 / systemData.length);
        const targetRms = 0.08; // 🔧 reference RMS (was 0.1)
        const scale = targetRms / (rms + 1e-6); // 1e-6 guards against division by zero

        const output = new Float32Array(micData.length);

        const optimalDelay = this.findOptimalDelay(micData, systemData);

@@ -252,23 +269,32 @@ class SimpleAEC {
                const delayIndex = i - optimalDelay - d;
                if (delayIndex >= 0 && delayIndex < systemData.length) {
                    const weight = Math.exp(-Math.abs(d) / 1000);
                    echoEstimate += systemData[delayIndex] * this.echoGain * weight;
                    echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
                }
            }

            output[i] = micData[i] - echoEstimate * 0.5;
            output[i] = micData[i] - echoEstimate * 0.9;

            if (Math.abs(output[i]) < this.noiseFloor) {
                output[i] *= 0.5;
            }

            if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
                output[i] *= 0.5;
                output[i] *= 0.25;
            }

            output[i] = Math.max(-1, Math.min(1, output[i]));
        }

        let errSum = 0;
        for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
        const errRms = Math.sqrt(errSum / output.length);

        const err = errRms - this.targetErr;
        this.echoGain += this.adaptRate * err; // proportional control
        this.echoGain = Math.max(0, Math.min(1, this.echoGain));

        return output;
    }

@@ -310,7 +336,7 @@ class SimpleAEC {
            }
        }

        return similarity / (2 * windowSize + 1) < 0.2;
        return similarity / (2 * windowSize + 1) < 0.15;
    }
}
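A minimal usage sketch for the tuned canceller, not part of the commit. It assumes SimpleAEC takes no constructor arguments (only the fields shown above are set) and that the microphone and system-audio frames are equal-length Float32Array chunks, which is what process() operates on:

const aec = new SimpleAEC();

function onAudioFrames(micFrame, systemFrame) {
    // Returns the mic frame with the estimated system-audio echo subtracted and
    // clamped to [-1, 1]; echoGain self-adjusts toward targetErr on every call.
    return aec.process(micFrame, systemFrame);
}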