enable gemini stt, solve windowmanager conflict
This commit is contained in:
parent
413ff96966
commit
8af7aae2b5
@ -29,6 +29,7 @@
|
|||||||
},
|
},
|
||||||
"license": "GPL-3.0",
|
"license": "GPL-3.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@google/genai": "^1.8.0",
|
||||||
"@google/generative-ai": "^0.24.1",
|
"@google/generative-ai": "^0.24.1",
|
||||||
"axios": "^1.10.0",
|
"axios": "^1.10.0",
|
||||||
"better-sqlite3": "^9.4.3",
|
"better-sqlite3": "^9.4.3",
|
||||||
|
@ -268,6 +268,7 @@ export class ApiKeyHeader extends LitElement {
|
|||||||
this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
|
this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
|
||||||
this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
|
this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
|
||||||
this.handleProviderChange = this.handleProviderChange.bind(this);
|
this.handleProviderChange = this.handleProviderChange.bind(this);
|
||||||
|
this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
reset() {
|
reset() {
|
||||||
@ -404,12 +405,20 @@ export class ApiKeyHeader extends LitElement {
|
|||||||
let isValid = false;
|
let isValid = false;
|
||||||
try {
|
try {
|
||||||
const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
|
const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
|
||||||
|
|
||||||
if (isValid) {
|
if (isValid) {
|
||||||
console.log('API key valid - starting slide out animation');
|
console.log('API key valid – checking system permissions…');
|
||||||
this.startSlideOutAnimation();
|
const permissionResult = await this.checkAndRequestPermissions();
|
||||||
this.validatedApiKey = this.apiKey.trim();
|
|
||||||
this.validatedProvider = this.selectedProvider;
|
if (permissionResult.success) {
|
||||||
|
console.log('All permissions granted – starting slide-out animation');
|
||||||
|
this.startSlideOutAnimation();
|
||||||
|
this.validatedApiKey = this.apiKey.trim();
|
||||||
|
this.validatedProvider = this.selectedProvider;
|
||||||
|
} else {
|
||||||
|
this.errorMessage = permissionResult.error || 'Permission setup required';
|
||||||
|
console.log('Permission setup incomplete:', permissionResult);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
this.errorMessage = 'Invalid API key - please check and try again';
|
this.errorMessage = 'Invalid API key - please check and try again';
|
||||||
console.log('API key validation failed');
|
console.log('API key validation failed');
|
||||||
@ -488,6 +497,45 @@ export class ApiKeyHeader extends LitElement {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async checkAndRequestPermissions() {
|
||||||
|
if (!window.require) return { success: true };
|
||||||
|
|
||||||
|
const { ipcRenderer } = window.require('electron');
|
||||||
|
|
||||||
|
try {
|
||||||
|
const permissions = await ipcRenderer.invoke('check-system-permissions');
|
||||||
|
console.log('[Permissions] Current status:', permissions);
|
||||||
|
|
||||||
|
if (!permissions.needsSetup) return { success: true };
|
||||||
|
|
||||||
|
if (!permissions.microphone) {
|
||||||
|
console.log('[Permissions] Requesting microphone permission…');
|
||||||
|
const micResult = await ipcRenderer.invoke('request-microphone-permission');
|
||||||
|
if (!micResult.success) {
|
||||||
|
await ipcRenderer.invoke('open-system-preferences', 'microphone');
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Please grant microphone access in System Preferences',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!permissions.screen) {
|
||||||
|
console.log('[Permissions] Screen-recording permission needed');
|
||||||
|
await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Please grant screen recording access in System Preferences',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return { success: true };
|
||||||
|
} catch (err) {
|
||||||
|
console.error('[Permissions] Error checking/requesting permissions:', err);
|
||||||
|
return { success: false, error: 'Failed to check permissions' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
startSlideOutAnimation() {
|
startSlideOutAnimation() {
|
||||||
this.classList.add('sliding-out');
|
this.classList.add('sliding-out');
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
const { GoogleGenerativeAI } = require('@google/generative-ai');
|
const { GoogleGenerativeAI } = require('@google/generative-ai');
|
||||||
|
const { GoogleGenAI } = require('@google/genai');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates and returns a Google Gemini client instance for generative AI.
|
* Creates and returns a Google Gemini client instance for generative AI.
|
||||||
@ -113,8 +114,58 @@ function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
|
||||||
|
// const liveClient = new GoogleGenAI({
|
||||||
|
// vertexai: false, // Vertex AI 사용 안함
|
||||||
|
// apiKey,
|
||||||
|
// });
|
||||||
|
|
||||||
|
// // 라이브 STT 세션 열기
|
||||||
|
// const session = await liveClient.live.connect({
|
||||||
|
// model: 'gemini-live-2.5-flash-preview',
|
||||||
|
// callbacks,
|
||||||
|
// config: {
|
||||||
|
// inputAudioTranscription: {}, // 실시간 STT 필수
|
||||||
|
// speechConfig: { languageCode: language },
|
||||||
|
// },
|
||||||
|
// });
|
||||||
|
|
||||||
|
// return {
|
||||||
|
// sendRealtimeInput: async data => session.send({
|
||||||
|
// audio: { data, mimeType: 'audio/pcm;rate=24000' }
|
||||||
|
// }),
|
||||||
|
// close: async () => session.close(),
|
||||||
|
// };
|
||||||
|
// }
|
||||||
|
|
||||||
|
async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
|
||||||
|
// ① 옛날 스타일 helper 재사용
|
||||||
|
const liveClient = new GoogleGenAI({ vertexai: false, apiKey });
|
||||||
|
|
||||||
|
// ② 언어 코드 강제 BCP-47 변환
|
||||||
|
const lang = language.includes('-') ? language : `${language}-US`;
|
||||||
|
|
||||||
|
const session = await liveClient.live.connect({
|
||||||
|
model: 'gemini-live-2.5-flash-preview',
|
||||||
|
callbacks,
|
||||||
|
config: {
|
||||||
|
inputAudioTranscription: {},
|
||||||
|
speechConfig: { languageCode: lang },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// ③ SDK 0.5+ : sendRealtimeInput 가 정식 이름
|
||||||
|
return {
|
||||||
|
sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
|
||||||
|
close: async () => session.close(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
createGeminiClient,
|
createGeminiClient,
|
||||||
getGeminiGenerativeModel,
|
getGeminiGenerativeModel,
|
||||||
createGeminiChat
|
createGeminiChat,
|
||||||
|
connectToGeminiSession,
|
||||||
};
|
};
|
@ -1833,8 +1833,103 @@ function setupIpcHandlers(openaiSessionRef) {
|
|||||||
header.webContents.send('request-firebase-logout');
|
header.webContents.send('request-firebase-logout');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ipcMain.handle('check-system-permissions', async () => {
|
||||||
|
const { systemPreferences } = require('electron');
|
||||||
|
const permissions = {
|
||||||
|
microphone: false,
|
||||||
|
screen: false,
|
||||||
|
needsSetup: false
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (process.platform === 'darwin') {
|
||||||
|
// Check microphone permission on macOS
|
||||||
|
const micStatus = systemPreferences.getMediaAccessStatus('microphone');
|
||||||
|
permissions.microphone = micStatus === 'granted';
|
||||||
|
|
||||||
|
try {
|
||||||
|
const sources = await desktopCapturer.getSources({
|
||||||
|
types: ['screen'],
|
||||||
|
thumbnailSize: { width: 1, height: 1 }
|
||||||
|
});
|
||||||
|
permissions.screen = sources && sources.length > 0;
|
||||||
|
} catch (err) {
|
||||||
|
console.log('[Permissions] Screen capture test failed:', err);
|
||||||
|
permissions.screen = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
permissions.needsSetup = !permissions.microphone || !permissions.screen;
|
||||||
|
} else {
|
||||||
|
permissions.microphone = true;
|
||||||
|
permissions.screen = true;
|
||||||
|
permissions.needsSetup = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[Permissions] System permissions status:', permissions);
|
||||||
|
return permissions;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[Permissions] Error checking permissions:', error);
|
||||||
|
return {
|
||||||
|
microphone: false,
|
||||||
|
screen: false,
|
||||||
|
needsSetup: true,
|
||||||
|
error: error.message
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
ipcMain.handle('request-microphone-permission', async () => {
|
||||||
|
if (process.platform !== 'darwin') {
|
||||||
|
return { success: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
const { systemPreferences } = require('electron');
|
||||||
|
try {
|
||||||
|
const status = systemPreferences.getMediaAccessStatus('microphone');
|
||||||
|
if (status === 'granted') {
|
||||||
|
return { success: true, status: 'already-granted' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Req mic permission
|
||||||
|
const granted = await systemPreferences.askForMediaAccess('microphone');
|
||||||
|
return {
|
||||||
|
success: granted,
|
||||||
|
status: granted ? 'granted' : 'denied'
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[Permissions] Error requesting microphone permission:', error);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: error.message
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
ipcMain.handle('open-system-preferences', async (event, section) => {
|
||||||
|
if (process.platform !== 'darwin') {
|
||||||
|
return { success: false, error: 'Not supported on this platform' };
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Open System Preferences to Privacy & Security > Screen Recording
|
||||||
|
if (section === 'screen-recording') {
|
||||||
|
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
|
||||||
|
} else if (section === 'microphone') {
|
||||||
|
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone');
|
||||||
|
} else {
|
||||||
|
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy');
|
||||||
|
}
|
||||||
|
return { success: true };
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[Permissions] Error opening system preferences:', error);
|
||||||
|
return { success: false, error: error.message };
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let storedApiKey = null;
|
let storedApiKey = null;
|
||||||
let storedProvider = 'openai';
|
let storedProvider = 'openai';
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ const { BrowserWindow, ipcMain } = require('electron');
|
|||||||
const { spawn } = require('child_process');
|
const { spawn } = require('child_process');
|
||||||
const { saveDebugAudio } = require('./audioUtils.js');
|
const { saveDebugAudio } = require('./audioUtils.js');
|
||||||
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
|
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
|
||||||
|
const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
|
||||||
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
|
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
|
||||||
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
|
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
|
||||||
const sqliteClient = require('../../common/services/sqliteClient');
|
const sqliteClient = require('../../common/services/sqliteClient');
|
||||||
@ -538,7 +539,6 @@ async function initializeLiveSummarySession(language = 'en') {
|
|||||||
sendToRenderer('session-initializing', true);
|
sendToRenderer('session-initializing', true);
|
||||||
sendToRenderer('update-status', 'Initializing sessions...');
|
sendToRenderer('update-status', 'Initializing sessions...');
|
||||||
|
|
||||||
// Merged block
|
|
||||||
const API_KEY = getApiKey();
|
const API_KEY = getApiKey();
|
||||||
if (!API_KEY) {
|
if (!API_KEY) {
|
||||||
console.error('FATAL ERROR: API Key is not defined.');
|
console.error('FATAL ERROR: API Key is not defined.');
|
||||||
@ -550,73 +550,90 @@ async function initializeLiveSummarySession(language = 'en') {
|
|||||||
|
|
||||||
initializeNewSession();
|
initializeNewSession();
|
||||||
|
|
||||||
|
const provider = await getAiProvider();
|
||||||
|
const isGemini = provider === 'gemini';
|
||||||
|
console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const handleMyMessage = message => {
|
const handleMyMessage = message => {
|
||||||
const type = message.type;
|
if (isGemini) {
|
||||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
// console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
|
||||||
|
const text = message.serverContent?.inputTranscription?.text || '';
|
||||||
if (type === 'conversation.item.input_audio_transcription.delta') {
|
|
||||||
if (myCompletionTimer) {
|
|
||||||
clearTimeout(myCompletionTimer);
|
|
||||||
myCompletionTimer = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
myCurrentUtterance += text;
|
|
||||||
|
|
||||||
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
|
|
||||||
|
|
||||||
if (text && !text.includes('vq_lbr_audio_')) {
|
|
||||||
sendToRenderer('stt-update', {
|
|
||||||
speaker: 'Me',
|
|
||||||
text: continuousText,
|
|
||||||
isPartial: true,
|
|
||||||
isFinal: false,
|
|
||||||
timestamp: Date.now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (type === 'conversation.item.input_audio_transcription.completed') {
|
|
||||||
if (text && text.trim()) {
|
if (text && text.trim()) {
|
||||||
const finalUtteranceText = text.trim();
|
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
|
||||||
myCurrentUtterance = '';
|
if (finalUtteranceText && finalUtteranceText !== '.') {
|
||||||
|
debounceMyCompletion(finalUtteranceText);
|
||||||
debounceMyCompletion(finalUtteranceText);
|
}
|
||||||
}
|
}
|
||||||
} else if (message.error) {
|
} else {
|
||||||
|
const type = message.type;
|
||||||
|
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||||
|
|
||||||
|
if (type === 'conversation.item.input_audio_transcription.delta') {
|
||||||
|
if (myCompletionTimer) clearTimeout(myCompletionTimer);
|
||||||
|
myCompletionTimer = null;
|
||||||
|
myCurrentUtterance += text;
|
||||||
|
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
|
||||||
|
if (text && !text.includes('vq_lbr_audio_')) {
|
||||||
|
sendToRenderer('stt-update', {
|
||||||
|
speaker: 'Me',
|
||||||
|
text: continuousText,
|
||||||
|
isPartial: true,
|
||||||
|
isFinal: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (type === 'conversation.item.input_audio_transcription.completed') {
|
||||||
|
if (text && text.trim()) {
|
||||||
|
const finalUtteranceText = text.trim();
|
||||||
|
myCurrentUtterance = '';
|
||||||
|
debounceMyCompletion(finalUtteranceText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (message.error) {
|
||||||
console.error('[Me] STT Session Error:', message.error);
|
console.error('[Me] STT Session Error:', message.error);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleTheirMessage = message => {
|
const handleTheirMessage = message => {
|
||||||
const type = message.type;
|
if (isGemini) {
|
||||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
// console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
|
||||||
|
const text = message.serverContent?.inputTranscription?.text || '';
|
||||||
if (type === 'conversation.item.input_audio_transcription.delta') {
|
|
||||||
if (theirCompletionTimer) {
|
|
||||||
clearTimeout(theirCompletionTimer);
|
|
||||||
theirCompletionTimer = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
theirCurrentUtterance += text;
|
|
||||||
|
|
||||||
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
|
|
||||||
|
|
||||||
if (text && !text.includes('vq_lbr_audio_')) {
|
|
||||||
sendToRenderer('stt-update', {
|
|
||||||
speaker: 'Them',
|
|
||||||
text: continuousText,
|
|
||||||
isPartial: true,
|
|
||||||
isFinal: false,
|
|
||||||
timestamp: Date.now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (type === 'conversation.item.input_audio_transcription.completed') {
|
|
||||||
if (text && text.trim()) {
|
if (text && text.trim()) {
|
||||||
const finalUtteranceText = text.trim();
|
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
|
||||||
theirCurrentUtterance = '';
|
if (finalUtteranceText && finalUtteranceText !== '.') {
|
||||||
|
debounceTheirCompletion(finalUtteranceText);
|
||||||
debounceTheirCompletion(finalUtteranceText);
|
}
|
||||||
}
|
}
|
||||||
} else if (message.error) {
|
} else {
|
||||||
|
const type = message.type;
|
||||||
|
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||||
|
if (type === 'conversation.item.input_audio_transcription.delta') {
|
||||||
|
if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
|
||||||
|
theirCompletionTimer = null;
|
||||||
|
theirCurrentUtterance += text;
|
||||||
|
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
|
||||||
|
if (text && !text.includes('vq_lbr_audio_')) {
|
||||||
|
sendToRenderer('stt-update', {
|
||||||
|
speaker: 'Them',
|
||||||
|
text: continuousText,
|
||||||
|
isPartial: true,
|
||||||
|
isFinal: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (type === 'conversation.item.input_audio_transcription.completed') {
|
||||||
|
if (text && text.trim()) {
|
||||||
|
const finalUtteranceText = text.trim();
|
||||||
|
theirCurrentUtterance = '';
|
||||||
|
debounceTheirCompletion(finalUtteranceText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (message.error) {
|
||||||
console.error('[Them] STT Session Error:', message.error);
|
console.error('[Them] STT Session Error:', message.error);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -638,10 +655,17 @@ async function initializeLiveSummarySession(language = 'en') {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
[mySttSession, theirSttSession] = await Promise.all([
|
if (isGemini) {
|
||||||
connectToOpenAiSession(API_KEY, mySttConfig, keyType),
|
[mySttSession, theirSttSession] = await Promise.all([
|
||||||
connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
|
connectToGeminiSession(API_KEY, mySttConfig),
|
||||||
]);
|
connectToGeminiSession(API_KEY, theirSttConfig),
|
||||||
|
]);
|
||||||
|
} else {
|
||||||
|
[mySttSession, theirSttSession] = await Promise.all([
|
||||||
|
connectToOpenAiSession(API_KEY, mySttConfig, keyType),
|
||||||
|
connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
console.log('✅ Both STT sessions initialized successfully.');
|
console.log('✅ Both STT sessions initialized successfully.');
|
||||||
triggerAnalysisIfNeeded();
|
triggerAnalysisIfNeeded();
|
||||||
@ -653,7 +677,7 @@ async function initializeLiveSummarySession(language = 'en') {
|
|||||||
sendToRenderer('update-status', 'Connected. Ready to listen.');
|
sendToRenderer('update-status', 'Connected. Ready to listen.');
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('❌ Failed to initialize OpenAI STT sessions:', error);
|
console.error('❌ Failed to initialize STT sessions:', error);
|
||||||
isInitializingSession = false;
|
isInitializingSession = false;
|
||||||
sendToRenderer('session-initializing', false);
|
sendToRenderer('session-initializing', false);
|
||||||
sendToRenderer('update-status', 'Initialization failed.');
|
sendToRenderer('update-status', 'Initialization failed.');
|
||||||
@ -725,6 +749,9 @@ async function startMacOSAudioCapture() {
|
|||||||
|
|
||||||
let audioBuffer = Buffer.alloc(0);
|
let audioBuffer = Buffer.alloc(0);
|
||||||
|
|
||||||
|
const provider = await getAiProvider();
|
||||||
|
const isGemini = provider === 'gemini';
|
||||||
|
|
||||||
systemAudioProc.stdout.on('data', async data => {
|
systemAudioProc.stdout.on('data', async data => {
|
||||||
audioBuffer = Buffer.concat([audioBuffer, data]);
|
audioBuffer = Buffer.concat([audioBuffer, data]);
|
||||||
|
|
||||||
@ -739,10 +766,11 @@ async function startMacOSAudioCapture() {
|
|||||||
|
|
||||||
if (theirSttSession) {
|
if (theirSttSession) {
|
||||||
try {
|
try {
|
||||||
// await theirSttSession.sendRealtimeInput({
|
// await theirSttSession.sendRealtimeInput(base64Data);
|
||||||
// audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' },
|
const payload = isGemini
|
||||||
// });
|
? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
|
||||||
await theirSttSession.sendRealtimeInput(base64Data);
|
: base64Data;
|
||||||
|
await theirSttSession.sendRealtimeInput(payload);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Error sending system audio:', err.message);
|
console.error('Error sending system audio:', err.message);
|
||||||
}
|
}
|
||||||
@ -861,9 +889,17 @@ function setupLiveSummaryIpcHandlers() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
|
ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
|
||||||
|
const provider = await getAiProvider();
|
||||||
|
const isGemini = provider === 'gemini';
|
||||||
if (!mySttSession) return { success: false, error: 'User STT session not active' };
|
if (!mySttSession) return { success: false, error: 'User STT session not active' };
|
||||||
try {
|
try {
|
||||||
await mySttSession.sendRealtimeInput(data);
|
// await mySttSession.sendRealtimeInput(data);
|
||||||
|
// provider에 맞는 형식으로 래핑
|
||||||
|
const payload = isGemini
|
||||||
|
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
|
||||||
|
: data; // OpenAI는 base64 string 그대로
|
||||||
|
|
||||||
|
await mySttSession.sendRealtimeInput(payload);
|
||||||
return { success: true };
|
return { success: true };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error sending user audio:', error);
|
console.error('Error sending user audio:', error);
|
||||||
|
@ -233,7 +233,11 @@ class SimpleAEC {
|
|||||||
this.echoGain = 0.5;
|
this.echoGain = 0.5;
|
||||||
this.noiseFloor = 0.01;
|
this.noiseFloor = 0.01;
|
||||||
|
|
||||||
console.log('🎯 Weakened AEC initialized');
|
// 🔧 Adaptive-gain parameters (User-tuned, very aggressive)
|
||||||
|
this.targetErr = 0.002;
|
||||||
|
this.adaptRate = 0.1;
|
||||||
|
|
||||||
|
console.log('🎯 AEC initialized (hyper-aggressive)');
|
||||||
}
|
}
|
||||||
|
|
||||||
process(micData, systemData) {
|
process(micData, systemData) {
|
||||||
@ -241,6 +245,19 @@ class SimpleAEC {
|
|||||||
return micData;
|
return micData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (let i = 0; i < systemData.length; i++) {
|
||||||
|
if (systemData[i] > 0.98) systemData[i] = 0.98;
|
||||||
|
else if (systemData[i] < -0.98) systemData[i] = -0.98;
|
||||||
|
|
||||||
|
systemData[i] = Math.tanh(systemData[i] * 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
let sum2 = 0;
|
||||||
|
for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
|
||||||
|
const rms = Math.sqrt(sum2 / systemData.length);
|
||||||
|
const targetRms = 0.08; // 🔧 기준 RMS (기존 0.1)
|
||||||
|
const scale = targetRms / (rms + 1e-6); // 1e-6: 0-division 방지
|
||||||
|
|
||||||
const output = new Float32Array(micData.length);
|
const output = new Float32Array(micData.length);
|
||||||
|
|
||||||
const optimalDelay = this.findOptimalDelay(micData, systemData);
|
const optimalDelay = this.findOptimalDelay(micData, systemData);
|
||||||
@ -252,23 +269,32 @@ class SimpleAEC {
|
|||||||
const delayIndex = i - optimalDelay - d;
|
const delayIndex = i - optimalDelay - d;
|
||||||
if (delayIndex >= 0 && delayIndex < systemData.length) {
|
if (delayIndex >= 0 && delayIndex < systemData.length) {
|
||||||
const weight = Math.exp(-Math.abs(d) / 1000);
|
const weight = Math.exp(-Math.abs(d) / 1000);
|
||||||
echoEstimate += systemData[delayIndex] * this.echoGain * weight;
|
echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output[i] = micData[i] - echoEstimate * 0.5;
|
output[i] = micData[i] - echoEstimate * 0.9;
|
||||||
|
|
||||||
if (Math.abs(output[i]) < this.noiseFloor) {
|
if (Math.abs(output[i]) < this.noiseFloor) {
|
||||||
output[i] *= 0.5;
|
output[i] *= 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
|
if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
|
||||||
output[i] *= 0.5;
|
output[i] *= 0.25;
|
||||||
}
|
}
|
||||||
|
|
||||||
output[i] = Math.max(-1, Math.min(1, output[i]));
|
output[i] = Math.max(-1, Math.min(1, output[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
let errSum = 0;
|
||||||
|
for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
|
||||||
|
const errRms = Math.sqrt(errSum / output.length);
|
||||||
|
|
||||||
|
const err = errRms - this.targetErr;
|
||||||
|
this.echoGain += this.adaptRate * err; // 비례 제어
|
||||||
|
this.echoGain = Math.max(0, Math.min(1, this.echoGain));
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -310,7 +336,7 @@ class SimpleAEC {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return similarity / (2 * windowSize + 1) < 0.2;
|
return similarity / (2 * windowSize + 1) < 0.15;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user