implemented AEC + guarded STT errors

samtiz 2025-07-08 19:22:53 +09:00
parent da6602ef66
commit 733ea7a0f6
7 changed files with 118 additions and 176 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -17,7 +17,14 @@ async function createSTT({ apiKey, language = 'en-US', callbacks = {}, ...config
const session = await liveClient.live.connect({
model: 'gemini-live-2.5-flash-preview',
callbacks,
callbacks: {
...callbacks,
onMessage: (msg) => {
if (!msg || typeof msg !== 'object') return;
msg.provider = 'gemini';
callbacks.onmessage?.(msg);
}
},
config: {
inputAudioTranscription: {},
speechConfig: { languageCode: lang },
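Annotation: the hunk above wraps the caller's callbacks so every Gemini message is tagged with its provider before being forwarded. A minimal sketch of the same pattern, with illustrative names that are not part of this commit:
// Sketch only — tagCallbacks is illustrative.
function tagCallbacks(callbacks, provider) {
    return {
        ...callbacks,
        onmessage: (msg) => {
            if (!msg || typeof msg !== 'object') return;
            msg.provider = provider;        // annotate before forwarding
            callbacks.onmessage?.(msg);
        },
    };
}
// usage: liveClient.live.connect({ model, callbacks: tagCallbacks(callbacks, 'gemini'), config })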

View File

@ -72,6 +72,7 @@ async function createSTT({ apiKey, language = 'en', callbacks = {}, usePortkey =
close: () => {
if (ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ type: 'session.close' }));
ws.onmessage = ws.onerror = () => {}; // remove handlers
ws.close(1000, 'Client initiated close.');
}
}
@ -79,10 +80,17 @@ async function createSTT({ apiKey, language = 'en', callbacks = {}, usePortkey =
};
ws.onmessage = (event) => {
const message = JSON.parse(event.data);
if (callbacks && callbacks.onmessage) {
callbacks.onmessage(message);
}
// ── filter out close / heartbeat packets ──────────────────────────────
if (!event.data || event.data === 'null' || event.data === '[DONE]') return;
let msg;
try { msg = JSON.parse(event.data); }
catch { return; } // ignore JSON parse failures
if (!msg || typeof msg !== 'object') return;
msg.provider = 'openai'; // ← always set explicitly
callbacks.onmessage?.(msg);
};
ws.onerror = (error) => {
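Annotation: the onmessage guard above drops empty, 'null', and '[DONE]' frames and ignores malformed JSON before tagging the provider. The same checks, isolated into a hypothetical helper for clarity:
// Sketch only — parseRealtimeEvent is illustrative, not part of this commit.
function parseRealtimeEvent(raw) {
    if (!raw || raw === 'null' || raw === '[DONE]') return null; // heartbeat / close frames
    let msg;
    try { msg = JSON.parse(raw); } catch { return null; }        // ignore malformed JSON
    return (msg && typeof msg === 'object') ? msg : null;
}
// parseRealtimeEvent('[DONE]')        -> null
// parseRealtimeEvent('{"type":"a"}')  -> { type: 'a' }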

View File

@ -1,4 +1,4 @@
const { BrowserWindow } = require('electron');
const { BrowserWindow, app } = require('electron');
const SttService = require('./stt/sttService');
const SummaryService = require('./summary/summaryService');
const authService = require('../../common/services/authService');
@ -213,9 +213,9 @@ class ListenService {
try {
await this.sendAudioContent(data, mimeType);
return { success: true };
} catch (error) {
console.error('Error sending user audio:', error);
return { success: false, error: error.message };
} catch (e) {
console.error('Error sending user audio:', e);
return { success: false, error: e.message };
}
});
@ -237,9 +237,13 @@ class ListenService {
if (process.platform !== 'darwin') {
return { success: false, error: 'macOS audio capture only available on macOS' };
}
if (this.sttService.isMacOSAudioRunning?.()) {
return { success: false, error: 'already_running' };
}
try {
const success = await this.startMacOSAudioCapture();
return { success };
return { success, error: null };
} catch (error) {
console.error('Error starting macOS audio capture:', error);
return { success: false, error: error.message };
@ -274,4 +278,4 @@ class ListenService {
}
}
module.exports = ListenService;
module.exports = ListenService;
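Annotation: the handler above now short-circuits with 'already_running' via this.sttService.isMacOSAudioRunning?.(), whose implementation is not shown in this diff. A hedged sketch of what such a check could look like, assuming the capture runs as a child process held in a field like systemAudioProc:
// Hypothetical sketch — the field name systemAudioProc is assumed, not taken from this commit.
isMacOSAudioRunning() {
    // a spawned child process has exitCode === null while it is still running
    return !!(this.systemAudioProc && this.systemAudioProc.exitCode === null);
}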

View File

@ -1,33 +1,30 @@
const { ipcRenderer } = require('electron');
const createAecModule = require('../../../assets/aec.js');
const createAecModule = require('../../../assets/aec.js'); // location of aec.js
let aecModPromise = null; // load only once
let aecMod = null;
let aecPtr = 0; // reuse a single Rust Aec* instance
let aecWasm; // global cache
/** Load the WASM module and initialize it once */
async function getAec () {
if (aecModPromise) return aecModPromise; // cached
export async function initAec () {
if (aecWasm) return aecWasm; // already initialized, return as-is
aecModPromise = createAecModule().then((M) => {
aecMod = M;
// bind C symbols to JS wrappers (exactly once)
M.newPtr = M.cwrap('AecNew', 'number',
['number','number','number','number']);
M.cancel = M.cwrap('AecCancelEcho', null,
['number','number','number','number','number']);
M.destroy = M.cwrap('AecDestroy', null, ['number']);
return M;
});
// ⬇️ locateFile: called when aec.js tries to load the wasm
aecWasm = await createAecModule({
locateFile (filename) {
// aec.js only requests aec.wasm, so return its path directly
return '../../../assets/' + filename; // ← **browser-relative URL**
// (accessed from file://…/dist/renderer/… in the Electron renderer)
}
});
// C → JS wrappers
aecWasm.newPtr = aecWasm.cwrap('AecNew', 'number',
['number','number','number','number']);
aecWasm.cancel = aecWasm.cwrap('AecCancelEcho', null,
['number','number','number','number','number']);
aecWasm.destroy = aecWasm.cwrap('AecDestroy', null, ['number']);
return aecWasm;
return aecModPromise;
}
// so load failures are logged immediately
initAec().catch(console.error);
getAec().catch(console.error);
// ---------------------------
// Constants & Globals
// ---------------------------
@ -108,11 +105,14 @@ function arrayBufferToBase64(buffer) {
return btoa(binary);
}
/* ───────────────────────── JS ↔︎ WASM helpers ───────────────────────── */
function int16PtrFromFloat32(mod, f32) {
const len = f32.length;
const bytes = len * 2;
const ptr = mod._malloc(bytes);
const i16 = new Int16Array(mod.HEAP16.buffer, ptr, len);
// if HEAP16 is missing, wrap HEAPU8.buffer directly
const heapBuf = (mod.HEAP16 ? mod.HEAP16.buffer : mod.HEAPU8.buffer);
const i16 = new Int16Array(heapBuf, ptr, len);
for (let i = 0; i < len; ++i) {
const s = Math.max(-1, Math.min(1, f32[i]));
i16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
@ -126,128 +126,28 @@ function float32FromInt16View(i16) {
return out;
}
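Annotation: the helpers above use the usual asymmetric PCM scaling (×0x8000 for negative samples, ×0x7fff for positive ones). A quick sanity check, assuming float32FromInt16View divides by 0x8000 on the way back:
// -1.0 * 0x8000 = -32768 (Int16 min);  1.0 * 0x7fff = 32767 (Int16 max)
//  0.5 * 0x7fff = 16383 (truncated)  -> 16383 / 0x8000 ≈ 0.49997 after the round trip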
// ---------------------------
// Complete SimpleAEC implementation (exact from renderer.js)
// ---------------------------
class SimpleAEC {
constructor() {
this.adaptiveFilter = new Float32Array(1024);
this.mu = 0.2;
this.echoDelay = 100;
this.sampleRate = 24000;
this.delaySamples = Math.floor((this.echoDelay / 1000) * this.sampleRate);
this.echoGain = 0.5;
this.noiseFloor = 0.01;
// 🔧 Adaptive-gain parameters (User-tuned, very aggressive)
this.targetErr = 0.002;
this.adaptRate = 0.1;
console.log('🎯 AEC initialized (hyper-aggressive)');
}
process(micData, systemData) {
if (!systemData || systemData.length === 0) {
return micData;
}
for (let i = 0; i < systemData.length; i++) {
if (systemData[i] > 0.98) systemData[i] = 0.98;
else if (systemData[i] < -0.98) systemData[i] = -0.98;
systemData[i] = Math.tanh(systemData[i] * 4);
}
let sum2 = 0;
for (let i = 0; i < systemData.length; i++) sum2 += systemData[i] * systemData[i];
const rms = Math.sqrt(sum2 / systemData.length);
const targetRms = 0.08; // 🔧 reference RMS (was 0.1)
const scale = targetRms / (rms + 1e-6); // 1e-6: avoid division by zero
const output = new Float32Array(micData.length);
const optimalDelay = this.findOptimalDelay(micData, systemData);
for (let i = 0; i < micData.length; i++) {
let echoEstimate = 0;
for (let d = -500; d <= 500; d += 100) {
const delayIndex = i - optimalDelay - d;
if (delayIndex >= 0 && delayIndex < systemData.length) {
const weight = Math.exp(-Math.abs(d) / 1000);
echoEstimate += systemData[delayIndex] * scale * this.echoGain * weight;
}
}
output[i] = micData[i] - echoEstimate * 0.9;
if (Math.abs(output[i]) < this.noiseFloor) {
output[i] *= 0.5;
}
if (this.isSimilarToSystem(output[i], systemData, i, optimalDelay)) {
output[i] *= 0.25;
}
output[i] = Math.max(-1, Math.min(1, output[i]));
}
let errSum = 0;
for (let i = 0; i < output.length; i++) errSum += output[i] * output[i];
const errRms = Math.sqrt(errSum / output.length);
const err = errRms - this.targetErr;
this.echoGain += this.adaptRate * err; // proportional control
this.echoGain = Math.max(0, Math.min(1, this.echoGain));
return output;
}
findOptimalDelay(micData, systemData) {
let maxCorr = 0;
let optimalDelay = this.delaySamples;
for (let delay = 0; delay < 5000 && delay < systemData.length; delay += 200) {
let corr = 0;
let count = 0;
for (let i = 0; i < Math.min(500, micData.length); i++) {
if (i + delay < systemData.length) {
corr += micData[i] * systemData[i + delay];
count++;
}
}
if (count > 0) {
corr = Math.abs(corr / count);
if (corr > maxCorr) {
maxCorr = corr;
optimalDelay = delay;
}
}
}
return optimalDelay;
}
isSimilarToSystem(sample, systemData, index, delay) {
const windowSize = 50;
let similarity = 0;
for (let i = -windowSize; i <= windowSize; i++) {
const sysIndex = index - delay + i;
if (sysIndex >= 0 && sysIndex < systemData.length) {
similarity += Math.abs(sample - systemData[sysIndex]);
}
}
return similarity / (2 * windowSize + 1) < 0.15;
}
/* on shutdown, if needed */
function disposeAec () {
getAec().then(mod => { if (aecPtr) mod.destroy(aecPtr); });
}
function runAecSync (micF32, sysF32) {
if (!aecMod || !aecPtr || !aecMod.HEAPU8) return micF32; // module not loaded yet → pass through
const len = micF32.length;
const mic = int16PtrFromFloat32(aecMod, micF32);
const echo = int16PtrFromFloat32(aecMod, sysF32);
const out = aecMod._malloc(len * 2);
aecMod.cancel(aecPtr, mic.ptr, echo.ptr, out, len);
const heapBuf = (aecMod.HEAP16 ? aecMod.HEAP16.buffer : aecMod.HEAPU8.buffer);
const outF32 = float32FromInt16View(new Int16Array(heapBuf, out, len));
aecMod._free(mic.ptr); aecMod._free(echo.ptr); aecMod._free(out);
return outF32;
}
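Annotation: taken together, getAec() and runAecSync() replace the removed SimpleAEC class. A rough usage sketch; the meaning of the newPtr arguments is assumed from typical speex-style AEC wrappers, not confirmed by this diff:
// Sketch only.
async function ensureAec() {
    const mod = await getAec();
    if (!aecPtr) aecPtr = mod.newPtr(160, 1600, 24000, 1); // same arguments as setupMicProcessing below
    return mod;
}
// const clean = runAecSync(micFrame, systemFrame); // falls back to micFrame until the module is ready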
let aecProcessor = new SimpleAEC();
// System audio data handler
ipcRenderer.on('system-audio-data', (event, { data }) => {
@ -260,8 +160,6 @@ ipcRenderer.on('system-audio-data', (event, { data }) => {
if (systemAudioBuffer.length > MAX_SYSTEM_BUFFER_SIZE) {
systemAudioBuffer = systemAudioBuffer.slice(-MAX_SYSTEM_BUFFER_SIZE);
}
console.log('📥 Received system audio for AEC reference');
});
// ---------------------------
@ -351,39 +249,47 @@ setInterval(() => {
// ---------------------------
// Audio processing functions (exact from renderer.js)
// ---------------------------
function setupMicProcessing(micStream) {
async function setupMicProcessing(micStream) {
/* ── load the WASM module first ───────────────────────── */
const mod = await getAec();
if (!aecPtr) aecPtr = mod.newPtr(160, 1600, 24000, 1);
const micAudioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
await micAudioContext.resume();
const micSource = micAudioContext.createMediaStreamSource(micStream);
const micProcessor = micAudioContext.createScriptProcessor(BUFFER_SIZE, 1, 1);
let audioBuffer = [];
const samplesPerChunk = SAMPLE_RATE * AUDIO_CHUNK_DURATION;
micProcessor.onaudioprocess = async e => {
micProcessor.onaudioprocess = (e) => {
const inputData = e.inputBuffer.getChannelData(0);
audioBuffer.push(...inputData);
console.log('🎤 micProcessor.onaudioprocess');
// send once samplesPerChunk (=2400) samples have accumulated
while (audioBuffer.length >= samplesPerChunk) {
let chunk = audioBuffer.splice(0, samplesPerChunk);
let processedChunk = new Float32Array(chunk);
let processedChunk = new Float32Array(chunk); // default
// Check for system audio and apply AEC only if voice is active
if (aecProcessor && systemAudioBuffer.length > 0) {
const latestSystemAudio = systemAudioBuffer[systemAudioBuffer.length - 1];
const systemFloat32 = base64ToFloat32Array(latestSystemAudio.data);
// ───────────────── WASM AEC ─────────────────
if (systemAudioBuffer.length > 0) {
const latest = systemAudioBuffer[systemAudioBuffer.length - 1];
const sysF32 = base64ToFloat32Array(latest.data);
// Apply AEC only when system audio has active speech
if (isVoiceActive(systemFloat32)) {
processedChunk = aecProcessor.process(new Float32Array(chunk), systemFloat32);
console.log('🔊 Applied AEC because system audio is active');
}
// **run only on voiced segments**
processedChunk = runAecSync(new Float32Array(chunk), sysF32);
console.log('🔊 Applied WASM-AEC (speex)');
} else {
console.log('🔊 No system audio for AEC reference');
}
const pcmData16 = convertFloat32ToInt16(processedChunk);
const base64Data = arrayBufferToBase64(pcmData16.buffer);
const pcm16 = convertFloat32ToInt16(processedChunk);
const b64 = arrayBufferToBase64(pcm16.buffer);
await ipcRenderer.invoke('send-audio-content', {
data: base64Data,
ipcRenderer.invoke('send-audio-content', {
data: b64,
mimeType: 'audio/pcm;rate=24000',
});
}
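Annotation: the loop above sends fixed-size PCM frames; assuming SAMPLE_RATE = 24000 (matching the 'audio/pcm;rate=24000' mime type) and AUDIO_CHUNK_DURATION = 0.1 s (consistent with the '=2400' comment), each send carries:
// 24000 samples/s * 0.1 s = 2400 Float32 samples per chunk
// 2400 samples * 2 bytes  = 4800 bytes of 16-bit PCM before base64 encoding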
@ -566,7 +472,19 @@ async function startCapture(screenshotIntervalSeconds = 5, imageQuality = 'mediu
// Start macOS audio capture
const audioResult = await ipcRenderer.invoke('start-macos-audio');
if (!audioResult.success) {
throw new Error('Failed to start macOS audio capture: ' + audioResult.error);
console.warn('[listenCapture] macOS audio start failed:', audioResult.error);
// already running → stop, then retry
if (audioResult.error === 'already_running') {
await ipcRenderer.invoke('stop-macos-audio');
await new Promise(r => setTimeout(r, 500));
const retry = await ipcRenderer.invoke('start-macos-audio');
if (!retry.success) {
throw new Error('Retry failed: ' + retry.error);
}
} else {
throw new Error('Failed to start macOS audio capture: ' + audioResult.error);
}
}
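Annotation: the stop → wait → restart sequence above could also be read as a small reusable helper; a sketch with an illustrative name, using only the IPC channels already present in this diff:
// Sketch only — restartMacOSAudio is not part of this commit.
async function restartMacOSAudio(delayMs = 500) {
    await ipcRenderer.invoke('stop-macos-audio');
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return ipcRenderer.invoke('start-macos-audio'); // caller re-checks { success, error }
}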
// Initialize screen capture in main process
@ -589,7 +507,7 @@ async function startCapture(screenshotIntervalSeconds = 5, imageQuality = 'mediu
});
console.log('macOS microphone capture started');
const { context, processor } = setupMicProcessing(micMediaStream);
const { context, processor } = await setupMicProcessing(micMediaStream);
audioContext = context;
audioProcessor = processor;
} catch (micErr) {
@ -662,7 +580,7 @@ async function startCapture(screenshotIntervalSeconds = 5, imageQuality = 'mediu
video: false,
});
console.log('Windows microphone capture started');
const { context, processor } = setupMicProcessing(micMediaStream);
const { context, processor } = await setupMicProcessing(micMediaStream);
audioContext = context;
audioProcessor = processor;
} catch (micErr) {
@ -765,6 +683,9 @@ function stopCapture() {
// Exports & global registration
// ---------------------------
module.exports = {
getAec, // newly added initializer
runAecSync, // sync version
disposeAec, // destroy the Rust object when needed
startCapture,
stopCapture,
captureManualScreenshot,

View File

@ -217,6 +217,8 @@ class SttService {
};
const handleTheirMessage = message => {
if (!message || typeof message !== 'object') return;
if (this.modelInfo.provider === 'gemini') {
const text = message.serverContent?.inputTranscription?.text || '';
if (text && text.trim()) {