add deepgram

This commit is contained in:
sanio 2025-07-15 03:47:47 +09:00
parent 4d93df09e2
commit c0cf74273a
5 changed files with 286 additions and 30 deletions

78
package-lock.json generated
View File

@ -11,6 +11,7 @@
"license": "GPL-3.0",
"dependencies": {
"@anthropic-ai/sdk": "^0.56.0",
"@deepgram/sdk": "^4.9.1",
"@google/genai": "^1.8.0",
"@google/generative-ai": "^0.24.1",
"axios": "^1.10.0",
@ -54,6 +55,50 @@
"anthropic-ai-sdk": "bin/cli"
}
},
"node_modules/@deepgram/captions": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@deepgram/captions/-/captions-1.2.0.tgz",
"integrity": "sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==",
"license": "MIT",
"dependencies": {
"dayjs": "^1.11.10"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@deepgram/sdk": {
"version": "4.9.1",
"resolved": "https://registry.npmjs.org/@deepgram/sdk/-/sdk-4.9.1.tgz",
"integrity": "sha512-a30Sed6OIRldnW1U0Q0Orvhjojq4O/1pMv6ijj+3j8735LBBfAJvlJpRCjrgtzBpnkKlY6v3bV5F8qUUSpz2yg==",
"license": "MIT",
"dependencies": {
"@deepgram/captions": "^1.1.1",
"@types/node": "^18.19.39",
"cross-fetch": "^3.1.5",
"deepmerge": "^4.3.1",
"events": "^3.3.0",
"ws": "^8.17.0"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@deepgram/sdk/node_modules/@types/node": {
"version": "18.19.118",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.118.tgz",
"integrity": "sha512-hIPK0hSrrcaoAu/gJMzN3QClXE4QdCdFvaenJ0JsjIbExP1JFFVH+RHcBt25c9n8bx5dkIfqKE+uw6BmBns7ug==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/@deepgram/sdk/node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/@develar/schema-utils": {
"version": "2.6.5",
"dev": true,
@ -2992,6 +3037,15 @@
"optional": true,
"peer": true
},
"node_modules/cross-fetch": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz",
"integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==",
"license": "MIT",
"dependencies": {
"node-fetch": "^2.7.0"
}
},
"node_modules/cross-spawn": {
"version": "7.0.6",
"dev": true,
@ -3020,6 +3074,12 @@
"node": ">=6"
}
},
"node_modules/dayjs": {
"version": "1.11.13",
"resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz",
"integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==",
"license": "MIT"
},
"node_modules/debounce-fn": {
"version": "4.0.0",
"license": "MIT",
@ -3078,6 +3138,15 @@
"node": ">=4.0.0"
}
},
"node_modules/deepmerge": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/defaults": {
"version": "1.0.4",
"dev": true,
@ -3735,6 +3804,15 @@
"node": ">=6"
}
},
"node_modules/events": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz",
"integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==",
"license": "MIT",
"engines": {
"node": ">=0.8.x"
}
},
"node_modules/expand-template": {
"version": "2.0.3",
"license": "(MIT OR WTFPL)",

View File

@ -33,6 +33,7 @@
"license": "GPL-3.0",
"dependencies": {
"@anthropic-ai/sdk": "^0.56.0",
"@deepgram/sdk": "^4.9.1",
"@google/genai": "^1.8.0",
"@google/generative-ai": "^0.24.1",
"axios": "^1.10.0",

View File

@ -57,6 +57,14 @@ const PROVIDERS = {
],
sttModels: [],
},
'deepgram': {
name: 'Deepgram',
handler: () => require("./providers/deepgram"),
llmModels: [],
sttModels: [
{ id: 'nova-3', name: 'Nova-3 (General)' },
],
},
'ollama': {
name: 'Ollama (Local)',
handler: () => require("./providers/ollama"),
@ -148,6 +156,7 @@ function getProviderClass(providerId) {
'openai': 'OpenAIProvider',
'anthropic': 'AnthropicProvider',
'gemini': 'GeminiProvider',
'deepgram': 'DeepgramProvider',
'ollama': 'OllamaProvider',
'whisper': 'WhisperProvider'
};

View File

@ -0,0 +1,111 @@
// providers/deepgram.js
const { createClient, LiveTranscriptionEvents } = require('@deepgram/sdk');
const WebSocket = require('ws');
/**
* Deepgram Provider 클래스. API 유효성 검사를 담당합니다.
*/
class DeepgramProvider {
/**
* Deepgram API 키의 유효성을 검사합니다.
* @param {string} key - 검사할 Deepgram API
* @returns {Promise<{success: boolean, error?: string}>}
*/
static async validateApiKey(key) {
if (!key || typeof key !== 'string') {
return { success: false, error: 'Invalid Deepgram API key format.' };
}
try {
// ✨ 변경점: SDK 대신 직접 fetch로 API를 호출하여 안정성 확보 (openai.js 방식)
const response = await fetch('https://api.deepgram.com/v1/projects', {
headers: { 'Authorization': `Token ${key}` }
});
if (response.ok) {
return { success: true };
} else {
const errorData = await response.json().catch(() => ({}));
const message = errorData.err_msg || `Validation failed with status: ${response.status}`;
return { success: false, error: message };
}
} catch (error) {
console.error(`[DeepgramProvider] Network error during key validation:`, error);
return { success: false, error: error.message || 'A network error occurred during validation.' };
}
}
}
function createSTT({
apiKey,
language = 'en-US',
sampleRate = 24000,
callbacks = {},
}) {
const qs = new URLSearchParams({
model: 'nova-3',
encoding: 'linear16',
sample_rate: sampleRate.toString(),
language,
smart_format: 'true',
interim_results: 'true',
channels: '1',
});
const url = `wss://api.deepgram.com/v1/listen?${qs}`;
const ws = new WebSocket(url, {
headers: { Authorization: `Token ${apiKey}` },
});
ws.binaryType = 'arraybuffer';
return new Promise((resolve, reject) => {
const to = setTimeout(() => {
ws.terminate();
reject(new Error('DG open timeout (10s)'));
}, 10_000);
ws.on('open', () => {
clearTimeout(to);
resolve({
sendRealtimeInput: (buf) => ws.send(buf),
close: () => ws.close(1000, 'client'),
});
});
ws.on('message', raw => {
let msg;
try { msg = JSON.parse(raw.toString()); } catch { return; }
if (msg.channel?.alternatives?.[0]?.transcript !== undefined) {
callbacks.onmessage?.({ provider: 'deepgram', ...msg });
}
});
ws.on('close', (code, reason) =>
callbacks.onclose?.({ code, reason: reason.toString() })
);
ws.on('error', err => {
clearTimeout(to);
callbacks.onerror?.(err);
reject(err);
});
});
}
// ... (LLM 관련 Placeholder 함수들은 그대로 유지) ...
function createLLM(opts) {
console.warn("[Deepgram] LLM not supported.");
return { generateContent: async () => { throw new Error("Deepgram does not support LLM functionality."); } };
}
function createStreamingLLM(opts) {
console.warn("[Deepgram] Streaming LLM not supported.");
return { streamChat: async () => { throw new Error("Deepgram does not support Streaming LLM functionality."); } };
}
module.exports = {
DeepgramProvider,
createSTT,
createLLM,
createStreamingLLM
};

View File

@ -55,17 +55,6 @@ class SttService {
}
}
async handleSendSystemAudioContent(data, mimeType) {
try {
await this.sendSystemAudioContent(data, mimeType);
this.sendToRenderer('system-audio-data', { data });
return { success: true };
} catch (error) {
console.error('Error sending system audio:', error);
return { success: false, error: error.message };
}
}
flushMyCompletion() {
const finalText = (this.myCompletionBuffer + this.myCurrentUtterance).trim();
if (!this.modelInfo || !finalText) return;
@ -157,7 +146,7 @@ class SttService {
console.log('[SttService] Ignoring message - session already closed');
return;
}
console.log('[SttService] handleMyMessage', message);
// console.log('[SttService] handleMyMessage', message);
if (this.modelInfo.provider === 'whisper') {
// Whisper STT emits 'transcription' events with different structure
@ -178,10 +167,6 @@ class SttService {
'(NOISE)'
];
const normalizedText = finalText.toLowerCase().trim();
const isNoise = noisePatterns.some(pattern =>
finalText.includes(pattern) || finalText === pattern
);
@ -232,6 +217,38 @@ class SttService {
isFinal: false,
timestamp: Date.now(),
});
// Deepgram
} else if (this.modelInfo.provider === 'deepgram') {
const text = message.channel?.alternatives?.[0]?.transcript;
if (!text || text.trim().length === 0) return;
const isFinal = message.is_final;
console.log(`[SttService-Me-Deepgram] Received: isFinal=${isFinal}, text="${text}"`);
if (isFinal) {
// 최종 결과가 도착하면, 현재 진행중인 부분 발화는 비우고
// 최종 텍스트로 debounce를 실행합니다.
this.myCurrentUtterance = '';
this.debounceMyCompletion(text);
} else {
// 부분 결과(interim)인 경우, 화면에 실시간으로 업데이트합니다.
if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer);
this.myCompletionTimer = null;
this.myCurrentUtterance = text;
const continuousText = (this.myCompletionBuffer + ' ' + this.myCurrentUtterance).trim();
this.sendToRenderer('stt-update', {
speaker: 'Me',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
@ -291,9 +308,6 @@ class SttService {
'(NOISE)'
];
const normalizedText = finalText.toLowerCase().trim();
const isNoise = noisePatterns.some(pattern =>
finalText.includes(pattern) || finalText === pattern
);
@ -345,6 +359,34 @@ class SttService {
isFinal: false,
timestamp: Date.now(),
});
// Deepgram
} else if (this.modelInfo.provider === 'deepgram') {
const text = message.channel?.alternatives?.[0]?.transcript;
if (!text || text.trim().length === 0) return;
const isFinal = message.is_final;
if (isFinal) {
this.theirCurrentUtterance = '';
this.debounceTheirCompletion(text);
} else {
if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer);
this.theirCompletionTimer = null;
this.theirCurrentUtterance = text;
const continuousText = (this.theirCompletionBuffer + ' ' + this.theirCurrentUtterance).trim();
this.sendToRenderer('stt-update', {
speaker: 'Them',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
@ -431,10 +473,14 @@ class SttService {
throw new Error('STT model info could not be retrieved.');
}
const payload = modelInfo.provider === 'gemini'
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
: data;
let payload;
if (modelInfo.provider === 'gemini') {
payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } };
} else if (modelInfo.provider === 'deepgram') {
payload = Buffer.from(data, 'base64');
} else {
payload = data;
}
await this.mySttSession.sendRealtimeInput(payload);
}
@ -452,10 +498,15 @@ class SttService {
throw new Error('STT model info could not be retrieved.');
}
const payload = modelInfo.provider === 'gemini'
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
: data;
let payload;
if (modelInfo.provider === 'gemini') {
payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } };
} else if (modelInfo.provider === 'deepgram') {
payload = Buffer.from(data, 'base64');
} else {
payload = data;
}
await this.theirSttSession.sendRealtimeInput(payload);
}
@ -547,9 +598,15 @@ class SttService {
if (this.theirSttSession) {
try {
const payload = modelInfo.provider === 'gemini'
? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
: base64Data;
let payload;
if (modelInfo.provider === 'gemini') {
payload = { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } };
} else if (modelInfo.provider === 'deepgram') {
payload = Buffer.from(base64Data, 'base64');
} else {
payload = base64Data;
}
await this.theirSttSession.sendRealtimeInput(payload);
} catch (err) {
console.error('Error sending system audio:', err.message);