add deepgram
This commit is contained in:
parent
4d93df09e2
commit
c0cf74273a
78
package-lock.json
generated
78
package-lock.json
generated
@ -11,6 +11,7 @@
|
||||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.56.0",
|
||||
"@deepgram/sdk": "^4.9.1",
|
||||
"@google/genai": "^1.8.0",
|
||||
"@google/generative-ai": "^0.24.1",
|
||||
"axios": "^1.10.0",
|
||||
@ -54,6 +55,50 @@
|
||||
"anthropic-ai-sdk": "bin/cli"
|
||||
}
|
||||
},
|
||||
"node_modules/@deepgram/captions": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@deepgram/captions/-/captions-1.2.0.tgz",
|
||||
"integrity": "sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dayjs": "^1.11.10"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@deepgram/sdk": {
|
||||
"version": "4.9.1",
|
||||
"resolved": "https://registry.npmjs.org/@deepgram/sdk/-/sdk-4.9.1.tgz",
|
||||
"integrity": "sha512-a30Sed6OIRldnW1U0Q0Orvhjojq4O/1pMv6ijj+3j8735LBBfAJvlJpRCjrgtzBpnkKlY6v3bV5F8qUUSpz2yg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@deepgram/captions": "^1.1.1",
|
||||
"@types/node": "^18.19.39",
|
||||
"cross-fetch": "^3.1.5",
|
||||
"deepmerge": "^4.3.1",
|
||||
"events": "^3.3.0",
|
||||
"ws": "^8.17.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@deepgram/sdk/node_modules/@types/node": {
|
||||
"version": "18.19.118",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.118.tgz",
|
||||
"integrity": "sha512-hIPK0hSrrcaoAu/gJMzN3QClXE4QdCdFvaenJ0JsjIbExP1JFFVH+RHcBt25c9n8bx5dkIfqKE+uw6BmBns7ug==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@deepgram/sdk/node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@develar/schema-utils": {
|
||||
"version": "2.6.5",
|
||||
"dev": true,
|
||||
@ -2992,6 +3037,15 @@
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/cross-fetch": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz",
|
||||
"integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-fetch": "^2.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "7.0.6",
|
||||
"dev": true,
|
||||
@ -3020,6 +3074,12 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/dayjs": {
|
||||
"version": "1.11.13",
|
||||
"resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz",
|
||||
"integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/debounce-fn": {
|
||||
"version": "4.0.0",
|
||||
"license": "MIT",
|
||||
@ -3078,6 +3138,15 @@
|
||||
"node": ">=4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/deepmerge": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
||||
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/defaults": {
|
||||
"version": "1.0.4",
|
||||
"dev": true,
|
||||
@ -3735,6 +3804,15 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/events": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz",
|
||||
"integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.8.x"
|
||||
}
|
||||
},
|
||||
"node_modules/expand-template": {
|
||||
"version": "2.0.3",
|
||||
"license": "(MIT OR WTFPL)",
|
||||
|
@ -33,6 +33,7 @@
|
||||
"license": "GPL-3.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.56.0",
|
||||
"@deepgram/sdk": "^4.9.1",
|
||||
"@google/genai": "^1.8.0",
|
||||
"@google/generative-ai": "^0.24.1",
|
||||
"axios": "^1.10.0",
|
||||
|
@ -57,6 +57,14 @@ const PROVIDERS = {
|
||||
],
|
||||
sttModels: [],
|
||||
},
|
||||
'deepgram': {
|
||||
name: 'Deepgram',
|
||||
handler: () => require("./providers/deepgram"),
|
||||
llmModels: [],
|
||||
sttModels: [
|
||||
{ id: 'nova-3', name: 'Nova-3 (General)' },
|
||||
],
|
||||
},
|
||||
'ollama': {
|
||||
name: 'Ollama (Local)',
|
||||
handler: () => require("./providers/ollama"),
|
||||
@ -148,6 +156,7 @@ function getProviderClass(providerId) {
|
||||
'openai': 'OpenAIProvider',
|
||||
'anthropic': 'AnthropicProvider',
|
||||
'gemini': 'GeminiProvider',
|
||||
'deepgram': 'DeepgramProvider',
|
||||
'ollama': 'OllamaProvider',
|
||||
'whisper': 'WhisperProvider'
|
||||
};
|
||||
|
111
src/features/common/ai/providers/deepgram.js
Normal file
111
src/features/common/ai/providers/deepgram.js
Normal file
@ -0,0 +1,111 @@
|
||||
// providers/deepgram.js
|
||||
|
||||
const { createClient, LiveTranscriptionEvents } = require('@deepgram/sdk');
|
||||
const WebSocket = require('ws');
|
||||
|
||||
/**
 * Deepgram provider class. Responsible for validating Deepgram API keys.
 */
class DeepgramProvider {
    /**
     * Checks whether a Deepgram API key is usable by requesting the
     * `/v1/projects` endpoint with it.
     *
     * This method never rejects: every failure is reported through the
     * returned object.
     *
     * @param {string} key - Deepgram API key to check. Surrounding whitespace is ignored.
     * @returns {Promise<{success: boolean, error?: string}>} `{ success: true }` when the
     *   endpoint answers with an OK status; otherwise `success: false` plus a message.
     */
    static async validateApiKey(key) {
        // Reject non-strings and blank/whitespace-only strings locally, without a network call.
        const token = typeof key === 'string' ? key.trim() : '';
        if (!token) {
            return { success: false, error: 'Invalid Deepgram API key format.' };
        }

        try {
            // Call the REST API directly with fetch instead of going through the SDK
            // (same approach as openai.js).
            const response = await fetch('https://api.deepgram.com/v1/projects', {
                headers: { Authorization: `Token ${token}` },
                // Bound the request so a stalled connection cannot leave validation pending forever.
                signal: AbortSignal.timeout(10_000),
            });

            if (response.ok) {
                return { success: true };
            }

            // The error body may be missing or not JSON; fall back to an empty object.
            const errorData = await response.json().catch(() => ({}));
            // `?.` covers a body that parses to `null`.
            const message = errorData?.err_msg || `Validation failed with status: ${response.status}`;
            return { success: false, error: message };
        } catch (error) {
            // Reached when fetch itself fails (including the timeout above).
            console.error(`[DeepgramProvider] Network error during key validation:`, error);
            return { success: false, error: error.message || 'A network error occurred during validation.' };
        }
    }
}
|
||||
|
||||
/**
 * Opens a live-transcription WebSocket to Deepgram's `/v1/listen` endpoint and
 * resolves once the socket is open.
 *
 * The request asks for the `nova-3` model, `linear16` encoding, one channel,
 * smart formatting and interim results.
 *
 * @param {object} options
 * @param {string} options.apiKey - Deepgram API key, sent as `Authorization: Token <key>`.
 * @param {string} [options.language='en-US'] - Value of the `language` query parameter.
 * @param {number} [options.sampleRate=24000] - Value of the `sample_rate` query parameter.
 * @param {object|null} [options.callbacks] - Optional event hooks:
 *   `onmessage(msg)` receives each parsed message that carries a transcript, with
 *   `provider: 'deepgram'` added; `onclose({ code, reason })`; `onerror(err)`.
 * @returns {Promise<{sendRealtimeInput: Function, close: Function}>} Resolves on socket
 *   `open` with a session object; rejects on a socket `error` or if the socket has not
 *   opened within 10 seconds.
 */
function createSTT({
    apiKey,
    language = 'en-US',
    sampleRate = 24000,
    callbacks = {},
}) {
    // The `= {}` default only applies to `undefined`. Normalise an explicit `null`
    // as well so the event handlers below never dereference null inside the emitter.
    const handlers = callbacks ?? {};

    const qs = new URLSearchParams({
        model: 'nova-3',
        encoding: 'linear16',
        sample_rate: String(sampleRate),
        language,
        smart_format: 'true',
        interim_results: 'true',
        channels: '1',
    });

    const url = `wss://api.deepgram.com/v1/listen?${qs}`;

    const ws = new WebSocket(url, {
        headers: { Authorization: `Token ${apiKey}` },
    });
    ws.binaryType = 'arraybuffer';

    return new Promise((resolve, reject) => {
        // Give up if the socket has not opened within 10 seconds.
        const openTimer = setTimeout(() => {
            ws.terminate();
            reject(new Error('DG open timeout (10 s)'));
        }, 10_000);

        ws.on('open', () => {
            clearTimeout(openTimer);
            resolve({
                sendRealtimeInput: (buf) => ws.send(buf),
                close: () => ws.close(1000, 'client'),
            });
        });

        ws.on('message', (raw) => {
            let msg;
            try {
                msg = JSON.parse(raw.toString());
            } catch {
                // Frames that are not valid JSON are deliberately ignored.
                return;
            }
            // Forward only messages that actually carry a transcript field.
            if (msg.channel?.alternatives?.[0]?.transcript !== undefined) {
                handlers.onmessage?.({ provider: 'deepgram', ...msg });
            }
        });

        ws.on('close', (code, reason) =>
            handlers.onclose?.({ code, reason: reason.toString() })
        );

        ws.on('error', (err) => {
            clearTimeout(openTimer);
            handlers.onerror?.(err);
            // No effect if the promise has already settled (e.g. an error after 'open').
            reject(err);
        });
    });
}
|
||||
|
||||
// LLM placeholder functions (kept as-is): the stubs below log a warning and return objects whose methods always throw.
|
||||
/**
 * Placeholder LLM factory. Logs a warning and returns an object whose
 * `generateContent` method always rejects.
 *
 * @param {object} [opts] - Ignored.
 * @returns {{generateContent: function(): Promise<never>}}
 */
function createLLM(opts) {
    console.warn("[Deepgram] LLM not supported.");

    async function generateContent() {
        throw new Error("Deepgram does not support LLM functionality.");
    }

    return { generateContent };
}
|
||||
/**
 * Placeholder streaming-LLM factory. Logs a warning and returns an object whose
 * `streamChat` method always rejects.
 *
 * @param {object} [opts] - Ignored.
 * @returns {{streamChat: function(): Promise<never>}}
 */
function createStreamingLLM(opts) {
    console.warn("[Deepgram] Streaming LLM not supported.");

    async function streamChat() {
        throw new Error("Deepgram does not support Streaming LLM functionality.");
    }

    return { streamChat };
}
|
||||
|
||||
// Public surface of the Deepgram provider module.
module.exports = { DeepgramProvider, createSTT, createLLM, createStreamingLLM };
|
@ -55,17 +55,6 @@ class SttService {
|
||||
}
|
||||
}
|
||||
|
||||
async handleSendSystemAudioContent(data, mimeType) {
|
||||
try {
|
||||
await this.sendSystemAudioContent(data, mimeType);
|
||||
this.sendToRenderer('system-audio-data', { data });
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
console.error('Error sending system audio:', error);
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
|
||||
flushMyCompletion() {
|
||||
const finalText = (this.myCompletionBuffer + this.myCurrentUtterance).trim();
|
||||
if (!this.modelInfo || !finalText) return;
|
||||
@ -157,7 +146,7 @@ class SttService {
|
||||
console.log('[SttService] Ignoring message - session already closed');
|
||||
return;
|
||||
}
|
||||
console.log('[SttService] handleMyMessage', message);
|
||||
// console.log('[SttService] handleMyMessage', message);
|
||||
|
||||
if (this.modelInfo.provider === 'whisper') {
|
||||
// Whisper STT emits 'transcription' events with different structure
|
||||
@ -178,10 +167,6 @@ class SttService {
|
||||
'(NOISE)'
|
||||
];
|
||||
|
||||
|
||||
|
||||
const normalizedText = finalText.toLowerCase().trim();
|
||||
|
||||
const isNoise = noisePatterns.some(pattern =>
|
||||
finalText.includes(pattern) || finalText === pattern
|
||||
);
|
||||
@ -232,6 +217,38 @@ class SttService {
|
||||
isFinal: false,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
// Deepgram
|
||||
} else if (this.modelInfo.provider === 'deepgram') {
|
||||
const text = message.channel?.alternatives?.[0]?.transcript;
|
||||
if (!text || text.trim().length === 0) return;
|
||||
|
||||
const isFinal = message.is_final;
|
||||
console.log(`[SttService-Me-Deepgram] Received: isFinal=${isFinal}, text="${text}"`);
|
||||
|
||||
if (isFinal) {
|
||||
// 최종 결과가 도착하면, 현재 진행중인 부분 발화는 비우고
|
||||
// 최종 텍스트로 debounce를 실행합니다.
|
||||
this.myCurrentUtterance = '';
|
||||
this.debounceMyCompletion(text);
|
||||
} else {
|
||||
// 부분 결과(interim)인 경우, 화면에 실시간으로 업데이트합니다.
|
||||
if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer);
|
||||
this.myCompletionTimer = null;
|
||||
|
||||
this.myCurrentUtterance = text;
|
||||
|
||||
const continuousText = (this.myCompletionBuffer + ' ' + this.myCurrentUtterance).trim();
|
||||
|
||||
this.sendToRenderer('stt-update', {
|
||||
speaker: 'Me',
|
||||
text: continuousText,
|
||||
isPartial: true,
|
||||
isFinal: false,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
} else {
|
||||
const type = message.type;
|
||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||
@ -291,9 +308,6 @@ class SttService {
|
||||
'(NOISE)'
|
||||
];
|
||||
|
||||
|
||||
const normalizedText = finalText.toLowerCase().trim();
|
||||
|
||||
const isNoise = noisePatterns.some(pattern =>
|
||||
finalText.includes(pattern) || finalText === pattern
|
||||
);
|
||||
@ -345,6 +359,34 @@ class SttService {
|
||||
isFinal: false,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
// Deepgram
|
||||
} else if (this.modelInfo.provider === 'deepgram') {
|
||||
const text = message.channel?.alternatives?.[0]?.transcript;
|
||||
if (!text || text.trim().length === 0) return;
|
||||
|
||||
const isFinal = message.is_final;
|
||||
|
||||
if (isFinal) {
|
||||
this.theirCurrentUtterance = '';
|
||||
this.debounceTheirCompletion(text);
|
||||
} else {
|
||||
if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer);
|
||||
this.theirCompletionTimer = null;
|
||||
|
||||
this.theirCurrentUtterance = text;
|
||||
|
||||
const continuousText = (this.theirCompletionBuffer + ' ' + this.theirCurrentUtterance).trim();
|
||||
|
||||
this.sendToRenderer('stt-update', {
|
||||
speaker: 'Them',
|
||||
text: continuousText,
|
||||
isPartial: true,
|
||||
isFinal: false,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
} else {
|
||||
const type = message.type;
|
||||
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
|
||||
@ -431,10 +473,14 @@ class SttService {
|
||||
throw new Error('STT model info could not be retrieved.');
|
||||
}
|
||||
|
||||
const payload = modelInfo.provider === 'gemini'
|
||||
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
|
||||
: data;
|
||||
|
||||
let payload;
|
||||
if (modelInfo.provider === 'gemini') {
|
||||
payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } };
|
||||
} else if (modelInfo.provider === 'deepgram') {
|
||||
payload = Buffer.from(data, 'base64');
|
||||
} else {
|
||||
payload = data;
|
||||
}
|
||||
await this.mySttSession.sendRealtimeInput(payload);
|
||||
}
|
||||
|
||||
@ -452,10 +498,15 @@ class SttService {
|
||||
throw new Error('STT model info could not be retrieved.');
|
||||
}
|
||||
|
||||
const payload = modelInfo.provider === 'gemini'
|
||||
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
|
||||
: data;
|
||||
|
||||
let payload;
|
||||
if (modelInfo.provider === 'gemini') {
|
||||
payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } };
|
||||
} else if (modelInfo.provider === 'deepgram') {
|
||||
payload = Buffer.from(data, 'base64');
|
||||
} else {
|
||||
payload = data;
|
||||
}
|
||||
|
||||
await this.theirSttSession.sendRealtimeInput(payload);
|
||||
}
|
||||
|
||||
@ -547,9 +598,15 @@ class SttService {
|
||||
|
||||
if (this.theirSttSession) {
|
||||
try {
|
||||
const payload = modelInfo.provider === 'gemini'
|
||||
? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
|
||||
: base64Data;
|
||||
let payload;
|
||||
if (modelInfo.provider === 'gemini') {
|
||||
payload = { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } };
|
||||
} else if (modelInfo.provider === 'deepgram') {
|
||||
payload = Buffer.from(base64Data, 'base64');
|
||||
} else {
|
||||
payload = base64Data;
|
||||
}
|
||||
|
||||
await this.theirSttSession.sendRealtimeInput(payload);
|
||||
} catch (err) {
|
||||
console.error('Error sending system audio:', err.message);
|
||||
|
Loading…
x
Reference in New Issue
Block a user