From c0cf74273af99bb62150c413b21f60653c77b85b Mon Sep 17 00:00:00 2001 From: sanio Date: Tue, 15 Jul 2025 03:47:47 +0900 Subject: [PATCH] add deepgram --- package-lock.json | 78 +++++++++++++ package.json | 1 + src/features/common/ai/factory.js | 9 ++ src/features/common/ai/providers/deepgram.js | 111 ++++++++++++++++++ src/features/listen/stt/sttService.js | 117 ++++++++++++++----- 5 files changed, 286 insertions(+), 30 deletions(-) create mode 100644 src/features/common/ai/providers/deepgram.js diff --git a/package-lock.json b/package-lock.json index 7e406cf..fe4703a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "license": "GPL-3.0", "dependencies": { "@anthropic-ai/sdk": "^0.56.0", + "@deepgram/sdk": "^4.9.1", "@google/genai": "^1.8.0", "@google/generative-ai": "^0.24.1", "axios": "^1.10.0", @@ -54,6 +55,50 @@ "anthropic-ai-sdk": "bin/cli" } }, + "node_modules/@deepgram/captions": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@deepgram/captions/-/captions-1.2.0.tgz", + "integrity": "sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==", + "license": "MIT", + "dependencies": { + "dayjs": "^1.11.10" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@deepgram/sdk": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@deepgram/sdk/-/sdk-4.9.1.tgz", + "integrity": "sha512-a30Sed6OIRldnW1U0Q0Orvhjojq4O/1pMv6ijj+3j8735LBBfAJvlJpRCjrgtzBpnkKlY6v3bV5F8qUUSpz2yg==", + "license": "MIT", + "dependencies": { + "@deepgram/captions": "^1.1.1", + "@types/node": "^18.19.39", + "cross-fetch": "^3.1.5", + "deepmerge": "^4.3.1", + "events": "^3.3.0", + "ws": "^8.17.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@deepgram/sdk/node_modules/@types/node": { + "version": "18.19.118", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.118.tgz", + "integrity": "sha512-hIPK0hSrrcaoAu/gJMzN3QClXE4QdCdFvaenJ0JsjIbExP1JFFVH+RHcBt25c9n8bx5dkIfqKE+uw6BmBns7ug==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@deepgram/sdk/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, "node_modules/@develar/schema-utils": { "version": "2.6.5", "dev": true, @@ -2992,6 +3037,15 @@ "optional": true, "peer": true }, + "node_modules/cross-fetch": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz", + "integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==", + "license": "MIT", + "dependencies": { + "node-fetch": "^2.7.0" + } + }, "node_modules/cross-spawn": { "version": "7.0.6", "dev": true, @@ -3020,6 +3074,12 @@ "node": ">=6" } }, + "node_modules/dayjs": { + "version": "1.11.13", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz", + "integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==", + "license": "MIT" + }, "node_modules/debounce-fn": { "version": "4.0.0", "license": "MIT", @@ -3078,6 +3138,15 @@ "node": ">=4.0.0" } }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/defaults": { "version": "1.0.4", "dev": true, @@ -3735,6 +3804,15 @@ "node": ">=6" } }, + "node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "license": "MIT", + "engines": { + "node": ">=0.8.x" + } + }, "node_modules/expand-template": { "version": "2.0.3", "license": "(MIT OR WTFPL)", diff --git a/package.json b/package.json index 4ed8d95..255f90b 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "license": "GPL-3.0", "dependencies": { "@anthropic-ai/sdk": "^0.56.0", + "@deepgram/sdk": "^4.9.1", "@google/genai": "^1.8.0", "@google/generative-ai": "^0.24.1", "axios": "^1.10.0", diff --git a/src/features/common/ai/factory.js b/src/features/common/ai/factory.js index 8ccc5ec..419cfdf 100644 --- a/src/features/common/ai/factory.js +++ b/src/features/common/ai/factory.js @@ -57,6 +57,14 @@ const PROVIDERS = { ], sttModels: [], }, + 'deepgram': { + name: 'Deepgram', + handler: () => require("./providers/deepgram"), + llmModels: [], + sttModels: [ + { id: 'nova-3', name: 'Nova-3 (General)' }, + ], + }, 'ollama': { name: 'Ollama (Local)', handler: () => require("./providers/ollama"), @@ -148,6 +156,7 @@ function getProviderClass(providerId) { 'openai': 'OpenAIProvider', 'anthropic': 'AnthropicProvider', 'gemini': 'GeminiProvider', + 'deepgram': 'DeepgramProvider', 'ollama': 'OllamaProvider', 'whisper': 'WhisperProvider' }; diff --git a/src/features/common/ai/providers/deepgram.js b/src/features/common/ai/providers/deepgram.js new file mode 100644 index 0000000..c365396 --- /dev/null +++ b/src/features/common/ai/providers/deepgram.js @@ -0,0 +1,111 @@ +// providers/deepgram.js + +const { createClient, LiveTranscriptionEvents } = require('@deepgram/sdk'); +const WebSocket = require('ws'); + +/** + * Deepgram Provider 클래스. API 키 유효성 검사를 담당합니다. + */ +class DeepgramProvider { + /** + * Deepgram API 키의 유효성을 검사합니다. + * @param {string} key - 검사할 Deepgram API 키 + * @returns {Promise<{success: boolean, error?: string}>} + */ + static async validateApiKey(key) { + if (!key || typeof key !== 'string') { + return { success: false, error: 'Invalid Deepgram API key format.' }; + } + try { + // ✨ 변경점: SDK 대신 직접 fetch로 API를 호출하여 안정성 확보 (openai.js 방식) + const response = await fetch('https://api.deepgram.com/v1/projects', { + headers: { 'Authorization': `Token ${key}` } + }); + + if (response.ok) { + return { success: true }; + } else { + const errorData = await response.json().catch(() => ({})); + const message = errorData.err_msg || `Validation failed with status: ${response.status}`; + return { success: false, error: message }; + } + } catch (error) { + console.error(`[DeepgramProvider] Network error during key validation:`, error); + return { success: false, error: error.message || 'A network error occurred during validation.' }; + } + } +} + +function createSTT({ + apiKey, + language = 'en-US', + sampleRate = 24000, + callbacks = {}, + }) { + const qs = new URLSearchParams({ + model: 'nova-3', + encoding: 'linear16', + sample_rate: sampleRate.toString(), + language, + smart_format: 'true', + interim_results: 'true', + channels: '1', + }); + + const url = `wss://api.deepgram.com/v1/listen?${qs}`; + + const ws = new WebSocket(url, { + headers: { Authorization: `Token ${apiKey}` }, + }); + ws.binaryType = 'arraybuffer'; + + return new Promise((resolve, reject) => { + const to = setTimeout(() => { + ws.terminate(); + reject(new Error('DG open timeout (10 s)')); + }, 10_000); + + ws.on('open', () => { + clearTimeout(to); + resolve({ + sendRealtimeInput: (buf) => ws.send(buf), + close: () => ws.close(1000, 'client'), + }); + }); + + ws.on('message', raw => { + let msg; + try { msg = JSON.parse(raw.toString()); } catch { return; } + if (msg.channel?.alternatives?.[0]?.transcript !== undefined) { + callbacks.onmessage?.({ provider: 'deepgram', ...msg }); + } + }); + + ws.on('close', (code, reason) => + callbacks.onclose?.({ code, reason: reason.toString() }) + ); + + ws.on('error', err => { + clearTimeout(to); + callbacks.onerror?.(err); + reject(err); + }); + }); + } + +// ... (LLM 관련 Placeholder 함수들은 그대로 유지) ... +function createLLM(opts) { + console.warn("[Deepgram] LLM not supported."); + return { generateContent: async () => { throw new Error("Deepgram does not support LLM functionality."); } }; +} +function createStreamingLLM(opts) { + console.warn("[Deepgram] Streaming LLM not supported."); + return { streamChat: async () => { throw new Error("Deepgram does not support Streaming LLM functionality."); } }; +} + +module.exports = { + DeepgramProvider, + createSTT, + createLLM, + createStreamingLLM +}; \ No newline at end of file diff --git a/src/features/listen/stt/sttService.js b/src/features/listen/stt/sttService.js index 9fd9e87..8acf767 100644 --- a/src/features/listen/stt/sttService.js +++ b/src/features/listen/stt/sttService.js @@ -55,17 +55,6 @@ class SttService { } } - async handleSendSystemAudioContent(data, mimeType) { - try { - await this.sendSystemAudioContent(data, mimeType); - this.sendToRenderer('system-audio-data', { data }); - return { success: true }; - } catch (error) { - console.error('Error sending system audio:', error); - return { success: false, error: error.message }; - } - } - flushMyCompletion() { const finalText = (this.myCompletionBuffer + this.myCurrentUtterance).trim(); if (!this.modelInfo || !finalText) return; @@ -157,7 +146,7 @@ class SttService { console.log('[SttService] Ignoring message - session already closed'); return; } - console.log('[SttService] handleMyMessage', message); + // console.log('[SttService] handleMyMessage', message); if (this.modelInfo.provider === 'whisper') { // Whisper STT emits 'transcription' events with different structure @@ -178,10 +167,6 @@ class SttService { '(NOISE)' ]; - - - const normalizedText = finalText.toLowerCase().trim(); - const isNoise = noisePatterns.some(pattern => finalText.includes(pattern) || finalText === pattern ); @@ -232,6 +217,38 @@ class SttService { isFinal: false, timestamp: Date.now(), }); + + // Deepgram + } else if (this.modelInfo.provider === 'deepgram') { + const text = message.channel?.alternatives?.[0]?.transcript; + if (!text || text.trim().length === 0) return; + + const isFinal = message.is_final; + console.log(`[SttService-Me-Deepgram] Received: isFinal=${isFinal}, text="${text}"`); + + if (isFinal) { + // 최종 결과가 도착하면, 현재 진행중인 부분 발화는 비우고 + // 최종 텍스트로 debounce를 실행합니다. + this.myCurrentUtterance = ''; + this.debounceMyCompletion(text); + } else { + // 부분 결과(interim)인 경우, 화면에 실시간으로 업데이트합니다. + if (this.myCompletionTimer) clearTimeout(this.myCompletionTimer); + this.myCompletionTimer = null; + + this.myCurrentUtterance = text; + + const continuousText = (this.myCompletionBuffer + ' ' + this.myCurrentUtterance).trim(); + + this.sendToRenderer('stt-update', { + speaker: 'Me', + text: continuousText, + isPartial: true, + isFinal: false, + timestamp: Date.now(), + }); + } + } else { const type = message.type; const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || ''; @@ -291,9 +308,6 @@ class SttService { '(NOISE)' ]; - - const normalizedText = finalText.toLowerCase().trim(); - const isNoise = noisePatterns.some(pattern => finalText.includes(pattern) || finalText === pattern ); @@ -345,6 +359,34 @@ class SttService { isFinal: false, timestamp: Date.now(), }); + + // Deepgram + } else if (this.modelInfo.provider === 'deepgram') { + const text = message.channel?.alternatives?.[0]?.transcript; + if (!text || text.trim().length === 0) return; + + const isFinal = message.is_final; + + if (isFinal) { + this.theirCurrentUtterance = ''; + this.debounceTheirCompletion(text); + } else { + if (this.theirCompletionTimer) clearTimeout(this.theirCompletionTimer); + this.theirCompletionTimer = null; + + this.theirCurrentUtterance = text; + + const continuousText = (this.theirCompletionBuffer + ' ' + this.theirCurrentUtterance).trim(); + + this.sendToRenderer('stt-update', { + speaker: 'Them', + text: continuousText, + isPartial: true, + isFinal: false, + timestamp: Date.now(), + }); + } + } else { const type = message.type; const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || ''; @@ -431,10 +473,14 @@ class SttService { throw new Error('STT model info could not be retrieved.'); } - const payload = modelInfo.provider === 'gemini' - ? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } } - : data; - + let payload; + if (modelInfo.provider === 'gemini') { + payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }; + } else if (modelInfo.provider === 'deepgram') { + payload = Buffer.from(data, 'base64'); + } else { + payload = data; + } await this.mySttSession.sendRealtimeInput(payload); } @@ -452,10 +498,15 @@ class SttService { throw new Error('STT model info could not be retrieved.'); } - const payload = modelInfo.provider === 'gemini' - ? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } } - : data; - + let payload; + if (modelInfo.provider === 'gemini') { + payload = { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }; + } else if (modelInfo.provider === 'deepgram') { + payload = Buffer.from(data, 'base64'); + } else { + payload = data; + } + await this.theirSttSession.sendRealtimeInput(payload); } @@ -547,9 +598,15 @@ class SttService { if (this.theirSttSession) { try { - const payload = modelInfo.provider === 'gemini' - ? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } } - : base64Data; + let payload; + if (modelInfo.provider === 'gemini') { + payload = { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }; + } else if (modelInfo.provider === 'deepgram') { + payload = Buffer.from(base64Data, 'base64'); + } else { + payload = base64Data; + } + await this.theirSttSession.sendRealtimeInput(payload); } catch (err) { console.error('Error sending system audio:', err.message);