Merge pull request #51 from pickle-com/pr-37

Pr 37 (enable gemini STT)
This commit is contained in:
sanio 2025-07-05 22:55:21 +09:00 committed by GitHub
commit 552a6bebcd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1204 additions and 845 deletions

View File

@ -29,6 +29,8 @@
},
"license": "GPL-3.0",
"dependencies": {
"@google/genai": "^1.8.0",
"@google/generative-ai": "^0.24.1",
"axios": "^1.10.0",
"better-sqlite3": "^9.4.3",
"cors": "^2.8.5",

View File

@ -5,6 +5,7 @@ export class ApiKeyHeader extends LitElement {
apiKey: { type: String },
isLoading: { type: Boolean },
errorMessage: { type: String },
selectedProvider: { type: String },
};
static styles = css`
@ -45,11 +46,11 @@ export class ApiKeyHeader extends LitElement {
.container {
width: 285px;
height: 220px;
min-height: 260px;
padding: 18px 20px;
background: rgba(0, 0, 0, 0.3);
border-radius: 16px;
overflow: hidden;
overflow: visible;
position: relative;
display: flex;
flex-direction: column;
@ -108,7 +109,7 @@ export class ApiKeyHeader extends LitElement {
font-weight: 500; /* Medium */
margin: 0;
text-align: center;
flex-shrink: 0; /* 제목이 줄어들지 않도록 고정 */
flex-shrink: 0;
}
.form-content {
@ -116,14 +117,14 @@ export class ApiKeyHeader extends LitElement {
flex-direction: column;
align-items: center;
width: 100%;
margin-top: auto; /* 이 속성이 제목과 폼 사이의 공간을 만듭니다. */
margin-top: auto;
}
.error-message {
color: rgba(239, 68, 68, 0.9);
font-weight: 500;
font-size: 11px;
height: 14px; /* Reserve space to prevent layout shift */
height: 14px;
text-align: center;
margin-bottom: 4px;
}
@ -152,6 +153,46 @@ export class ApiKeyHeader extends LitElement {
outline: none;
}
.provider-select {
width: 100%;
height: 34px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
border: 1px solid rgba(255, 255, 255, 0.2);
padding: 0 10px;
color: white;
font-size: 12px;
font-weight: 400;
margin-bottom: 6px;
text-align: center;
cursor: pointer;
-webkit-appearance: none;
-moz-appearance: none;
appearance: none;
background-image: url('data:image/svg+xml;charset=US-ASCII,%3Csvg%20width%3D%2714%27%20height%3D%278%27%20viewBox%3D%270%200%2014%208%27%20xmlns%3D%27http%3A//www.w3.org/2000/svg%27%3E%3Cpath%20d%3D%27M1%201l6%206%206-6%27%20stroke%3D%27%23ffffff%27%20stroke-width%3D%271.5%27%20fill%3D%27none%27%20fill-rule%3D%27evenodd%27/%3E%3C/svg%3E');
background-repeat: no-repeat;
background-position: right 10px center;
background-size: 12px;
padding-right: 30px;
}
.provider-select:hover {
background-color: rgba(255, 255, 255, 0.15);
border-color: rgba(255, 255, 255, 0.3);
}
.provider-select:focus {
outline: none;
background-color: rgba(255, 255, 255, 0.15);
border-color: rgba(255, 255, 255, 0.4);
}
.provider-select option {
background: #1a1a1a;
color: white;
padding: 5px;
}
.action-button {
width: 100%;
height: 34px;
@ -164,7 +205,7 @@ export class ApiKeyHeader extends LitElement {
cursor: pointer;
transition: background 0.15s ease;
position: relative;
overflow: hidden;
overflow: visible;
}
.action-button::after {
@ -198,6 +239,15 @@ export class ApiKeyHeader extends LitElement {
font-weight: 500; /* Medium */
margin: 10px 0;
}
.provider-label {
color: rgba(255, 255, 255, 0.7);
font-size: 11px;
font-weight: 400;
margin-bottom: 4px;
width: 100%;
text-align: left;
}
`;
constructor() {
@ -208,6 +258,7 @@ export class ApiKeyHeader extends LitElement {
this.isLoading = false;
this.errorMessage = '';
this.validatedApiKey = null;
this.selectedProvider = 'openai';
this.handleMouseMove = this.handleMouseMove.bind(this);
this.handleMouseUp = this.handleMouseUp.bind(this);
@ -216,6 +267,8 @@ export class ApiKeyHeader extends LitElement {
this.handleInput = this.handleInput.bind(this);
this.handleAnimationEnd = this.handleAnimationEnd.bind(this);
this.handleUsePicklesKey = this.handleUsePicklesKey.bind(this);
this.handleProviderChange = this.handleProviderChange.bind(this);
this.checkAndRequestPermissions = this.checkAndRequestPermissions.bind(this);
}
reset() {
@ -223,11 +276,12 @@ export class ApiKeyHeader extends LitElement {
this.isLoading = false;
this.errorMessage = '';
this.validatedApiKey = null;
this.selectedProvider = 'openai';
this.requestUpdate();
}
async handleMouseDown(e) {
if (e.target.tagName === 'INPUT' || e.target.tagName === 'BUTTON') {
if (e.target.tagName === 'INPUT' || e.target.tagName === 'BUTTON' || e.target.tagName === 'SELECT') {
return;
}
@ -295,6 +349,13 @@ export class ApiKeyHeader extends LitElement {
});
}
// Handles a change event from the AI-provider <select>: records the chosen
// provider, clears any stale validation error, and re-renders the component.
handleProviderChange(e) {
    this.selectedProvider = e.target.value;
    // Reset the error so a message from the previous provider's key is not shown.
    this.errorMessage = '';
    console.log('Provider changed to:', this.selectedProvider);
    this.requestUpdate();
}
handlePaste(e) {
e.preventDefault();
this.errorMessage = '';
@ -343,17 +404,17 @@ export class ApiKeyHeader extends LitElement {
const apiKey = this.apiKey.trim();
let isValid = false;
try {
const isValid = await this.validateApiKey(this.apiKey.trim());
const isValid = await this.validateApiKey(this.apiKey.trim(), this.selectedProvider);
if (isValid) {
console.log('API key valid - checking system permissions...');
console.log('API key valid checking system permissions…');
const permissionResult = await this.checkAndRequestPermissions();
if (permissionResult.success) {
console.log('All permissions granted - starting slide out animation');
console.log('All permissions granted starting slide-out animation');
this.startSlideOutAnimation();
this.validatedApiKey = this.apiKey.trim();
this.validatedProvider = this.selectedProvider;
} else {
this.errorMessage = permissionResult.error || 'Permission setup required';
console.log('Permission setup incomplete:', permissionResult);
@ -371,46 +432,73 @@ export class ApiKeyHeader extends LitElement {
}
}
async validateApiKey(apiKey) {
async validateApiKey(apiKey, provider = 'openai') {
if (!apiKey || apiKey.length < 15) return false;
if (!apiKey.match(/^[A-Za-z0-9_-]+$/)) return false;
try {
console.log('Validating API key with openai models endpoint...');
if (provider === 'openai') {
if (!apiKey.match(/^[A-Za-z0-9_-]+$/)) return false;
const response = await fetch('https://api.openai.com/v1/models', {
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
});
try {
console.log('Validating OpenAI API key...');
if (response.ok) {
const data = await response.json();
const response = await fetch('https://api.openai.com/v1/models', {
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
});
const hasGPTModels = data.data && data.data.some(m => m.id.startsWith('gpt-'));
if (hasGPTModels) {
console.log('API key validation successful - GPT models available');
return true;
if (response.ok) {
const data = await response.json();
const hasGPTModels = data.data && data.data.some(m => m.id.startsWith('gpt-'));
if (hasGPTModels) {
console.log('OpenAI API key validation successful');
return true;
} else {
console.log('API key valid but no GPT models available');
return false;
}
} else {
console.log('API key valid but no GPT models available');
const errorData = await response.json().catch(() => ({}));
console.log('API key validation failed:', response.status, errorData.error?.message || 'Unknown error');
return false;
}
} else {
const errorData = await response.json().catch(() => ({}));
console.log('API key validation failed:', response.status, errorData.error?.message || 'Unknown error');
return false;
} catch (error) {
console.error('API key validation network error:', error);
return apiKey.length >= 20; // Fallback for network issues
}
} else if (provider === 'gemini') {
// Gemini API keys typically start with 'AIza'
if (!apiKey.match(/^[A-Za-z0-9_-]+$/)) return false;
try {
console.log('Validating Gemini API key...');
// Test the API key with a simple models list request
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`);
if (response.ok) {
const data = await response.json();
if (data.models && data.models.length > 0) {
console.log('Gemini API key validation successful');
return true;
}
}
console.log('Gemini API key validation failed');
return false;
} catch (error) {
console.error('Gemini API key validation network error:', error);
return apiKey.length >= 20; // Fallback
}
} catch (error) {
console.error('API key validation network error:', error);
return apiKey.length >= 20; // Fallback for network issues
}
return false;
}
async checkAndRequestPermissions() {
if (!window.require) {
return { success: true };
}
if (!window.require) return { success: true };
const { ipcRenderer } = window.require('electron');
@ -418,44 +506,33 @@ export class ApiKeyHeader extends LitElement {
const permissions = await ipcRenderer.invoke('check-system-permissions');
console.log('[Permissions] Current status:', permissions);
if (!permissions.needsSetup) {
return { success: true };
}
if (!permissions.needsSetup) return { success: true };
if (!permissions.microphone) {
console.log('[Permissions] Requesting microphone permission...');
console.log('[Permissions] Requesting microphone permission');
const micResult = await ipcRenderer.invoke('request-microphone-permission');
if (!micResult.success) {
console.log('[Permissions] Microphone permission denied');
await ipcRenderer.invoke('open-system-preferences', 'microphone');
return {
success: false,
error: 'Please grant microphone access in System Preferences'
error: 'Please grant microphone access in System Preferences',
};
}
}
if (!permissions.screen) {
console.log('[Permissions] Screen recording permission needed');
console.log('[Permissions] Screen-recording permission needed');
await ipcRenderer.invoke('open-system-preferences', 'screen-recording');
this.errorMessage = 'Please grant screen recording permission and try again';
this.requestUpdate();
return {
success: false,
error: 'Please grant screen recording access in System Preferences'
error: 'Please grant screen recording access in System Preferences',
};
}
return { success: true };
} catch (error) {
console.error('[Permissions] Error checking/requesting permissions:', error);
return {
success: false,
error: 'Failed to check permissions'
};
} catch (err) {
console.error('[Permissions] Error checking/requesting permissions:', err);
return { success: false, error: 'Failed to check permissions' };
}
}
@ -489,9 +566,13 @@ export class ApiKeyHeader extends LitElement {
if (this.validatedApiKey) {
if (window.require) {
window.require('electron').ipcRenderer.invoke('api-key-validated', this.validatedApiKey);
window.require('electron').ipcRenderer.invoke('api-key-validated', {
apiKey: this.validatedApiKey,
provider: this.validatedProvider || 'openai'
});
}
this.validatedApiKey = null;
this.validatedProvider = null;
}
}
}
@ -510,6 +591,7 @@ export class ApiKeyHeader extends LitElement {
render() {
const isButtonDisabled = this.isLoading || !this.apiKey || !this.apiKey.trim();
console.log('Rendering with provider:', this.selectedProvider);
return html`
<div class="container" @mousedown=${this.handleMouseDown}>
@ -522,10 +604,21 @@ export class ApiKeyHeader extends LitElement {
<div class="form-content">
<div class="error-message">${this.errorMessage}</div>
<div class="provider-label">Select AI Provider:</div>
<select
class="provider-select"
.value=${this.selectedProvider || 'openai'}
@change=${this.handleProviderChange}
?disabled=${this.isLoading}
tabindex="0"
>
<option value="openai" ?selected=${this.selectedProvider === 'openai'}>OpenAI</option>
<option value="gemini" ?selected=${this.selectedProvider === 'gemini'}>Google Gemini</option>
</select>
<input
type="password"
class="api-input"
placeholder="Enter your OpenAI API key"
placeholder=${this.selectedProvider === 'openai' ? "Enter your OpenAI API key" : "Enter your Gemini API key"}
.value=${this.apiKey || ''}
@input=${this.handleInput}
@keypress=${this.handleKeyPress}

View File

@ -113,6 +113,7 @@ class HeaderTransitionManager {
ipcRenderer.on('request-firebase-logout', async () => {
console.log('[HeaderController] Received request to sign out.');
try {
this.hasApiKey = false;
await signOut(auth);
} catch (error) {
console.error('[HeaderController] Sign out failed', error);
@ -316,7 +317,7 @@ class HeaderTransitionManager {
if (!window.require) return;
return window
.require('electron')
.ipcRenderer.invoke('resize-header-window', { width: 285, height: 220 })
.ipcRenderer.invoke('resize-header-window', { width: 285, height: 300 })
.catch(() => {});
}

View File

@ -5,7 +5,8 @@ const LATEST_SCHEMA = {
{ name: 'display_name', type: 'TEXT NOT NULL' },
{ name: 'email', type: 'TEXT NOT NULL' },
{ name: 'created_at', type: 'INTEGER' },
{ name: 'api_key', type: 'TEXT' }
{ name: 'api_key', type: 'TEXT' },
{ name: 'provider', type: 'TEXT DEFAULT \'openai\'' }
]
},
sessions: {

View File

@ -0,0 +1,377 @@
const { createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('./openAiClient.js');
const { createGeminiClient, getGeminiGenerativeModel, createGeminiChat } = require('./googleGeminiClient.js');
/**
 * Builds an AI client for the requested provider.
 * @param {string} apiKey - The API key used to authenticate the client.
 * @param {string} [provider='openai'] - Either 'openai' or 'gemini'.
 * @returns {object} The provider-specific client instance.
 * @throws {Error} If the provider name is not recognized.
 */
function createAIClient(apiKey, provider = 'openai') {
    if (provider === 'openai') {
        return createOpenAiGenerativeClient(apiKey);
    }
    if (provider === 'gemini') {
        return createGeminiClient(apiKey);
    }
    throw new Error(`Unsupported AI provider: ${provider}`);
}
/**
 * Resolves a generative model handle for the given provider.
 * @param {object} client - A client previously created for that provider.
 * @param {string} [provider='openai'] - Either 'openai' or 'gemini'.
 * @param {string} [model] - Optional model name; falls back to the provider default
 *   ('gpt-4.1' for OpenAI, 'gemini-2.5-flash' for Gemini).
 * @returns {object} The model object exposing generateContent-style methods.
 * @throws {Error} If the provider name is not recognized.
 */
function getGenerativeModel(client, provider = 'openai', model) {
    if (provider === 'openai') {
        return getOpenAiGenerativeModel(client, model || 'gpt-4.1');
    }
    if (provider === 'gemini') {
        return getGeminiGenerativeModel(client, model || 'gemini-2.5-flash');
    }
    throw new Error(`Unsupported AI provider: ${provider}`);
}
/**
 * Runs a single (optionally streaming) chat completion against the selected provider.
 * @param {object} params - Request parameters.
 * @param {string} params.apiKey - Provider API key.
 * @param {string} [params.provider='openai'] - Either 'openai' or 'gemini'.
 * @param {Array<object>} params.messages - OpenAI-style chat messages.
 * @param {number} [params.temperature=0.7] - Sampling temperature.
 * @param {number} [params.maxTokens=1024] - Maximum tokens in the reply.
 * @param {string} [params.model] - Optional model override.
 * @param {boolean} [params.stream=false] - OpenAI only: return the raw streaming Response.
 * @returns {Promise<object|Response>} `{ content, raw }`, or the raw Response when streaming.
 * @throws {Error} On an HTTP failure or an unknown provider.
 */
async function makeChatCompletion({ apiKey, provider = 'openai', messages, temperature = 0.7, maxTokens = 1024, model, stream = false }) {
    if (provider === 'openai') {
        const response = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: model || 'gpt-4.1',
                messages,
                temperature,
                max_tokens: maxTokens,
                stream,
            }),
        });

        if (!response.ok) {
            throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
        }
        if (stream) {
            return response;
        }

        const result = await response.json();
        return {
            content: result.choices[0].message.content.trim(),
            raw: result
        };
    }

    if (provider === 'gemini') {
        const client = createGeminiClient(apiKey);
        const genModel = getGeminiGenerativeModel(client, model || 'gemini-2.5-flash');

        // Flatten OpenAI-style messages into a Gemini "parts" array:
        // system and plain user text become strings; user image_url parts
        // (base64 data URLs) become inlineData blobs. Other roles are dropped.
        const parts = [];
        for (const message of messages) {
            if (message.role === 'system') {
                parts.push(message.content);
                continue;
            }
            if (message.role !== 'user') {
                continue;
            }
            if (typeof message.content === 'string') {
                parts.push(message.content);
                continue;
            }
            if (!Array.isArray(message.content)) {
                continue;
            }
            for (const part of message.content) {
                if (part.type === 'text') {
                    parts.push(part.text);
                } else if (part.type === 'image_url' && part.image_url?.url) {
                    // Extract the MIME type and base64 payload from the data URL.
                    const base64Match = part.image_url.url.match(/^data:(.+);base64,(.+)$/);
                    if (base64Match) {
                        parts.push({
                            inlineData: {
                                mimeType: base64Match[1],
                                data: base64Match[2]
                            }
                        });
                    }
                }
            }
        }

        const result = await genModel.generateContent(parts);
        return {
            content: result.response.text(),
            raw: result
        };
    }

    throw new Error(`Unsupported AI provider: ${provider}`);
}
/**
 * Makes a chat completion request, optionally routed through the Portkey gateway.
 * When `usePortkey` is false — or the provider is not OpenAI — this simply
 * delegates to `makeChatCompletion` for a direct API call.
 * @param {object} params - Request parameters including Portkey options.
 * @param {string} params.apiKey - Provider API key (or Portkey virtual key).
 * @param {string} [params.provider='openai'] - Either 'openai' or 'gemini'.
 * @param {Array<object>} params.messages - OpenAI-style chat messages.
 * @param {number} [params.temperature=0.7] - Sampling temperature.
 * @param {number} [params.maxTokens=1024] - Maximum tokens in the reply.
 * @param {string} [params.model] - Optional model override.
 * @param {boolean} [params.usePortkey=false] - Route through Portkey when true.
 * @param {string|null} [params.portkeyVirtualKey=null] - Portkey virtual key; falls back to apiKey.
 * @returns {Promise<object>} `{ content, raw }` with the completion text and raw payload.
 * @throws {Error} If the Portkey request returns a non-OK HTTP status.
 */
async function makeChatCompletionWithPortkey({
    apiKey,
    provider = 'openai',
    messages,
    temperature = 0.7,
    maxTokens = 1024,
    model,
    usePortkey = false,
    portkeyVirtualKey = null
}) {
    if (!usePortkey) {
        return makeChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
    }

    // Portkey is only supported for OpenAI currently
    if (provider !== 'openai') {
        console.warn('Portkey is only supported for OpenAI provider, falling back to direct API');
        return makeChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
    }

    const fetchUrl = 'https://api.portkey.ai/v1/chat/completions';
    const response = await fetch(fetchUrl, {
        method: 'POST',
        headers: {
            // SECURITY(review): Portkey gateway API key is hard-coded in source;
            // it should live in configuration/environment and be rotated — confirm.
            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
            'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            model: model || 'gpt-4.1',
            messages,
            temperature,
            max_tokens: maxTokens,
        }),
    });

    if (!response.ok) {
        throw new Error(`Portkey API error: ${response.status} ${response.statusText}`);
    }

    const result = await response.json();
    return {
        content: result.choices[0].message.content.trim(),
        raw: result
    };
}
/**
 * Makes a streaming chat completion request.
 * For OpenAI this returns the raw SSE Response; for Gemini it adapts the SDK's
 * chunk stream into an OpenAI-style SSE Response so callers can consume both
 * providers identically.
 * @param {object} params - Request parameters.
 * @param {string} params.apiKey - Provider API key.
 * @param {string} [params.provider='openai'] - Either 'openai' or 'gemini'.
 * @param {Array<object>} params.messages - OpenAI-style chat messages.
 * @param {number} [params.temperature=0.7] - Sampling temperature.
 * @param {number} [params.maxTokens=1024] - Maximum tokens in the reply.
 * @param {string} [params.model] - Optional model override.
 * @returns {Promise<Response>} The streaming response (SSE format).
 * @throws {Error} On an HTTP failure or an unknown provider.
 */
async function makeStreamingChatCompletion({ apiKey, provider = 'openai', messages, temperature = 0.7, maxTokens = 1024, model }) {
    if (provider === 'openai') {
        const fetchUrl = 'https://api.openai.com/v1/chat/completions';
        const response = await fetch(fetchUrl, {
            method: 'POST',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: model || 'gpt-4.1',
                messages,
                temperature,
                max_tokens: maxTokens,
                stream: true,
            }),
        });

        if (!response.ok) {
            throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
        }

        return response;
    } else if (provider === 'gemini') {
        console.log('[AIProviderService] Starting Gemini streaming request');

        // Gemini streaming requires a different approach: we create a
        // ReadableStream that mimics OpenAI's SSE wire format.
        const geminiClient = createGeminiClient(apiKey);

        // Extract the system instruction (if present) from the message list.
        let systemInstruction = '';
        const nonSystemMessages = [];

        for (const msg of messages) {
            if (msg.role === 'system') {
                systemInstruction = msg.content;
            } else {
                nonSystemMessages.push(msg);
            }
        }

        const chat = createGeminiChat(geminiClient, model || 'gemini-2.0-flash-exp', {
            temperature,
            maxOutputTokens: maxTokens || 8192,
            systemInstruction: systemInstruction || undefined
        });

        // NOTE(review): only the most recent message is forwarded to the chat;
        // earlier non-system turns are NOT replayed as history — confirm intended.
        const stream = new ReadableStream({
            async start(controller) {
                try {
                    console.log('[AIProviderService] Processing messages for Gemini:', nonSystemMessages.length, 'messages (excluding system)');

                    // Get the last user message.
                    const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
                    let lastUserMessage = lastMessage.content;

                    // Handle the case where content is an array (multimodal):
                    // collapse the text parts into one string for logging.
                    if (Array.isArray(lastUserMessage)) {
                        const textParts = lastUserMessage.filter(part =>
                            typeof part === 'string' || (part && part.type === 'text')
                        );
                        lastUserMessage = textParts.map(part =>
                            typeof part === 'string' ? part : part.text
                        ).join(' ');
                    }

                    console.log('[AIProviderService] Sending message to Gemini:',
                        typeof lastUserMessage === 'string' ? lastUserMessage.substring(0, 100) + '...' : 'multimodal content');

                    // Prepare the message content for Gemini.
                    let geminiContent = [];

                    if (Array.isArray(lastMessage.content)) {
                        for (const part of lastMessage.content) {
                            if (typeof part === 'string') {
                                geminiContent.push(part);
                            } else if (part.type === 'text') {
                                geminiContent.push(part.text);
                            } else if (part.type === 'image_url' && part.image_url) {
                                // FIX: parse the data URL so the real MIME type is
                                // forwarded (previously hard-coded to image/png,
                                // inconsistent with makeChatCompletion); falls back
                                // to the old behavior for non-matching URLs.
                                const dataUrlMatch = part.image_url.url.match(/^data:(.+);base64,(.+)$/);
                                geminiContent.push({
                                    inlineData: {
                                        mimeType: dataUrlMatch ? dataUrlMatch[1] : 'image/png',
                                        data: dataUrlMatch ? dataUrlMatch[2] : part.image_url.url.split(',')[1]
                                    }
                                });
                            }
                        }
                    } else {
                        geminiContent = [lastUserMessage];
                    }

                    console.log('[AIProviderService] Prepared Gemini content:',
                        geminiContent.length, 'parts');

                    // Stream the response, re-encoding each chunk as an
                    // OpenAI-style SSE "delta" event.
                    let chunkCount = 0;
                    let totalContent = '';

                    for await (const chunk of chat.sendMessageStream(geminiContent)) {
                        chunkCount++;
                        const chunkText = chunk.text || '';
                        totalContent += chunkText;

                        const data = JSON.stringify({
                            choices: [{
                                delta: {
                                    content: chunkText
                                }
                            }]
                        });
                        controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`));
                    }

                    console.log(`[AIProviderService] Streamed ${chunkCount} chunks, total length: ${totalContent.length} chars`);

                    // Send the final done message, matching OpenAI's terminator.
                    controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
                    controller.close();
                    console.log('[AIProviderService] Gemini streaming completed successfully');
                } catch (error) {
                    console.error('[AIProviderService] Gemini streaming error:', error);
                    controller.error(error);
                }
            }
        });

        // Wrap the stream in a Response so callers can treat it like fetch output.
        return new Response(stream, {
            headers: {
                'Content-Type': 'text/event-stream',
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive'
            }
        });
    } else {
        throw new Error(`Unsupported AI provider: ${provider}`);
    }
}
/**
 * Makes a streaming chat completion request, optionally routed through the
 * Portkey gateway. When `usePortkey` is false — or the provider is not
 * OpenAI — this delegates to `makeStreamingChatCompletion` for a direct call.
 * @param {object} params - Request parameters.
 * @param {string} params.apiKey - Provider API key (or Portkey virtual key).
 * @param {string} [params.provider='openai'] - Either 'openai' or 'gemini'.
 * @param {Array<object>} params.messages - OpenAI-style chat messages.
 * @param {number} [params.temperature=0.7] - Sampling temperature.
 * @param {number} [params.maxTokens=1024] - Maximum tokens in the reply.
 * @param {string} [params.model] - Optional model override.
 * @param {boolean} [params.usePortkey=false] - Route through Portkey when true.
 * @param {string|null} [params.portkeyVirtualKey=null] - Portkey virtual key; falls back to apiKey.
 * @returns {Promise<Response>} The streaming response.
 * @throws {Error} If the Portkey request returns a non-OK HTTP status.
 */
async function makeStreamingChatCompletionWithPortkey({
    apiKey,
    provider = 'openai',
    messages,
    temperature = 0.7,
    maxTokens = 1024,
    model,
    usePortkey = false,
    portkeyVirtualKey = null
}) {
    if (!usePortkey) {
        return makeStreamingChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
    }

    // Portkey is only supported for OpenAI currently
    if (provider !== 'openai') {
        console.warn('Portkey is only supported for OpenAI provider, falling back to direct API');
        return makeStreamingChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
    }

    const fetchUrl = 'https://api.portkey.ai/v1/chat/completions';
    const response = await fetch(fetchUrl, {
        method: 'POST',
        headers: {
            // SECURITY(review): Portkey gateway API key is hard-coded in source;
            // it should live in configuration/environment and be rotated — confirm.
            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
            'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            model: model || 'gpt-4.1',
            messages,
            temperature,
            max_tokens: maxTokens,
            stream: true,
        }),
    });

    if (!response.ok) {
        throw new Error(`Portkey API error: ${response.status} ${response.statusText}`);
    }

    return response;
}
module.exports = {
createAIClient,
getGenerativeModel,
makeChatCompletion,
makeChatCompletionWithPortkey,
makeStreamingChatCompletion,
makeStreamingChatCompletionWithPortkey
};

View File

@ -0,0 +1,171 @@
const { GoogleGenerativeAI } = require('@google/generative-ai');
const { GoogleGenAI } = require('@google/genai');
/**
 * Creates a Google Gemini client instance for generative AI.
 * @param {string} apiKey - The API key used to authenticate requests.
 * @returns {GoogleGenerativeAI} The initialized Gemini client.
 */
function createGeminiClient(apiKey) {
    const client = new GoogleGenerativeAI(apiKey);
    return client;
}
/**
 * Gets a Gemini model wrapper for text/image generation.
 * The wrapper exposes a `generateContent(parts)` method that accepts a flat
 * array of OpenAI-flattened parts (strings and `{ inlineData }` objects) and
 * returns an object shaped like `{ response: { text() } }`.
 * @param {GoogleGenerativeAI} client - The Gemini client instance.
 * @param {string} [model='gemini-2.5-flash'] - The text/vision model name.
 * @returns {object} Model object with a `generateContent` method.
 */
function getGeminiGenerativeModel(client, model = 'gemini-2.5-flash') {
    const geminiModel = client.getGenerativeModel({ model: model });

    return {
        generateContent: async (parts) => {
            // Heuristically split incoming parts: the first string containing
            // 'You are' is treated as the system prompt; everything else is
            // user content (text strings or inlineData image blobs).
            let systemPrompt = '';
            const userContent = [];

            for (const part of parts) {
                if (typeof part === 'string') {
                    if (systemPrompt === '' && part.includes('You are')) {
                        systemPrompt = part;
                    } else {
                        userContent.push(part);
                    }
                } else if (part.inlineData) {
                    // Pass base64 image data through in Gemini's format.
                    userContent.push({
                        inlineData: {
                            mimeType: part.inlineData.mimeType,
                            data: part.inlineData.data
                        }
                    });
                }
            }

            // Build the content array. Gemini has no separate system role in
            // this path, so the system prompt is prepended to the first user
            // text part when one exists.
            const content = [];
            if (systemPrompt) {
                if (typeof userContent[0] === 'string') {
                    content.push(systemPrompt + '\n\n' + userContent[0]);
                    content.push(...userContent.slice(1));
                } else {
                    // BUGFIX: previously `systemPrompt + '\n\n' + userContent[0]`
                    // produced the literal string "...undefined" when there was
                    // no user content, or "...[object Object]" when the first
                    // part was image data. Keep the prompt as its own part.
                    content.push(systemPrompt);
                    content.push(...userContent);
                }
            } else {
                content.push(...userContent);
            }

            try {
                const result = await geminiModel.generateContent(content);
                const response = await result.response;
                return {
                    response: {
                        text: () => response.text()
                    }
                };
            } catch (error) {
                console.error('Gemini API error:', error);
                throw error;
            }
        }
    };
}
/**
 * Creates a Gemini chat session for multi-turn conversations.
 * @param {GoogleGenerativeAI} client - The Gemini client instance.
 * @param {string} [model='gemini-2.5-flash'] - The model to use.
 * @param {object} [config={}] - Configuration options.
 * @param {string} [config.systemInstruction] - Optional system instruction.
 * @param {Array} [config.history] - Prior turns to seed the chat with.
 * @param {number} [config.temperature=0.7] - Sampling temperature.
 * @param {number} [config.maxOutputTokens=8192] - Output token cap.
 * @returns {object} Chat session with sendMessage / sendMessageStream / getHistory.
 */
function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
    const geminiModel = client.getGenerativeModel({
        model: model,
        systemInstruction: config.systemInstruction
    });

    const chat = geminiModel.startChat({
        history: config.history || [],
        generationConfig: {
            // BUGFIX: use ?? instead of || so explicit 0 values survive
            // (temperature 0 previously silently fell back to 0.7).
            temperature: config.temperature ?? 0.7,
            maxOutputTokens: config.maxOutputTokens ?? 8192,
        }
    });

    return {
        // Sends one message; resolves to { text } with the reply text.
        sendMessage: async (message) => {
            const result = await chat.sendMessage(message);
            const response = await result.response;
            return {
                text: response.text()
            };
        },
        // Async generator yielding { text } per streamed chunk.
        sendMessageStream: async function* (message) {
            const result = await chat.sendMessageStream(message);
            for await (const chunk of result.stream) {
                yield {
                    text: chunk.text()
                };
            }
        },
        getHistory: () => chat.getHistory()
    };
}
// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
//     const liveClient = new GoogleGenAI({
//         vertexai: false, // do not use Vertex AI
//         apiKey,
//     });
//     // Open a live STT session
//     const session = await liveClient.live.connect({
//         model: 'gemini-live-2.5-flash-preview',
//         callbacks,
//         config: {
//             inputAudioTranscription: {}, // required for realtime STT
//             speechConfig: { languageCode: language },
//         },
//     });
//     return {
//         sendRealtimeInput: async data => session.send({
//             audio: { data, mimeType: 'audio/pcm;rate=24000' }
//         }),
//         close: async () => session.close(),
//     };
// }
/**
 * Opens a realtime Gemini Live speech-to-text session.
 * @param {string} apiKey - Gemini API key.
 * @param {object} [options]
 * @param {string} [options.language='en-US'] - Language code; bare codes like
 *   'en' are coerced to BCP-47 by appending '-US'.
 * @param {object} [options.callbacks={}] - Live-session callbacks passed
 *   straight through to the SDK.
 * @returns {Promise<{sendRealtimeInput: Function, close: Function}>} Thin
 *   wrapper around the live session.
 */
async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
    // (1) Reuse the old-style helper: a plain (non-Vertex) client.
    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });

    // (2) Force the language code into BCP-47 form (e.g. 'en' -> 'en-US').
    const lang = language.includes('-') ? language : `${language}-US`;

    const session = await liveClient.live.connect({
        model: 'gemini-live-2.5-flash-preview',
        callbacks,
        config: {
            inputAudioTranscription: {},
            speechConfig: { languageCode: lang },
        },
    });

    // (3) SDK 0.5+: sendRealtimeInput is the official method name.
    return {
        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
        close: async () => session.close(),
    };
}
module.exports = {
createGeminiClient,
getGeminiGenerativeModel,
createGeminiChat,
connectToGeminiSession,
};

View File

@ -226,17 +226,17 @@ class SQLiteClient {
});
}
async saveApiKey(apiKey, uid = this.defaultUserId) {
async saveApiKey(apiKey, uid = this.defaultUserId, provider = 'openai') {
return new Promise((resolve, reject) => {
this.db.run(
'UPDATE users SET api_key = ? WHERE uid = ?',
[apiKey, uid],
'UPDATE users SET api_key = ?, provider = ? WHERE uid = ?',
[apiKey, provider, uid],
function(err) {
if (err) {
console.error('SQLite: Failed to save API key:', err);
reject(err);
} else {
console.log(`SQLite: API key saved for user ${uid}.`);
console.log(`SQLite: API key saved for user ${uid} with provider ${provider}.`);
resolve({ changes: this.changes });
}
}

File diff suppressed because it is too large Load Diff

View File

@ -3,11 +3,13 @@ const { BrowserWindow, ipcMain } = require('electron');
const { spawn } = require('child_process');
const { saveDebugAudio } = require('./audioUtils.js');
const { getSystemPrompt } = require('../../common/prompts/promptBuilder.js');
const { connectToGeminiSession } = require('../../common/services/googleGeminiClient.js');
const { connectToOpenAiSession, createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('../../common/services/openAiClient.js');
const { makeChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
const sqliteClient = require('../../common/services/sqliteClient');
const dataService = require('../../common/services/dataService');
const { isFirebaseLoggedIn, getCurrentFirebaseUser } = require('../../electron/windowManager.js');
const { isFirebaseLoggedIn, getCurrentFirebaseUser, getStoredProvider } = require('../../electron/windowManager.js');
function getApiKey() {
const { getStoredApiKey } = require('../../electron/windowManager.js');
@ -28,6 +30,18 @@ function getApiKey() {
return null;
}
async function getAiProvider() {
try {
const { ipcRenderer } = require('electron');
const provider = await ipcRenderer.invoke('get-ai-provider');
return provider || 'openai';
} catch (error) {
// If we're in the main process, get it directly
const { getStoredProvider } = require('../../electron/windowManager.js');
return getStoredProvider ? getStoredProvider() : 'openai';
}
}
let currentSessionId = null;
let conversationHistory = [];
let isInitializingSession = false;
@ -208,41 +222,25 @@ Keep all points concise and build upon previous analysis if provided.`,
if (!API_KEY) {
throw new Error('No API key available');
}
const provider = getStoredProvider ? getStoredProvider() : 'openai';
const loggedIn = isFirebaseLoggedIn(); // true ➜ vKey, false ➜ apiKey
const keyType = loggedIn ? 'vKey' : 'apiKey';
console.log(`[LiveSummary] keyType: ${keyType}`);
const usePortkey = loggedIn && provider === 'openai'; // Only use Portkey for OpenAI with Firebase
const fetchUrl = keyType === 'apiKey' ? 'https://api.openai.com/v1/chat/completions' : 'https://api.portkey.ai/v1/chat/completions';
console.log(`[LiveSummary] provider: ${provider}, usePortkey: ${usePortkey}`);
const headers =
keyType === 'apiKey'
? {
Authorization: `Bearer ${API_KEY}`,
'Content-Type': 'application/json',
}
: {
'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
'x-portkey-virtual-key': API_KEY,
'Content-Type': 'application/json',
};
const response = await fetch(fetchUrl, {
method: 'POST',
headers,
body: JSON.stringify({
model: 'gpt-4.1',
messages,
temperature: 0.7,
max_tokens: 1024,
}),
const completion = await makeChatCompletionWithPortkey({
apiKey: API_KEY,
provider: provider,
messages: messages,
temperature: 0.7,
maxTokens: 1024,
model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
usePortkey: usePortkey,
portkeyVirtualKey: usePortkey ? API_KEY : null
});
if (!response.ok) {
throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
}
const result = await response.json();
const responseText = result.choices[0].message.content.trim();
const responseText = completion.content;
console.log(`✅ Analysis response received: ${responseText}`);
const structuredData = parseResponseText(responseText, previousAnalysisResult);
@ -582,7 +580,6 @@ async function initializeLiveSummarySession(language = 'en') {
sendToRenderer('session-initializing', true);
sendToRenderer('update-status', 'Initializing sessions...');
// Merged block
const API_KEY = getApiKey();
if (!API_KEY) {
console.error('FATAL ERROR: API Key is not defined.');
@ -594,73 +591,90 @@ async function initializeLiveSummarySession(language = 'en') {
await initializeNewSession();
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
console.log(`[LiveSummaryService] Initializing STT for provider: ${provider}`);
try {
const handleMyMessage = message => {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (myCompletionTimer) {
clearTimeout(myCompletionTimer);
myCompletionTimer = null;
}
myCurrentUtterance += text;
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Me',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (isGemini) {
// console.log('[Gemini Raw Message - Me]:', JSON.stringify(message, null, 2));
const text = message.serverContent?.inputTranscription?.text || '';
if (text && text.trim()) {
const finalUtteranceText = text.trim();
myCurrentUtterance = '';
debounceMyCompletion(finalUtteranceText);
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
if (finalUtteranceText && finalUtteranceText !== '.') {
debounceMyCompletion(finalUtteranceText);
}
}
} else if (message.error) {
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (myCompletionTimer) clearTimeout(myCompletionTimer);
myCompletionTimer = null;
myCurrentUtterance += text;
const continuousText = myCompletionBuffer + (myCompletionBuffer ? ' ' : '') + myCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Me',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) {
const finalUtteranceText = text.trim();
myCurrentUtterance = '';
debounceMyCompletion(finalUtteranceText);
}
}
}
if (message.error) {
console.error('[Me] STT Session Error:', message.error);
}
};
const handleTheirMessage = message => {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (theirCompletionTimer) {
clearTimeout(theirCompletionTimer);
theirCompletionTimer = null;
}
theirCurrentUtterance += text;
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Them',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (isGemini) {
// console.log('[Gemini Raw Message - Them]:', JSON.stringify(message, null, 2));
const text = message.serverContent?.inputTranscription?.text || '';
if (text && text.trim()) {
const finalUtteranceText = text.trim();
theirCurrentUtterance = '';
debounceTheirCompletion(finalUtteranceText);
const finalUtteranceText = text.trim().replace(/<noise>/g, '').trim();
if (finalUtteranceText && finalUtteranceText !== '.') {
debounceTheirCompletion(finalUtteranceText);
}
}
} else if (message.error) {
} else {
const type = message.type;
const text = message.transcript || message.delta || (message.alternatives && message.alternatives[0]?.transcript) || '';
if (type === 'conversation.item.input_audio_transcription.delta') {
if (theirCompletionTimer) clearTimeout(theirCompletionTimer);
theirCompletionTimer = null;
theirCurrentUtterance += text;
const continuousText = theirCompletionBuffer + (theirCompletionBuffer ? ' ' : '') + theirCurrentUtterance;
if (text && !text.includes('vq_lbr_audio_')) {
sendToRenderer('stt-update', {
speaker: 'Them',
text: continuousText,
isPartial: true,
isFinal: false,
timestamp: Date.now(),
});
}
} else if (type === 'conversation.item.input_audio_transcription.completed') {
if (text && text.trim()) {
const finalUtteranceText = text.trim();
theirCurrentUtterance = '';
debounceTheirCompletion(finalUtteranceText);
}
}
}
if (message.error) {
console.error('[Them] STT Session Error:', message.error);
}
};
@ -682,10 +696,17 @@ async function initializeLiveSummarySession(language = 'en') {
},
};
[mySttSession, theirSttSession] = await Promise.all([
connectToOpenAiSession(API_KEY, mySttConfig, keyType),
connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
]);
if (isGemini) {
[mySttSession, theirSttSession] = await Promise.all([
connectToGeminiSession(API_KEY, mySttConfig),
connectToGeminiSession(API_KEY, theirSttConfig),
]);
} else {
[mySttSession, theirSttSession] = await Promise.all([
connectToOpenAiSession(API_KEY, mySttConfig, keyType),
connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
]);
}
console.log('✅ Both STT sessions initialized successfully.');
triggerAnalysisIfNeeded();
@ -697,7 +718,7 @@ async function initializeLiveSummarySession(language = 'en') {
sendToRenderer('update-status', 'Connected. Ready to listen.');
return true;
} catch (error) {
console.error('❌ Failed to initialize OpenAI STT sessions:', error);
console.error('❌ Failed to initialize STT sessions:', error);
isInitializingSession = false;
sendToRenderer('session-initializing', false);
sendToRenderer('update-status', 'Initialization failed.');
@ -769,6 +790,9 @@ async function startMacOSAudioCapture() {
let audioBuffer = Buffer.alloc(0);
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
systemAudioProc.stdout.on('data', async data => {
audioBuffer = Buffer.concat([audioBuffer, data]);
@ -783,10 +807,11 @@ async function startMacOSAudioCapture() {
if (theirSttSession) {
try {
// await theirSttSession.sendRealtimeInput({
// audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' },
// });
await theirSttSession.sendRealtimeInput(base64Data);
// await theirSttSession.sendRealtimeInput(base64Data);
const payload = isGemini
? { audio: { data: base64Data, mimeType: 'audio/pcm;rate=24000' } }
: base64Data;
await theirSttSession.sendRealtimeInput(payload);
} catch (err) {
console.error('Error sending system audio:', err.message);
}
@ -905,9 +930,17 @@ function setupLiveSummaryIpcHandlers() {
});
ipcMain.handle('send-audio-content', async (event, { data, mimeType }) => {
const provider = await getAiProvider();
const isGemini = provider === 'gemini';
if (!mySttSession) return { success: false, error: 'User STT session not active' };
try {
await mySttSession.sendRealtimeInput(data);
// await mySttSession.sendRealtimeInput(data);
// provider에 맞는 형식으로 래핑
const payload = isGemini
? { audio: { data, mimeType: mimeType || 'audio/pcm;rate=24000' } }
: data; // OpenAI는 base64 string 그대로
await mySttSession.sendRealtimeInput(payload);
return { success: true };
} catch (error) {
console.error('Error sending user audio:', error);

View File

@ -1,5 +1,6 @@
// renderer.js
const { ipcRenderer } = require('electron');
const { makeStreamingChatCompletionWithPortkey } = require('../../common/services/aiProviderService.js');
let mediaStream = null;
let screenshotInterval = null;
@ -229,7 +230,7 @@ class SimpleAEC {
this.sampleRate = 24000;
this.delaySamples = Math.floor((this.echoDelay / 1000) * this.sampleRate);
this.echoGain = 0.9;
this.echoGain = 0.5;
this.noiseFloor = 0.01;
// 🔧 Adaptive-gain parameters (User-tuned, very aggressive)
@ -998,40 +999,22 @@ async function sendMessage(userPrompt, options = {}) {
}
const { isLoggedIn } = await queryLoginState();
const keyType = isLoggedIn ? 'vKey' : 'apiKey';
const provider = await ipcRenderer.invoke('get-ai-provider');
const usePortkey = isLoggedIn && provider === 'openai';
console.log('🚀 Sending request to OpenAI...');
const { url, headers } =
keyType === 'apiKey'
? {
url: 'https://api.openai.com/v1/chat/completions',
headers: { Authorization: `Bearer ${API_KEY}`, 'Content-Type': 'application/json' },
}
: {
url: 'https://api.portkey.ai/v1/chat/completions',
headers: {
'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
'x-portkey-virtual-key': API_KEY,
'Content-Type': 'application/json',
},
};
console.log(`🚀 Sending request to ${provider} AI...`);
const response = await fetch(url, {
method: 'POST',
headers,
body: JSON.stringify({
model: 'gpt-4.1',
messages,
temperature: 0.7,
max_tokens: 2048,
stream: true,
}),
const response = await makeStreamingChatCompletionWithPortkey({
apiKey: API_KEY,
provider: provider,
messages: messages,
temperature: 0.7,
maxTokens: 2048,
model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
usePortkey: usePortkey,
portkeyVirtualKey: usePortkey ? API_KEY : null
});
if (!response.ok) {
throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
}
// --- 스트리밍 응답 처리 ---
const reader = response.body.getReader();
const decoder = new TextDecoder();