Ho Jin Yu 7d33ea9ca8
[Refactor] full refactor and file structure changed (#125)
* refactoring the bridge

* Update aec submodule

* folder structure refactor

* fixing ask logic

* resolve import err

* fix askview

* fix header content html path

* fix systemaudiodump path

* centralized ask logic

* delete legacy code

* change askservice to class

* settingsService facade

* fix getCurrentModelInfo

* common service ipc moved to featureBridge

* featureBridge init

* ui fix

* add featureBridge func for listenservice

* fix preload conflict

* shortcuts separated

* refactor ask

* transfer roles from askview to askservice

* modifying windowBridge

* delete legacy ask code

* retrieve conversation history for askservice

* fix legacy code

* shortcut moved

* change naming for featurebridge

* screenshot moved from windowManager

* rough refactor done

---------

Co-authored-by: sanio <sanio@pickle.com>
Co-authored-by: jhyang0 <junhyuck0819@gmail.com>
2025-07-13 15:31:24 +09:00

329 lines
10 KiB
JavaScript

const { GoogleGenerativeAI } = require("@google/generative-ai")
const { GoogleGenAI } = require("@google/genai")
/**
 * Static helpers for the Gemini provider.
 */
class GeminiProvider {
    /**
     * Validates a Gemini API key by requesting the model list with it.
     *
     * Never throws: every failure is reported through the returned object.
     *
     * @param {string} key - Gemini API key to check
     * @returns {Promise<{success: boolean, error?: string}>} `success: true` when the
     *   endpoint answers with an OK status; otherwise `success: false` plus a message
     *   (the API's own error message when the response body provides one).
     */
    static async validateApiKey(key) {
        if (!key || typeof key !== 'string') {
            return { success: false, error: 'Invalid Gemini API key format.' };
        }

        try {
            // Build the query through URLSearchParams so characters such as
            // '&', '#', '=' or spaces inside the key cannot alter the URL.
            const validationUrl = new URL('https://generativelanguage.googleapis.com/v1beta/models');
            validationUrl.searchParams.set('key', key);

            const response = await fetch(validationUrl.href);
            if (response.ok) {
                return { success: true };
            }

            // The error body is optional; an unparsable body falls back to the status text below.
            const errorData = await response.json().catch(() => ({}));
            const message = errorData?.error?.message || `Validation failed with status: ${response.status}`;
            return { success: false, error: message };
        } catch (error) {
            console.error(`[GeminiProvider] Network error during key validation:`, error);
            return { success: false, error: 'A network error occurred during validation.' };
        }
    }
}
/**
 * Creates a Gemini STT session
 *
 * Opens a Gemini Live connection with input audio transcription enabled and
 * returns a thin wrapper around the resulting session.
 *
 * @param {object} opts - Configuration options
 * @param {string} opts.apiKey - Gemini API key
 * @param {string} [opts.language='en-US'] - Language code; a code without a
 *   region part (no "-") gets "-US" appended
 * @param {object} [opts.callbacks] - Event callbacks forwarded to the live
 *   connection. `onmessage` receives every object message after it has been
 *   tagged with `provider = 'gemini'`; non-object messages are dropped.
 * @returns {Promise<object>} STT session exposing `sendRealtimeInput(payload)` and `close()`
 */
async function createSTT({ apiKey, language = "en-US", callbacks = {}, ...config }) {
  // NOTE: any extra options collected in `config` are accepted but not used here.
  const liveClient = new GoogleGenAI({ vertexai: false, apiKey })

  // Language code BCP-47 conversion
  const lang = language.includes("-") ? language : `${language}-US`

  const session = await liveClient.live.connect({
    model: 'gemini-live-2.5-flash-preview',
    callbacks: {
      ...callbacks,
      // The live connection looks up the lowercase `onmessage` key, so the
      // tagging wrapper must be registered under that exact name (and after
      // the spread) to replace the caller's raw callback.
      onmessage: (msg) => {
        if (!msg || typeof msg !== 'object') return;
        msg.provider = 'gemini';
        callbacks.onmessage?.(msg);
      }
    },
    config: {
      inputAudioTranscription: {},
      speechConfig: { languageCode: lang },
    },
  })

  return {
    sendRealtimeInput: async (payload) => session.sendRealtimeInput(payload),
    close: async () => session.close(),
  }
}
/**
 * Creates a Gemini LLM instance with proper text response handling
 *
 * @param {object} opts - Configuration options
 * @param {string} opts.apiKey - Gemini API key
 * @param {string} [opts.model='gemini-2.5-flash'] - Model name
 * @param {number} [opts.temperature=0.7] - Sampling temperature
 * @param {number} [opts.maxTokens=8192] - Forwarded as `maxOutputTokens`
 * @returns {object} Object exposing:
 *   - `generateContent(parts)`: sends strings / `inlineData` parts, resolves to
 *     `{ response: { text() } }`
 *   - `chat(messages)`: sends role/content messages, resolves to
 *     `{ content, raw }`; throws if there is no non-system message
 */
function createLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) {
  const client = new GoogleGenerativeAI(apiKey)

  const defaultSystemInstruction =
    "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested."

  // Strips JSON-formatting requests from a system prompt and appends an
  // explicit plain-text instruction when one is not already present.
  const cleanSystemInstruction = (text) => {
    let cleaned = text
      .replace(/respond in json/gi, "")
      .replace(/format.*json/gi, "")
      .replace(/return.*json/gi, "")
    if (!cleaned.includes("respond naturally")) {
      cleaned += "\n\nRespond naturally in plain text, not in JSON or structured format."
    }
    return cleaned
  }

  // Turns an image URL into an inlineData part. The MIME type is read from the
  // data-URL header (e.g. "data:image/jpeg;base64,...") instead of being
  // assumed; "image/png" is only the fallback when no type is declared.
  const toInlineData = (url) => {
    const mimeMatch = /^data:([^;,]+)[;,]/i.exec(url)
    return {
      inlineData: {
        mimeType: mimeMatch ? mimeMatch[1] : "image/png",
        data: url.split(",")[1],
      },
    }
  }

  // Converts message content into what sendMessage accepts: a plain value is
  // passed through, an array becomes a list of strings and inlineData parts.
  const toGeminiContent = (content) => {
    if (!Array.isArray(content)) {
      return content
    }
    const geminiContent = []
    for (const part of content) {
      if (typeof part === "string") {
        geminiContent.push(part)
      } else if (part.type === "text") {
        geminiContent.push(part.text)
      } else if (part.type === "image_url" && part.image_url) {
        geminiContent.push(toInlineData(part.image_url.url))
      }
    }
    return geminiContent
  }

  // Builds the `parts` array of a history entry; multimodal (array) content is
  // expanded part by part instead of being wrapped in a single text part.
  const toHistoryParts = (content) => {
    if (!Array.isArray(content)) {
      return [{ text: content }]
    }
    return toGeminiContent(content).map((part) => (typeof part === "string" ? { text: part } : part))
  }

  return {
    generateContent: async (parts) => {
      const geminiModel = client.getGenerativeModel({
        model: model,
        generationConfig: {
          temperature,
          maxOutputTokens: maxTokens,
          // Ensure we get text responses, not JSON
          responseMimeType: "text/plain",
        },
      })

      // Strings are forwarded as-is (never treated as system prompts); only
      // the mimeType/data of inlineData parts is kept. Other parts are skipped.
      const userContent = []
      for (const part of parts) {
        if (typeof part === "string") {
          userContent.push(part)
        } else if (part?.inlineData) {
          userContent.push({
            inlineData: {
              mimeType: part.inlineData.mimeType,
              data: part.inlineData.data,
            },
          })
        }
      }

      try {
        const result = await geminiModel.generateContent(userContent)
        const response = await result.response
        // Return plain text, not wrapped in JSON structure
        return {
          response: {
            text: () => response.text(),
          },
        }
      } catch (error) {
        console.error("Gemini API error:", error)
        throw error
      }
    },

    chat: async (messages) => {
      // Separate system prompts (the last one wins) from the conversation.
      let systemInstruction = ""
      const conversation = []
      for (const msg of messages) {
        if (msg.role === "system") {
          systemInstruction = cleanSystemInstruction(msg.content)
        } else {
          conversation.push(msg)
        }
      }

      // The final non-system message is the one sent; everything before it is
      // history. This also works when a system message comes last.
      const lastMessage = conversation.pop()
      if (!lastMessage) {
        throw new Error("Gemini chat requires at least one non-system message.")
      }

      const history = conversation.map((msg) => ({
        role: msg.role === "user" ? "user" : "model",
        parts: toHistoryParts(msg.content),
      }))

      const geminiModel = client.getGenerativeModel({
        model: model,
        systemInstruction: systemInstruction || defaultSystemInstruction,
        generationConfig: {
          temperature: temperature,
          maxOutputTokens: maxTokens,
          // Force plain text responses
          responseMimeType: "text/plain",
        },
      })

      const chat = geminiModel.startChat({
        history: history,
      })

      const result = await chat.sendMessage(toGeminiContent(lastMessage.content))
      const response = await result.response

      // Return plain text content
      return {
        content: response.text(),
        raw: result,
      }
    },
  }
}
/**
 * Creates a Gemini streaming LLM instance with text response fix
 *
 * @param {object} opts - Configuration options
 * @param {string} opts.apiKey - Gemini API key
 * @param {string} [opts.model='gemini-2.5-flash'] - Model name
 * @param {number} [opts.temperature=0.7] - Sampling temperature
 * @param {number} [opts.maxTokens=8192] - Forwarded as `maxOutputTokens`
 * @returns {object} Object exposing `streamChat(messages)`, which resolves to
 *   a `Response` whose body is a server-sent-event stream: one
 *   `data: {"choices":[{"delta":{"content":...}}]}` event per chunk, followed
 *   by `data: [DONE]`. Failures while streaming error the body stream.
 */
function createStreamingLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) {
  const client = new GoogleGenerativeAI(apiKey)

  // Turns an image URL into an inlineData part. The MIME type is read from the
  // data-URL header (e.g. "data:image/jpeg;base64,...") instead of being
  // assumed; "image/png" is only the fallback when no type is declared.
  const toInlineData = (url) => {
    const mimeMatch = /^data:([^;,]+)[;,]/i.exec(url)
    return {
      inlineData: {
        mimeType: mimeMatch ? mimeMatch[1] : "image/png",
        data: url.split(",")[1],
      },
    }
  }

  // Converts one message's content (string or array of parts) into the
  // `parts` array of a generateContentStream request.
  const toRequestParts = (content) => {
    let geminiContent = []
    if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part === "string") {
          geminiContent.push(part)
        } else if (part.type === "text") {
          geminiContent.push(part.text)
        } else if (part.type === "image_url" && part.image_url) {
          geminiContent.push(toInlineData(part.image_url.url))
        }
      }
    } else {
      geminiContent = [content]
    }

    return geminiContent.map((part) => {
      if (typeof part === "string") {
        return { text: part }
      } else if (part?.inlineData) {
        return { inlineData: part.inlineData }
      }
      return part
    })
  }

  return {
    streamChat: async (messages) => {
      console.log("[Gemini Provider] Starting streaming request")

      let systemInstruction = ""
      const nonSystemMessages = []

      for (const msg of messages) {
        if (msg.role === "system") {
          // Clean and modify system instruction
          systemInstruction = msg.content
            .replace(/respond in json/gi, "")
            .replace(/format.*json/gi, "")
            .replace(/return.*json/gi, "")

          if (!systemInstruction.includes("respond naturally")) {
            systemInstruction += "\n\nRespond naturally in plain text, not in JSON or structured format."
          }
        } else {
          nonSystemMessages.push(msg)
        }
      }

      const geminiModel = client.getGenerativeModel({
        model: model,
        systemInstruction:
          systemInstruction ||
          "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested.",
        generationConfig: {
          temperature,
          maxOutputTokens: maxTokens || 8192,
          // Force plain text responses
          responseMimeType: "text/plain",
        },
      })

      // One encoder is enough for the whole stream.
      const encoder = new TextEncoder()

      const stream = new ReadableStream({
        async start(controller) {
          try {
            // NOTE: only the last non-system message is sent; earlier
            // messages in `messages` are not part of the request.
            const lastMessage = nonSystemMessages[nonSystemMessages.length - 1]
            if (!lastMessage) {
              throw new Error("Gemini streamChat requires at least one non-system message.")
            }

            const result = await geminiModel.generateContentStream({
              contents: [
                {
                  role: "user",
                  parts: toRequestParts(lastMessage.content),
                },
              ],
            })

            for await (const chunk of result.stream) {
              const chunkText = chunk.text() || ""

              // Format as SSE data - this should now be plain text
              const data = JSON.stringify({
                choices: [
                  {
                    delta: {
                      content: chunkText,
                    },
                  },
                ],
              })
              controller.enqueue(encoder.encode(`data: ${data}\n\n`))
            }

            controller.enqueue(encoder.encode("data: [DONE]\n\n"))
            controller.close()
          } catch (error) {
            console.error("[Gemini Provider] Streaming error:", error)
            controller.error(error)
          }
        },
      })

      return new Response(stream, {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          Connection: "keep-alive",
        },
      })
    },
  }
}
// Public API of this module: the GeminiProvider class (API key validation)
// and the factory functions for STT sessions, LLM clients and streaming LLM clients.
module.exports = {
GeminiProvider,
createSTT,
createLLM,
createStreamingLLM
};