From 5cc0d2b83a22605600be9bf3036a5639ad6d90b6 Mon Sep 17 00:00:00 2001 From: Surya Date: Tue, 8 Jul 2025 15:54:04 +0530 Subject: [PATCH] Fix : Gemini JSON Format Answer fixed to plain text answer --- package-lock.json | 66 +---- pickleglass_web/package-lock.json | 10 - src/common/ai/providers/gemini.js | 419 ++++++++++++++---------------- 3 files changed, 193 insertions(+), 302 deletions(-) diff --git a/package-lock.json b/package-lock.json index 5ae800a..944e413 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,16 @@ { "name": "pickle-glass", - "version": "0.2.1", + "version": "0.2.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "pickle-glass", - "version": "0.2.1", + "version": "0.2.2", "hasInstallScript": true, "license": "GPL-3.0", "dependencies": { - "@anthropic-ai/sdk": "^0.56.0", - "@google/genai": "^1.8.0", "@google/generative-ai": "^0.24.1", "axios": "^1.10.0", @@ -54,7 +52,6 @@ "electron-liquid-glass": "^1.0.1" } }, - "node_modules/@anthropic-ai/sdk": { "version": "0.56.0", "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.56.0.tgz", @@ -64,7 +61,6 @@ "anthropic-ai-sdk": "bin/cli" } }, - "node_modules/@develar/schema-utils": { "version": "2.6.5", "resolved": "https://registry.npmjs.org/@develar/schema-utils/-/schema-utils-2.6.5.tgz", @@ -889,11 +885,9 @@ } }, "node_modules/@emnapi/runtime": { - "version": "1.4.4", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.4.tgz", "integrity": "sha512-hHyapA4A3gPaDCNfiqyZUStTMqIkKRshqPIuDOXv1hcBnD4U3l8cP0T1HMCfGRxQ6V64TGCcoswChANyOAwbQg==", - "license": "MIT", "optional": true, "dependencies": { @@ -901,11 +895,9 @@ } }, "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.6.tgz", "integrity": "sha512-ShbM/3XxwuxjFiuVBHA+d3j5dyac0aEVVq1oluIDf71hUw0aRF59dV/efUsIwFnR6m8JNM2FjZOzmaZ8yG61kw==", - "cpu": [ "ppc64" ], @@ -920,11 +912,9 @@ } }, "node_modules/@esbuild/android-arm": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.6.tgz", "integrity": "sha512-S8ToEOVfg++AU/bHwdksHNnyLyVM+eMVAOf6yRKFitnwnbwwPNqKr3srzFRe7nzV69RQKb5DgchIX5pt3L53xg==", - "cpu": [ "arm" ], @@ -939,11 +929,9 @@ } }, "node_modules/@esbuild/android-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.6.tgz", "integrity": "sha512-hd5zdUarsK6strW+3Wxi5qWws+rJhCCbMiC9QZyzoxfk5uHRIE8T287giQxzVpEvCwuJ9Qjg6bEjcRJcgfLqoA==", - "cpu": [ "arm64" ], @@ -958,11 +946,9 @@ } }, "node_modules/@esbuild/android-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.6.tgz", "integrity": "sha512-0Z7KpHSr3VBIO9A/1wcT3NTy7EB4oNC4upJ5ye3R7taCc2GUdeynSLArnon5G8scPwaU866d3H4BCrE5xLW25A==", - "cpu": [ "x64" ], @@ -977,11 +963,9 @@ } }, "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.6.tgz", "integrity": "sha512-FFCssz3XBavjxcFxKsGy2DYK5VSvJqa6y5HXljKzhRZ87LvEi13brPrf/wdyl/BbpbMKJNOr1Sd0jtW4Ge1pAA==", - "cpu": [ "arm64" ], @@ -996,11 +980,9 @@ } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.6.tgz", "integrity": "sha512-GfXs5kry/TkGM2vKqK2oyiLFygJRqKVhawu3+DOCk7OxLy/6jYkWXhlHwOoTb0WqGnWGAS7sooxbZowy+pK9Yg==", - "cpu": [ "x64" ], @@ -1015,11 +997,9 @@ } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.6.tgz", "integrity": "sha512-aoLF2c3OvDn2XDTRvn8hN6DRzVVpDlj2B/F66clWd/FHLiHaG3aVZjxQX2DYphA5y/evbdGvC6Us13tvyt4pWg==", - "cpu": [ "arm64" ], @@ -1034,11 +1014,9 @@ } }, "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.6.tgz", "integrity": "sha512-2SkqTjTSo2dYi/jzFbU9Plt1vk0+nNg8YC8rOXXea+iA3hfNJWebKYPs3xnOUf9+ZWhKAaxnQNUf2X9LOpeiMQ==", - "cpu": [ "x64" ], @@ -1053,11 +1031,9 @@ } }, "node_modules/@esbuild/linux-arm": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.6.tgz", "integrity": "sha512-SZHQlzvqv4Du5PrKE2faN0qlbsaW/3QQfUUc6yO2EjFcA83xnwm91UbEEVx4ApZ9Z5oG8Bxz4qPE+HFwtVcfyw==", - "cpu": [ "arm" ], @@ -1072,11 +1048,9 @@ } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.6.tgz", "integrity": "sha512-b967hU0gqKd9Drsh/UuAm21Khpoh6mPBSgz8mKRq4P5mVK8bpA+hQzmm/ZwGVULSNBzKdZPQBRT3+WuVavcWsQ==", - "cpu": [ "arm64" ], @@ -1091,11 +1065,9 @@ } }, "node_modules/@esbuild/linux-ia32": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.6.tgz", "integrity": "sha512-aHWdQ2AAltRkLPOsKdi3xv0mZ8fUGPdlKEjIEhxCPm5yKEThcUjHpWB1idN74lfXGnZ5SULQSgtr5Qos5B0bPw==", - "cpu": [ "ia32" ], @@ -1110,11 +1082,9 @@ } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.6.tgz", "integrity": "sha512-VgKCsHdXRSQ7E1+QXGdRPlQ/e08bN6WMQb27/TMfV+vPjjTImuT9PmLXupRlC90S1JeNNW5lzkAEO/McKeJ2yg==", - "cpu": [ "loong64" ], @@ -1129,11 +1099,9 @@ } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.6.tgz", "integrity": "sha512-WViNlpivRKT9/py3kCmkHnn44GkGXVdXfdc4drNmRl15zVQ2+D2uFwdlGh6IuK5AAnGTo2qPB1Djppj+t78rzw==", - "cpu": [ "mips64el" ], @@ -1148,11 +1116,9 @@ } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.6.tgz", "integrity": "sha512-wyYKZ9NTdmAMb5730I38lBqVu6cKl4ZfYXIs31Baf8aoOtB4xSGi3THmDYt4BTFHk7/EcVixkOV2uZfwU3Q2Jw==", - "cpu": [ "ppc64" ], @@ -1167,11 +1133,9 @@ } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.6.tgz", "integrity": "sha512-KZh7bAGGcrinEj4qzilJ4hqTY3Dg2U82c8bv+e1xqNqZCrCyc+TL9AUEn5WGKDzm3CfC5RODE/qc96OcbIe33w==", - "cpu": [ "riscv64" ], @@ -1186,11 +1150,9 @@ } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.6.tgz", "integrity": "sha512-9N1LsTwAuE9oj6lHMyyAM+ucxGiVnEqUdp4v7IaMmrwb06ZTEVCIs3oPPplVsnjPfyjmxwHxHMF8b6vzUVAUGw==", - "cpu": [ "s390x" ], @@ -1205,11 +1167,9 @@ } }, "node_modules/@esbuild/linux-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.6.tgz", "integrity": "sha512-A6bJB41b4lKFWRKNrWoP2LHsjVzNiaurf7wyj/XtFNTsnPuxwEBWHLty+ZE0dWBKuSK1fvKgrKaNjBS7qbFKig==", - "cpu": [ "x64" ], @@ -1224,11 +1184,9 @@ } }, "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.6.tgz", "integrity": "sha512-IjA+DcwoVpjEvyxZddDqBY+uJ2Snc6duLpjmkXm/v4xuS3H+3FkLZlDm9ZsAbF9rsfP3zeA0/ArNDORZgrxR/Q==", - "cpu": [ "arm64" ], @@ -1243,11 +1201,9 @@ } }, "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.6.tgz", "integrity": "sha512-dUXuZr5WenIDlMHdMkvDc1FAu4xdWixTCRgP7RQLBOkkGgwuuzaGSYcOpW4jFxzpzL1ejb8yF620UxAqnBrR9g==", - "cpu": [ "x64" ], @@ -1262,11 +1218,9 @@ } }, "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.6.tgz", "integrity": "sha512-l8ZCvXP0tbTJ3iaqdNf3pjaOSd5ex/e6/omLIQCVBLmHTlfXW3zAxQ4fnDmPLOB1x9xrcSi/xtCWFwCZRIaEwg==", - "cpu": [ "arm64" ], @@ -1281,11 +1235,9 @@ } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.6.tgz", "integrity": "sha512-hKrmDa0aOFOr71KQ/19JC7az1P0GWtCN1t2ahYAf4O007DHZt/dW8ym5+CUdJhQ/qkZmI1HAF8KkJbEFtCL7gw==", - "cpu": [ "x64" ], @@ -1299,7 +1251,6 @@ "node": ">=18" } }, - "node_modules/@esbuild/openharmony-arm64": { "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.6.tgz", @@ -1321,7 +1272,6 @@ "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.6.tgz", "integrity": "sha512-dyCGxv1/Br7MiSC42qinGL8KkG4kX0pEsdb0+TKhmJZgCUDBGmyo1/ArCjNGiOLiIAgdbWgmWgib4HoCi5t7kA==", - "cpu": [ "x64" ], @@ -1336,11 +1286,9 @@ } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.6.tgz", "integrity": "sha512-42QOgcZeZOvXfsCBJF5Afw73t4veOId//XD3i+/9gSkhSV6Gk3VPlWncctI+JcOyERv85FUo7RxuxGy+z8A43Q==", - "cpu": [ "arm64" ], @@ -1355,11 +1303,9 @@ } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.6.tgz", "integrity": "sha512-4AWhgXmDuYN7rJI6ORB+uU9DHLq/erBbuMoAuB4VWJTu5KtCgcKYPynF0YI1VkBNuEfjNlLrFr9KZPJzrtLkrQ==", - "cpu": [ "ia32" ], @@ -1374,11 +1320,9 @@ } }, "node_modules/@esbuild/win32-x64": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.6.tgz", "integrity": "sha512-NgJPHHbEpLQgDH2MjQu90pzW/5vvXIZ7KOnPyNBm92A6WgZ/7b6fJyUBjoumLqeOQQGqY2QjQxRo97ah4Sj0cA==", - "cpu": [ "x64" ], @@ -3255,11 +3199,9 @@ } }, "node_modules/agent-base": { - "version": "7.1.4", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", - "license": "MIT", "engines": { "node": ">= 14" @@ -6055,11 +5997,9 @@ "optional": true }, "node_modules/esbuild": { - "version": "0.25.6", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.6.tgz", "integrity": "sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==", - "dev": true, "hasInstallScript": true, "license": "MIT", @@ -6070,7 +6010,6 @@ "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.6", "@esbuild/android-arm": "0.25.6", "@esbuild/android-arm64": "0.25.6", @@ -6097,7 +6036,6 @@ "@esbuild/win32-arm64": "0.25.6", "@esbuild/win32-ia32": "0.25.6", "@esbuild/win32-x64": "0.25.6" - } }, "node_modules/escalade": { diff --git a/pickleglass_web/package-lock.json b/pickleglass_web/package-lock.json index a1726d6..f1d6fa0 100644 --- a/pickleglass_web/package-lock.json +++ b/pickleglass_web/package-lock.json @@ -42,27 +42,21 @@ } }, "node_modules/@emnapi/core": { - "version": "1.4.4", "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.4.4.tgz", "integrity": "sha512-A9CnAbC6ARNMKcIcrQwq6HeHCjpcBZ5wSx4U01WXCqEKlrzB9F9315WDNHkrs2xbx7YjjSxbUYxuN6EQzpcY2g==", - "dev": true, "license": "MIT", "optional": true, "dependencies": { - "@emnapi/wasi-threads": "1.0.3", - "tslib": "^2.4.0" } }, "node_modules/@emnapi/runtime": { - "version": "1.4.4", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.4.tgz", "integrity": "sha512-hHyapA4A3gPaDCNfiqyZUStTMqIkKRshqPIuDOXv1hcBnD4U3l8cP0T1HMCfGRxQ6V64TGCcoswChANyOAwbQg==", - "dev": true, "license": "MIT", "optional": true, @@ -71,11 +65,9 @@ } }, "node_modules/@emnapi/wasi-threads": { - "version": "1.0.3", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.0.3.tgz", "integrity": "sha512-8K5IFFsQqF9wQNJptGbS6FNKgUTsSRYnTqNCG1vPP8jFdjSv18n2mQfJpkt2Oibo9iBEzcDnDxNwKTzC7svlJw==", - "dev": true, "license": "MIT", "optional": true, @@ -2675,11 +2667,9 @@ "license": "MIT" }, "node_modules/electron-to-chromium": { - "version": "1.5.180", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.180.tgz", "integrity": "sha512-ED+GEyEh3kYMwt2faNmgMB0b8O5qtATGgR4RmRsIp4T6p7B8vdMbIedYndnvZfsaXvSzegtpfqRMDNCjjiSduA==", - "license": "ISC" }, "node_modules/emoji-regex": { diff --git a/src/common/ai/providers/gemini.js b/src/common/ai/providers/gemini.js index c3c4743..f3f864c 100644 --- a/src/common/ai/providers/gemini.js +++ b/src/common/ai/providers/gemini.js @@ -1,5 +1,5 @@ -const { GoogleGenerativeAI } = require('@google/generative-ai'); -const { GoogleGenAI } = require('@google/genai'); +const { GoogleGenerativeAI } = require("@google/generative-ai") +const { GoogleGenAI } = require("@google/genai") /** * Creates a Gemini STT session @@ -9,322 +9,285 @@ const { GoogleGenAI } = require('@google/genai'); * @param {object} [opts.callbacks] - Event callbacks * @returns {Promise} STT session */ -async function createSTT({ apiKey, language = 'en-US', callbacks = {}, ...config }) { - const liveClient = new GoogleGenAI({ vertexai: false, apiKey }); +async function createSTT({ apiKey, language = "en-US", callbacks = {}, ...config }) { + const liveClient = new GoogleGenAI({ vertexai: false, apiKey }) // Language code BCP-47 conversion - const lang = language.includes('-') ? language : `${language}-US`; + const lang = language.includes("-") ? language : `${language}-US` const session = await liveClient.live.connect({ - model: 'gemini-live-2.5-flash-preview', + model: "gemini-live-2.5-flash-preview", callbacks, config: { inputAudioTranscription: {}, speechConfig: { languageCode: lang }, }, - }); + }) return { - sendRealtimeInput: async payload => session.sendRealtimeInput(payload), + sendRealtimeInput: async (payload) => session.sendRealtimeInput(payload), close: async () => session.close(), - }; + } } /** - * Creates a Gemini LLM instance - * @param {object} opts - Configuration options - * @param {string} opts.apiKey - Gemini API key - * @param {string} [opts.model='gemini-2.5-flash'] - Model name - * @param {number} [opts.temperature=0.7] - Temperature - * @param {number} [opts.maxTokens=8192] - Max tokens - * @returns {object} LLM instance + * Creates a Gemini LLM instance with proper text response handling */ -function createLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) { - const client = new GoogleGenerativeAI(apiKey); - +function createLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) { + const client = new GoogleGenerativeAI(apiKey) + return { generateContent: async (parts) => { - const geminiModel = client.getGenerativeModel({ model: model }); - - let systemPrompt = ''; - let userContent = []; - + const geminiModel = client.getGenerativeModel({ + model: model, + generationConfig: { + temperature, + maxOutputTokens: maxTokens, + // Ensure we get text responses, not JSON + responseMimeType: "text/plain", + }, + }) + + const systemPrompt = "" + const userContent = [] + for (const part of parts) { - if (typeof part === 'string') { - if (systemPrompt === '' && part.includes('You are')) { - systemPrompt = part; - } else { - userContent.push(part); - } + if (typeof part === "string") { + // Don't automatically assume strings starting with "You are" are system prompts + // Check if it's explicitly marked as a system instruction + userContent.push(part) } else if (part.inlineData) { - // Convert base64 image data to Gemini format userContent.push({ inlineData: { mimeType: part.inlineData.mimeType, - data: part.inlineData.data - } - }); + data: part.inlineData.data, + }, + }) } } - - // Prepare content array - const content = []; - - // Add system instruction if present - if (systemPrompt) { - // For Gemini, we'll prepend system prompt to user content - content.push(systemPrompt + '\n\n' + userContent[0]); - content.push(...userContent.slice(1)); - } else { - content.push(...userContent); - } - + try { - const result = await geminiModel.generateContent(content); - const response = await result.response; - + const result = await geminiModel.generateContent(userContent) + const response = await result.response + + // Return plain text, not wrapped in JSON structure return { response: { - text: () => response.text() - } - }; + text: () => response.text(), + }, + } } catch (error) { - console.error('Gemini API error:', error); - throw error; + console.error("Gemini API error:", error) + throw error } }, - - // For compatibility with chat-style interfaces + chat: async (messages) => { - // Extract system instruction if present - let systemInstruction = ''; - const history = []; - let lastMessage; + // Filter out any system prompts that might be causing JSON responses + let systemInstruction = "" + const history = [] + let lastMessage messages.forEach((msg, index) => { - if (msg.role === 'system') { - systemInstruction = msg.content; - return; + if (msg.role === "system") { + // Clean system instruction - avoid JSON formatting requests + systemInstruction = msg.content + .replace(/respond in json/gi, "") + .replace(/format.*json/gi, "") + .replace(/return.*json/gi, "") + + // Add explicit instruction for natural text + if (!systemInstruction.includes("respond naturally")) { + systemInstruction += "\n\nRespond naturally in plain text, not in JSON or structured format." + } + return } - - // Gemini's history format - const role = msg.role === 'user' ? 'user' : 'model'; + + const role = msg.role === "user" ? "user" : "model" if (index === messages.length - 1) { - lastMessage = msg; + lastMessage = msg } else { - history.push({ role, parts: [{ text: msg.content }] }); + history.push({ role, parts: [{ text: msg.content }] }) } - }); - - const geminiModel = client.getGenerativeModel({ + }) + + const geminiModel = client.getGenerativeModel({ model: model, - systemInstruction: systemInstruction - }); - - const chat = geminiModel.startChat({ - history: history, + systemInstruction: + systemInstruction || + "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested.", generationConfig: { temperature: temperature, maxOutputTokens: maxTokens, - } - }); - - // Get the last user message content - let content = lastMessage.content; - - // Handle multimodal content for the last message + // Force plain text responses + responseMimeType: "text/plain", + }, + }) + + const chat = geminiModel.startChat({ + history: history, + }) + + let content = lastMessage.content + + // Handle multimodal content if (Array.isArray(content)) { - const geminiContent = []; + const geminiContent = [] for (const part of content) { - if (typeof part === 'string') { - geminiContent.push(part); - } else if (part.type === 'text') { - geminiContent.push(part.text); - } else if (part.type === 'image_url' && part.image_url) { - // Convert base64 image to Gemini format - const base64Data = part.image_url.url.split(',')[1]; + if (typeof part === "string") { + geminiContent.push(part) + } else if (part.type === "text") { + geminiContent.push(part.text) + } else if (part.type === "image_url" && part.image_url) { + const base64Data = part.image_url.url.split(",")[1] geminiContent.push({ inlineData: { - mimeType: 'image/png', - data: base64Data - } - }); + mimeType: "image/png", + data: base64Data, + }, + }) } } - content = geminiContent; + content = geminiContent } - - const result = await chat.sendMessage(content); - const response = await result.response; + + const result = await chat.sendMessage(content) + const response = await result.response + + // Return plain text content return { content: response.text(), - raw: result - }; - } - }; + raw: result, + } + }, + } } /** - * Creates a Gemini streaming LLM instance - * @param {object} opts - Configuration options - * @param {string} opts.apiKey - Gemini API key - * @param {string} [opts.model='gemini-2.5-flash'] - Model name - * @param {number} [opts.temperature=0.7] - Temperature - * @param {number} [opts.maxTokens=8192] - Max tokens - * @returns {object} Streaming LLM instance + * Creates a Gemini streaming LLM instance with text response fix */ -function createStreamingLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) { - const client = new GoogleGenerativeAI(apiKey); - +function createStreamingLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) { + const client = new GoogleGenerativeAI(apiKey) + return { streamChat: async (messages) => { - console.log('[Gemini Provider] Starting streaming request'); - - // Extract system instruction if present - let systemInstruction = ''; - const nonSystemMessages = []; - + console.log("[Gemini Provider] Starting streaming request") + + let systemInstruction = "" + const nonSystemMessages = [] + for (const msg of messages) { - if (msg.role === 'system') { - systemInstruction = msg.content; + if (msg.role === "system") { + // Clean and modify system instruction + systemInstruction = msg.content + .replace(/respond in json/gi, "") + .replace(/format.*json/gi, "") + .replace(/return.*json/gi, "") + + if (!systemInstruction.includes("respond naturally")) { + systemInstruction += "\n\nRespond naturally in plain text, not in JSON or structured format." + } } else { - nonSystemMessages.push(msg); + nonSystemMessages.push(msg) } } - - const geminiModel = client.getGenerativeModel({ + + const geminiModel = client.getGenerativeModel({ model: model, - systemInstruction: systemInstruction || undefined - }); - - const chat = geminiModel.startChat({ - history: [], + systemInstruction: + systemInstruction || + "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested.", generationConfig: { temperature, maxOutputTokens: maxTokens || 8192, - } - }); - - // Create a ReadableStream to handle Gemini's streaming + // Force plain text responses + responseMimeType: "text/plain", + }, + }) + const stream = new ReadableStream({ async start(controller) { try { - console.log('[Gemini Provider] Processing messages:', nonSystemMessages.length, 'messages (excluding system)'); - - // Get the last user message - const lastMessage = nonSystemMessages[nonSystemMessages.length - 1]; - let lastUserMessage = lastMessage.content; - - // Handle case where content might be an array (multimodal) - if (Array.isArray(lastUserMessage)) { - // Extract text content from array - const textParts = lastUserMessage.filter(part => - typeof part === 'string' || (part && part.type === 'text') - ); - lastUserMessage = textParts.map(part => - typeof part === 'string' ? part : part.text - ).join(' '); - } - - console.log('[Gemini Provider] Sending message to Gemini:', - typeof lastUserMessage === 'string' ? lastUserMessage.substring(0, 100) + '...' : 'multimodal content'); - - // Prepare the message content for Gemini - let geminiContent = []; - - // Handle multimodal content properly + const lastMessage = nonSystemMessages[nonSystemMessages.length - 1] + let geminiContent = [] + if (Array.isArray(lastMessage.content)) { for (const part of lastMessage.content) { - if (typeof part === 'string') { - geminiContent.push(part); - } else if (part.type === 'text') { - geminiContent.push(part.text); - } else if (part.type === 'image_url' && part.image_url) { - // Convert base64 image to Gemini format - const base64Data = part.image_url.url.split(',')[1]; + if (typeof part === "string") { + geminiContent.push(part) + } else if (part.type === "text") { + geminiContent.push(part.text) + } else if (part.type === "image_url" && part.image_url) { + const base64Data = part.image_url.url.split(",")[1] geminiContent.push({ inlineData: { - mimeType: 'image/png', - data: base64Data - } - }); + mimeType: "image/png", + data: base64Data, + }, + }) } } } else { - geminiContent = [lastUserMessage]; + geminiContent = [lastMessage.content] } - - console.log('[Gemini Provider] Prepared Gemini content:', - geminiContent.length, 'parts'); - - // Stream the response - let chunkCount = 0; - let totalContent = ''; - - const contentParts = geminiContent.map(part => { - if (typeof part === 'string') { - return { text: part }; + + const contentParts = geminiContent.map((part) => { + if (typeof part === "string") { + return { text: part } } else if (part.inlineData) { - return { inlineData: part.inlineData }; + return { inlineData: part.inlineData } } - return part; - }); + return part + }) const result = await geminiModel.generateContentStream({ - contents: [{ - role: 'user', - parts: contentParts - }], - generationConfig: { - temperature, - maxOutputTokens: maxTokens || 8192, - } - }); - + contents: [ + { + role: "user", + parts: contentParts, + }, + ], + }) + for await (const chunk of result.stream) { - chunkCount++; - const chunkText = chunk.text() || ''; - totalContent += chunkText; - - // Format as SSE data + const chunkText = chunk.text() || "" + + // Format as SSE data - this should now be plain text const data = JSON.stringify({ - choices: [{ - delta: { - content: chunkText - } - }] - }); - controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`)); + choices: [ + { + delta: { + content: chunkText, + }, + }, + ], + }) + controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`)) } - - console.log(`[Gemini Provider] Streamed ${chunkCount} chunks, total length: ${totalContent.length} chars`); - - // Send the final done message - controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n')); - controller.close(); - console.log('[Gemini Provider] Streaming completed successfully'); + + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() } catch (error) { - console.error('[Gemini Provider] Streaming error:', error); - controller.error(error); + console.error("[Gemini Provider] Streaming error:", error) + controller.error(error) } - } - }); - - // Create a Response object with the stream + }, + }) + return new Response(stream, { headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive' - } - }); - } - }; + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }, + }) + }, + } } module.exports = { createSTT, createLLM, - createStreamingLLM -}; + createStreamingLLM, +}