Merge branch 'pr-84'

2025-07-08 22:43:39 +09:00 · 2025-07-08 22:43:39 +09:00 · 55961c956a
commit 55961c956a
parent f6540ef3ec 2bb5fcfae7
2 changed files with 192 additions and 237 deletions
--- a/pickleglass_web/package-lock.json
+++ b/pickleglass_web/package-lock.json
@ -42,27 +42,21 @@
      }
    },
    "node_modules/@emnapi/core": {
      "version": "1.4.4",
      "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.4.4.tgz",
      "integrity": "sha512-A9CnAbC6ARNMKcIcrQwq6HeHCjpcBZ5wSx4U01WXCqEKlrzB9F9315WDNHkrs2xbx7YjjSxbUYxuN6EQzpcY2g==",
      "dev": true,
      "license": "MIT",
      "optional": true,
      "dependencies": {
        "@emnapi/wasi-threads": "1.0.3",
        "tslib": "^2.4.0"
      }
    },
    "node_modules/@emnapi/runtime": {
      "version": "1.4.4",
      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.4.tgz",
      "integrity": "sha512-hHyapA4A3gPaDCNfiqyZUStTMqIkKRshqPIuDOXv1hcBnD4U3l8cP0T1HMCfGRxQ6V64TGCcoswChANyOAwbQg==",
      "dev": true,
      "license": "MIT",
      "optional": true,
@ -71,11 +65,9 @@
      }
    },
    "node_modules/@emnapi/wasi-threads": {
      "version": "1.0.3",
      "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.0.3.tgz",
      "integrity": "sha512-8K5IFFsQqF9wQNJptGbS6FNKgUTsSRYnTqNCG1vPP8jFdjSv18n2mQfJpkt2Oibo9iBEzcDnDxNwKTzC7svlJw==",
      "dev": true,
      "license": "MIT",
      "optional": true,
@ -2675,11 +2667,9 @@
      "license": "MIT"
    },
    "node_modules/electron-to-chromium": {
      "version": "1.5.180",
      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.180.tgz",
      "integrity": "sha512-ED+GEyEh3kYMwt2faNmgMB0b8O5qtATGgR4RmRsIp4T6p7B8vdMbIedYndnvZfsaXvSzegtpfqRMDNCjjiSduA==",
      "license": "ISC"
    },
    "node_modules/emoji-regex": {
--- a/src/common/ai/providers/gemini.js
+++ b/src/common/ai/providers/gemini.js
@ -1,5 +1,5 @@
-const { GoogleGenerativeAI } = require('@google/generative-ai');
+const { GoogleGenerativeAI } = require("@google/generative-ai")
-const { GoogleGenAI } = require('@google/genai');
+const { GoogleGenAI } = require("@google/genai")
 /**
 * Creates a Gemini STT session
@ -9,13 +9,14 @@ const { GoogleGenAI } = require('@google/genai');
 * @param {object} [opts.callbacks] - Event callbacks
 * @returns {Promise<object>} STT session
 */
-async function createSTT({ apiKey, language = 'en-US', callbacks = {}, ...config }) {
+async function createSTT({ apiKey, language = "en-US", callbacks = {}, ...config }) {
-  const liveClient = new GoogleGenAI({ vertexai: false, apiKey });
+  const liveClient = new GoogleGenAI({ vertexai: false, apiKey })
  // Language code BCP-47 conversion
-  const lang = language.includes('-') ? language : `${language}-US`;
+  const lang = language.includes("-") ? language : `${language}-US`
  const session = await liveClient.live.connect({
    model: 'gemini-live-2.5-flash-preview',
    callbacks: {
      ...callbacks,
@ -25,313 +26,277 @@ async function createSTT({ apiKey, language = 'en-US', callbacks = {}, ...config
        callbacks.onmessage?.(msg);
      }
    },
    config: {
      inputAudioTranscription: {},
      speechConfig: { languageCode: lang },
    },
-  });
+  })
  return {
-    sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
+    sendRealtimeInput: async (payload) => session.sendRealtimeInput(payload),
    close: async () => session.close(),
-  };
+  }
 }
 /**
- * Creates a Gemini LLM instance
+ * Creates a Gemini LLM instance with proper text response handling
 * @param {object} opts - Configuration options
 * @param {string} opts.apiKey - Gemini API key
 * @param {string} [opts.model='gemini-2.5-flash'] - Model name
 * @param {number} [opts.temperature=0.7] - Temperature
 * @param {number} [opts.maxTokens=8192] - Max tokens
 * @returns {object} LLM instance
 */
-function createLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) {
+function createLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) {
-  const client = new GoogleGenerativeAI(apiKey);
+  const client = new GoogleGenerativeAI(apiKey)
  return {
    generateContent: async (parts) => {
-      const geminiModel = client.getGenerativeModel({ model: model });
+      const geminiModel = client.getGenerativeModel({
        model: model,
        generationConfig: {
          temperature,
          maxOutputTokens: maxTokens,
          // Ensure we get text responses, not JSON
          responseMimeType: "text/plain",
        },
      })
-      let systemPrompt = '';
+      const systemPrompt = ""
-      let userContent = [];
+      const userContent = []
      for (const part of parts) {
-        if (typeof part === 'string') {
+        if (typeof part === "string") {
-          if (systemPrompt === '' && part.includes('You are')) {
+          // Don't automatically assume strings starting with "You are" are system prompts
-            systemPrompt = part;
+          // Check if it's explicitly marked as a system instruction
-          } else {
+          userContent.push(part)
            userContent.push(part);
          }
        } else if (part.inlineData) {
          // Convert base64 image data to Gemini format
          userContent.push({
            inlineData: {
              mimeType: part.inlineData.mimeType,
-              data: part.inlineData.data
+              data: part.inlineData.data,
            },
          })
        }
          });
        }
      }
      // Prepare content array
      const content = [];
      // Add system instruction if present
      if (systemPrompt) {
        // For Gemini, we'll prepend system prompt to user content
        content.push(systemPrompt + '\n\n' + userContent[0]);
        content.push(...userContent.slice(1));
      } else {
        content.push(...userContent);
      }
      try {
-        const result = await geminiModel.generateContent(content);
+        const result = await geminiModel.generateContent(userContent)
-        const response = await result.response;
+        const response = await result.response
        // Return plain text, not wrapped in JSON structure
        return {
          response: {
-            text: () => response.text()
+            text: () => response.text(),
          },
        }
        };
      } catch (error) {
-        console.error('Gemini API error:', error);
+        console.error("Gemini API error:", error)
-        throw error;
+        throw error
      }
    },
    // For compatibility with chat-style interfaces
    chat: async (messages) => {
-      // Extract system instruction if present
+      // Filter out any system prompts that might be causing JSON responses
-      let systemInstruction = '';
+      let systemInstruction = ""
-      const history = [];
+      const history = []
-      let lastMessage;
+      let lastMessage
      messages.forEach((msg, index) => {
-        if (msg.role === 'system') {
+        if (msg.role === "system") {
-          systemInstruction = msg.content;
+          // Clean system instruction - avoid JSON formatting requests
-          return;
+          systemInstruction = msg.content
            .replace(/respond in json/gi, "")
            .replace(/format.*json/gi, "")
            .replace(/return.*json/gi, "")
          // Add explicit instruction for natural text
          if (!systemInstruction.includes("respond naturally")) {
            systemInstruction += "\n\nRespond naturally in plain text, not in JSON or structured format."
          }
          return
        }
-        // Gemini's history format
+        const role = msg.role === "user" ? "user" : "model"
        const role = msg.role === 'user' ? 'user' : 'model';
        if (index === messages.length - 1) {
-            lastMessage = msg;
+          lastMessage = msg
        } else {
-            history.push({ role, parts: [{ text: msg.content }] });
+          history.push({ role, parts: [{ text: msg.content }] })
        }
-      });
+      })
      const geminiModel = client.getGenerativeModel({
        model: model,
-        systemInstruction: systemInstruction
+        systemInstruction:
-      });
+          systemInstruction ||
-      
+          "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested.",
      const chat = geminiModel.startChat({
        history: history,
        generationConfig: {
          temperature: temperature,
          maxOutputTokens: maxTokens,
-        }
+          // Force plain text responses
-      });
+          responseMimeType: "text/plain",
        },
      })
-      // Get the last user message content
+      const chat = geminiModel.startChat({
-      let content = lastMessage.content;
+        history: history,
      })
-      // Handle multimodal content for the last message
+      let content = lastMessage.content
      // Handle multimodal content
      if (Array.isArray(content)) {
-        const geminiContent = [];
+        const geminiContent = []
        for (const part of content) {
-          if (typeof part === 'string') {
+          if (typeof part === "string") {
-            geminiContent.push(part);
+            geminiContent.push(part)
-          } else if (part.type === 'text') {
+          } else if (part.type === "text") {
-            geminiContent.push(part.text);
+            geminiContent.push(part.text)
-          } else if (part.type === 'image_url' && part.image_url) {
+          } else if (part.type === "image_url" && part.image_url) {
-            // Convert base64 image to Gemini format
+            const base64Data = part.image_url.url.split(",")[1]
            const base64Data = part.image_url.url.split(',')[1];
            geminiContent.push({
              inlineData: {
-                mimeType: 'image/png',
+                mimeType: "image/png",
-                data: base64Data
+                data: base64Data,
-              }
+              },
-            });
+            })
          }
        }
-        content = geminiContent;
+        content = geminiContent
      }
-      const result = await chat.sendMessage(content);
+      const result = await chat.sendMessage(content)
-      const response = await result.response;
+      const response = await result.response
      // Return plain text content
      return {
        content: response.text(),
-        raw: result
+        raw: result,
-      };
+      }
    },
  }
  };
 }
 /**
- * Creates a Gemini streaming LLM instance
+ * Creates a Gemini streaming LLM instance with text response fix
 * @param {object} opts - Configuration options
 * @param {string} opts.apiKey - Gemini API key
 * @param {string} [opts.model='gemini-2.5-flash'] - Model name
 * @param {number} [opts.temperature=0.7] - Temperature
 * @param {number} [opts.maxTokens=8192] - Max tokens
 * @returns {object} Streaming LLM instance
 */
-function createStreamingLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) {
+function createStreamingLLM({ apiKey, model = "gemini-2.5-flash", temperature = 0.7, maxTokens = 8192, ...config }) {
-  const client = new GoogleGenerativeAI(apiKey);
+  const client = new GoogleGenerativeAI(apiKey)
  return {
    streamChat: async (messages) => {
-      console.log('[Gemini Provider] Starting streaming request');
+      console.log("[Gemini Provider] Starting streaming request")
-      // Extract system instruction if present
+      let systemInstruction = ""
-      let systemInstruction = '';
+      const nonSystemMessages = []
      const nonSystemMessages = [];
      for (const msg of messages) {
-        if (msg.role === 'system') {
+        if (msg.role === "system") {
-          systemInstruction = msg.content;
+          // Clean and modify system instruction
          systemInstruction = msg.content
            .replace(/respond in json/gi, "")
            .replace(/format.*json/gi, "")
            .replace(/return.*json/gi, "")
          if (!systemInstruction.includes("respond naturally")) {
            systemInstruction += "\n\nRespond naturally in plain text, not in JSON or structured format."
          }
        } else {
-          nonSystemMessages.push(msg);
+          nonSystemMessages.push(msg)
        }
      }
      const geminiModel = client.getGenerativeModel({
        model: model,
-        systemInstruction: systemInstruction || undefined
+        systemInstruction:
-      });
+          systemInstruction ||
-      
+          "Respond naturally in plain text format. Do not use JSON or structured responses unless specifically requested.",
      const chat = geminiModel.startChat({
        history: [],
        generationConfig: {
          temperature,
          maxOutputTokens: maxTokens || 8192,
-        }
+          // Force plain text responses
-      });
+          responseMimeType: "text/plain",
        },
      })
      // Create a ReadableStream to handle Gemini's streaming
      const stream = new ReadableStream({
        async start(controller) {
          try {
-            console.log('[Gemini Provider] Processing messages:', nonSystemMessages.length, 'messages (excluding system)');
+            const lastMessage = nonSystemMessages[nonSystemMessages.length - 1]
            let geminiContent = []
            // Get the last user message
            const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
            let lastUserMessage = lastMessage.content;
            // Handle case where content might be an array (multimodal)
            if (Array.isArray(lastUserMessage)) {
              // Extract text content from array
              const textParts = lastUserMessage.filter(part => 
                typeof part === 'string' || (part && part.type === 'text')
              );
              lastUserMessage = textParts.map(part => 
                typeof part === 'string' ? part : part.text
              ).join(' ');
            }
            console.log('[Gemini Provider] Sending message to Gemini:', 
              typeof lastUserMessage === 'string' ? lastUserMessage.substring(0, 100) + '...' : 'multimodal content');
            // Prepare the message content for Gemini
            let geminiContent = [];
            // Handle multimodal content properly
            if (Array.isArray(lastMessage.content)) {
              for (const part of lastMessage.content) {
-                if (typeof part === 'string') {
+                if (typeof part === "string") {
-                  geminiContent.push(part);
+                  geminiContent.push(part)
-                } else if (part.type === 'text') {
+                } else if (part.type === "text") {
-                  geminiContent.push(part.text);
+                  geminiContent.push(part.text)
-                } else if (part.type === 'image_url' && part.image_url) {
+                } else if (part.type === "image_url" && part.image_url) {
-                  // Convert base64 image to Gemini format
+                  const base64Data = part.image_url.url.split(",")[1]
                  const base64Data = part.image_url.url.split(',')[1];
                  geminiContent.push({
                    inlineData: {
-                      mimeType: 'image/png',
+                      mimeType: "image/png",
-                      data: base64Data
+                      data: base64Data,
-                    }
+                    },
-                  });
+                  })
                }
              }
            } else {
-              geminiContent = [lastUserMessage];
+              geminiContent = [lastMessage.content]
            }
-            console.log('[Gemini Provider] Prepared Gemini content:', 
+            const contentParts = geminiContent.map((part) => {
-              geminiContent.length, 'parts');
+              if (typeof part === "string") {
-            
+                return { text: part }
            // Stream the response
            let chunkCount = 0;
            let totalContent = '';
            const contentParts = geminiContent.map(part => {
              if (typeof part === 'string') {
                return { text: part };
              } else if (part.inlineData) {
-                return { inlineData: part.inlineData };
+                return { inlineData: part.inlineData }
              }
-              return part;
+              return part
-            });
+            })
            const result = await geminiModel.generateContentStream({
-              contents: [{
+              contents: [
-                role: 'user',
+                {
-                parts: contentParts
+                  role: "user",
-              }],
+                  parts: contentParts,
-              generationConfig: {
+                },
-                temperature,
+              ],
-                maxOutputTokens: maxTokens || 8192,
+            })
              }
            });
            for await (const chunk of result.stream) {
-              chunkCount++;
+              const chunkText = chunk.text() || ""
              const chunkText = chunk.text() || '';
              totalContent += chunkText;
-              // Format as SSE data
+              // Format as SSE data - this should now be plain text
              const data = JSON.stringify({
-                choices: [{
+                choices: [
                  {
                    delta: {
-                    content: chunkText
+                      content: chunkText,
-                  }
+                    },
-                }]
+                  },
-              });
+                ],
-              controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`));
+              })
              controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`))
            }
-            console.log(`[Gemini Provider] Streamed ${chunkCount} chunks, total length: ${totalContent.length} chars`);
+            controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-            
+            controller.close()
            // Send the final done message
            controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
            controller.close();
            console.log('[Gemini Provider] Streaming completed successfully');
          } catch (error) {
-            console.error('[Gemini Provider] Streaming error:', error);
+            console.error("[Gemini Provider] Streaming error:", error)
-            controller.error(error);
+            controller.error(error)
          }
-        }
+        },
-      });
+      })
      // Create a Response object with the stream
      return new Response(stream, {
        headers: {
-          'Content-Type': 'text/event-stream',
+          "Content-Type": "text/event-stream",
-          'Cache-Control': 'no-cache',
+          "Cache-Control": "no-cache",
-          'Connection': 'keep-alive'
+          Connection: "keep-alive",
        },
      })
    },
  }
      });
    }
  };
 }
 module.exports = {
  createSTT,
  createLLM,
-  createStreamingLLM
+  createStreamingLLM,
-}; 
+}