diff --git a/src/common/ai/factory.js b/src/common/ai/factory.js
new file mode 100644
index 0000000..05a6780
--- /dev/null
+++ b/src/common/ai/factory.js
@@ -0,0 +1,67 @@
+const providers = {
+    openai: require('./providers/openai'),
+    gemini: require('./providers/gemini'),
+    // Register additional providers here
+};
+
+/**
+ * Creates an STT session based on provider
+ * @param {string} provider - Provider name ('openai', 'gemini', etc.)
+ * @param {object} opts - Configuration options (apiKey, language, callbacks, etc.)
+ * @returns {Promise} STT session object with sendRealtimeInput and close methods
+ */
+function createSTT(provider, opts) {
+    if (!providers[provider]?.createSTT) {
+        throw new Error(`STT not supported for provider: ${provider}`);
+    }
+    return providers[provider].createSTT(opts);
+}
+
+/**
+ * Creates an LLM instance based on provider
+ * @param {string} provider - Provider name ('openai', 'gemini', etc.)
+ * @param {object} opts - Configuration options (apiKey, model, temperature, etc.)
+ * @returns {object} LLM instance with generateContent method
+ */
+function createLLM(provider, opts) {
+    if (!providers[provider]?.createLLM) {
+        throw new Error(`LLM not supported for provider: ${provider}`);
+    }
+    return providers[provider].createLLM(opts);
+}
+
+/**
+ * Creates a streaming LLM instance based on provider
+ * @param {string} provider - Provider name ('openai', 'gemini', etc.)
+ * @param {object} opts - Configuration options (apiKey, model, temperature, etc.)
+ * @returns {object} Streaming LLM instance
+ */
+function createStreamingLLM(provider, opts) {
+    if (!providers[provider]?.createStreamingLLM) {
+        throw new Error(`Streaming LLM not supported for provider: ${provider}`);
+    }
+    return providers[provider].createStreamingLLM(opts);
+}
+
+/**
+ * Gets list of available providers
+ * @returns {object} Object with stt and llm arrays
+ */
+function getAvailableProviders() {
+    const sttProviders = [];
+    const llmProviders = [];
+
+    for (const [name, provider] of Object.entries(providers)) {
+        if (provider.createSTT) sttProviders.push(name);
+        if (provider.createLLM) llmProviders.push(name);
+    }
+
+    return { stt: sttProviders, llm: llmProviders };
+}
+
+module.exports = {
+    createSTT,
+    createLLM,
+    createStreamingLLM,
+    getAvailableProviders
+};
\ No newline at end of file
diff --git a/src/common/ai/providers/gemini.js b/src/common/ai/providers/gemini.js
new file mode 100644
index 0000000..a2284a1
--- /dev/null
+++ b/src/common/ai/providers/gemini.js
@@ -0,0 +1,310 @@
+const { GoogleGenerativeAI } = require('@google/generative-ai');
+const { GoogleGenAI } = require('@google/genai');
+
+/**
+ * Creates a Gemini STT session
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - Gemini API key
+ * @param {string} [opts.language='en-US'] - Language code
+ * @param {object} [opts.callbacks] - Event callbacks
+ * @returns {Promise} STT session
+ */
+async function createSTT({ apiKey, language = 'en-US', callbacks = {}, ...config }) {
+    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });
+
+    // Convert the language code to BCP-47 (e.g. 'en' -> 'en-US')
+    const lang = language.includes('-') ? language : `${language}-US`;
+
+    const session = await liveClient.live.connect({
+        model: 'gemini-live-2.5-flash-preview',
+        callbacks,
+        config: {
+            inputAudioTranscription: {},
+            speechConfig: { languageCode: lang },
+        },
+    });
+
+    return {
+        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
+        close: async () => session.close(),
+    };
+}
+
+/**
+ * Creates a Gemini LLM instance
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - Gemini API key
+ * @param {string} [opts.model='gemini-2.5-flash'] - Model name
+ * @param {number} [opts.temperature=0.7] - Temperature
+ * @param {number} [opts.maxTokens=8192] - Max tokens
+ * @returns {object} LLM instance
+ */
+function createLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) {
+    const client = new GoogleGenerativeAI(apiKey);
+
+    return {
+        generateContent: async (parts) => {
+            const geminiModel = client.getGenerativeModel({ model: model });
+
+            let systemPrompt = '';
+            let userContent = [];
+
+            for (const part of parts) {
+                if (typeof part === 'string') {
+                    if (systemPrompt === '' && part.includes('You are')) {
+                        systemPrompt = part;
+                    } else {
+                        userContent.push(part);
+                    }
+                } else if (part.inlineData) {
+                    // Convert base64 image data to Gemini format
+                    userContent.push({
+                        inlineData: {
+                            mimeType: part.inlineData.mimeType,
+                            data: part.inlineData.data
+                        }
+                    });
+                }
+            }
+
+            // Prepare content array
+            const content = [];
+
+            // Add system instruction if present
+            if (systemPrompt) {
+                // For Gemini, we'll prepend system prompt to user content
+                content.push(systemPrompt + '\n\n' + userContent[0]);
+                content.push(...userContent.slice(1));
+            } else {
+                content.push(...userContent);
+            }
+
+            try {
+                const result = await geminiModel.generateContent(content);
+                const response = await result.response;
+
+                return {
+                    response: {
+                        text: () => response.text()
+                    }
+                };
+            } catch (error) {
+                console.error('Gemini API error:', error);
+                throw error;
+            }
+        },
+
+        // For compatibility with chat-style interfaces
+        chat: async (messages) => {
+            // Extract system instruction if present
+            let systemInstruction = '';
+            const history = [];
+            let lastMessage;
+
+            messages.forEach((msg, index) => {
+                if (msg.role === 'system') {
+                    systemInstruction = msg.content;
+                    return;
+                }
+
+                // Gemini's history format
+                const role = msg.role === 'user' ? 'user' : 'model';
+
+                if (index === messages.length - 1) {
+                    lastMessage = msg;
+                } else {
+                    history.push({ role, parts: [{ text: msg.content }] });
+                }
+            });
+
+            const geminiModel = client.getGenerativeModel({
+                model: model,
+                systemInstruction: systemInstruction
+            });
+
+            const chat = geminiModel.startChat({
+                history: history,
+                generationConfig: {
+                    temperature: temperature,
+                    maxOutputTokens: maxTokens,
+                }
+            });
+
+            // Get the last user message content
+            let content = lastMessage.content;
+
+            // Handle multimodal content for the last message
+            if (Array.isArray(content)) {
+                const geminiContent = [];
+                for (const part of content) {
+                    if (typeof part === 'string') {
+                        geminiContent.push(part);
+                    } else if (part.type === 'text') {
+                        geminiContent.push(part.text);
+                    } else if (part.type === 'image_url' && part.image_url) {
+                        // Convert base64 image to Gemini format
+                        const base64Data = part.image_url.url.split(',')[1];
+                        geminiContent.push({
+                            inlineData: {
+                                mimeType: 'image/png',
+                                data: base64Data
+                            }
+                        });
+                    }
+                }
+                content = geminiContent;
+            }
+
+            const result = await chat.sendMessage(content);
+            const response = await result.response;
+            return {
+                content: response.text(),
+                raw: result
+            };
+        }
+    };
+}
+
+/**
+ * Creates a Gemini streaming LLM instance
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - Gemini API key
+ * @param {string} [opts.model='gemini-2.5-flash'] - Model name
+ * @param {number} [opts.temperature=0.7] - Temperature
+ * @param {number} [opts.maxTokens=8192] - Max tokens
+ * @returns {object} Streaming LLM instance
+ */
+function createStreamingLLM({ apiKey, model = 'gemini-2.5-flash', temperature = 0.7, maxTokens = 8192, ...config }) {
+    const client = new GoogleGenerativeAI(apiKey);
+
+    return {
+        streamChat: async (messages) => {
+            console.log('[Gemini Provider] Starting streaming request');
+
+            // Extract system instruction if present
+            let systemInstruction = '';
+            const nonSystemMessages = [];
+
+            for (const msg of messages) {
+                if (msg.role === 'system') {
+                    systemInstruction = msg.content;
+                } else {
+                    nonSystemMessages.push(msg);
+                }
+            }
+
+            const geminiModel = client.getGenerativeModel({
+                model: model,
+                systemInstruction: systemInstruction || undefined
+            });
+
+            const chat = geminiModel.startChat({
+                history: [],
+                generationConfig: {
+                    temperature,
+                    maxOutputTokens: maxTokens || 8192,
+                }
+            });
+
+            // Create a ReadableStream to handle Gemini's streaming
+            const stream = new ReadableStream({
+                async start(controller) {
+                    try {
+                        console.log('[Gemini Provider] Processing messages:', nonSystemMessages.length, 'messages (excluding system)');
+
+                        // Get the last user message
+                        const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
+                        let lastUserMessage = lastMessage.content;
+
+                        // Handle case where content might be an array (multimodal)
+                        if (Array.isArray(lastUserMessage)) {
+                            // Extract text content from array
+                            const textParts = lastUserMessage.filter(part =>
+                                typeof part === 'string' || (part && part.type === 'text')
+                            );
+                            lastUserMessage = textParts.map(part =>
+                                typeof part === 'string' ? part : part.text
+                            ).join(' ');
+                        }
+
+                        console.log('[Gemini Provider] Sending message to Gemini:',
+                            typeof lastUserMessage === 'string' ? lastUserMessage.substring(0, 100) + '...' : 'multimodal content');
+
+                        // Prepare the message content for Gemini
+                        let geminiContent = [];
+
+                        // Handle multimodal content properly
+                        if (Array.isArray(lastMessage.content)) {
+                            for (const part of lastMessage.content) {
+                                if (typeof part === 'string') {
+                                    geminiContent.push(part);
+                                } else if (part.type === 'text') {
+                                    geminiContent.push(part.text);
+                                } else if (part.type === 'image_url' && part.image_url) {
+                                    // Convert base64 image to Gemini format
+                                    const base64Data = part.image_url.url.split(',')[1];
+                                    geminiContent.push({
+                                        inlineData: {
+                                            mimeType: 'image/png',
+                                            data: base64Data
+                                        }
+                                    });
+                                }
+                            }
+                        } else {
+                            geminiContent = [lastUserMessage];
+                        }
+
+                        console.log('[Gemini Provider] Prepared Gemini content:',
+                            geminiContent.length, 'parts');
+
+                        // Stream the response
+                        let chunkCount = 0;
+                        let totalContent = '';
+
+                        // sendMessageStream resolves to a result object whose .stream
+                        // property is the async iterable of chunks
+                        const streamResult = await chat.sendMessageStream(geminiContent);
+
+                        for await (const chunk of streamResult.stream) {
+                            chunkCount++;
+                            const chunkText = chunk.text() || '';
+                            totalContent += chunkText;
+
+                            // Format as SSE data
+                            const data = JSON.stringify({
+                                choices: [{
+                                    delta: {
+                                        content: chunkText
+                                    }
+                                }]
+                            });
+                            controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`));
+                        }
+
+                        console.log(`[Gemini Provider] Streamed ${chunkCount} chunks, total length: ${totalContent.length} chars`);
+
+                        // Send the final done message
+                        controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
+                        controller.close();
+                        console.log('[Gemini Provider] Streaming completed successfully');
+                    } catch (error) {
+                        console.error('[Gemini Provider] Streaming error:', error);
+                        controller.error(error);
+                    }
+                }
+            });
+
+            // Create a Response object with the stream
+            return new Response(stream, {
+                headers: {
+                    'Content-Type': 'text/event-stream',
+                    'Cache-Control': 'no-cache',
+                    'Connection': 'keep-alive'
+                }
+            });
+        }
+    };
+}
+
+module.exports = {
+    createSTT,
+    createLLM,
+    createStreamingLLM
+};
\ No newline at end of file
diff --git a/src/common/ai/providers/openai.js b/src/common/ai/providers/openai.js
new file mode 100644
index 0000000..a27c547
--- /dev/null
+++ b/src/common/ai/providers/openai.js
@@ -0,0 +1,255 @@
+const OpenAI = require('openai');
+const WebSocket = require('ws');
+
+/**
+ * Creates an OpenAI STT session
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - OpenAI API key
+ * @param {string} [opts.language='en'] - Language code
+ * @param {object} [opts.callbacks] - Event callbacks
+ * @param {boolean} [opts.usePortkey=false] - Whether to use Portkey
+ * @param {string} [opts.portkeyVirtualKey] - Portkey virtual key
+ * @returns {Promise} STT session
+ */
+async function createSTT({ apiKey, language = 'en', callbacks = {}, usePortkey = false, portkeyVirtualKey, ...config }) {
+    const keyType = usePortkey ? 'vKey' : 'apiKey';
+    const key = usePortkey ? (portkeyVirtualKey || apiKey) : apiKey;
+
+    const wsUrl = keyType === 'apiKey'
+        ? 'wss://api.openai.com/v1/realtime?intent=transcription'
+        : 'wss://api.portkey.ai/v1/realtime?intent=transcription';
+
+    const headers = keyType === 'apiKey'
+        ? {
+            'Authorization': `Bearer ${key}`,
+            'OpenAI-Beta': 'realtime=v1',
+        }
+        : {
+            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
+            'x-portkey-virtual-key': key,
+            'OpenAI-Beta': 'realtime=v1',
+        };
+
+    const ws = new WebSocket(wsUrl, { headers });
+
+    return new Promise((resolve, reject) => {
+        ws.onopen = () => {
+            console.log("WebSocket session opened.");
+
+            const sessionConfig = {
+                type: 'transcription_session.update',
+                session: {
+                    input_audio_format: 'pcm16',
+                    input_audio_transcription: {
+                        model: 'gpt-4o-mini-transcribe',
+                        prompt: config.prompt || '',
+                        language: language || 'en'
+                    },
+                    turn_detection: {
+                        type: 'server_vad',
+                        threshold: 0.5,
+                        prefix_padding_ms: 50,
+                        silence_duration_ms: 25,
+                    },
+                    input_audio_noise_reduction: {
+                        type: 'near_field'
+                    }
+                }
+            };
+
+            ws.send(JSON.stringify(sessionConfig));
+
+            resolve({
+                sendRealtimeInput: (audioData) => {
+                    if (ws.readyState === WebSocket.OPEN) {
+                        const message = {
+                            type: 'input_audio_buffer.append',
+                            audio: audioData
+                        };
+                        ws.send(JSON.stringify(message));
+                    }
+                },
+                close: () => {
+                    if (ws.readyState === WebSocket.OPEN) {
+                        ws.send(JSON.stringify({ type: 'session.close' }));
+                        ws.close(1000, 'Client initiated close.');
+                    }
+                }
+            });
+        };
+
+        ws.onmessage = (event) => {
+            const message = JSON.parse(event.data);
+            if (callbacks && callbacks.onmessage) {
+                callbacks.onmessage(message);
+            }
+        };
+
+        ws.onerror = (error) => {
+            console.error('WebSocket error:', error.message);
+            if (callbacks && callbacks.onerror) {
+                callbacks.onerror(error);
+            }
+            reject(error);
+        };
+
+        ws.onclose = (event) => {
+            console.log(`WebSocket closed: ${event.code} ${event.reason}`);
+            if (callbacks && callbacks.onclose) {
+                callbacks.onclose(event);
+            }
+        };
+    });
+}
+
+/**
+ * Creates an OpenAI LLM instance
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - OpenAI API key
+ * @param {string} [opts.model='gpt-4.1'] - Model name
+ * @param {number} [opts.temperature=0.7] - Temperature
+ * @param {number} [opts.maxTokens=2048] - Max tokens
+ * @param {boolean} [opts.usePortkey=false] - Whether to use Portkey
+ * @param {string} [opts.portkeyVirtualKey] - Portkey virtual key
+ * @returns {object} LLM instance
+ */
+function createLLM({ apiKey, model = 'gpt-4.1', temperature = 0.7, maxTokens = 2048, usePortkey = false, portkeyVirtualKey, ...config }) {
+    const client = new OpenAI({ apiKey });
+
+    const callApi = async (messages) => {
+        if (!usePortkey) {
+            const response = await client.chat.completions.create({
+                model: model,
+                messages: messages,
+                temperature: temperature,
+                max_tokens: maxTokens
+            });
+            return {
+                content: response.choices[0].message.content.trim(),
+                raw: response
+            };
+        } else {
+            const fetchUrl = 'https://api.portkey.ai/v1/chat/completions';
+            const response = await fetch(fetchUrl, {
+                method: 'POST',
+                headers: {
+                    'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
+                    'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({
+                    model: model,
+                    messages,
+                    temperature,
+                    max_tokens: maxTokens,
+                }),
+            });
+
+            if (!response.ok) {
+                throw new Error(`Portkey API error: ${response.status} ${response.statusText}`);
+            }
+
+            const result = await response.json();
+            return {
+                content: result.choices[0].message.content.trim(),
+                raw: result
+            };
+        }
+    };
+
+    return {
+        generateContent: async (parts) => {
+            const messages = [];
+            let systemPrompt = '';
+            let userContent = [];
+
+            for (const part of parts) {
+                if (typeof part === 'string') {
+                    if (systemPrompt === '' && part.includes('You are')) {
+                        systemPrompt = part;
+                    } else {
+                        userContent.push({ type: 'text', text: part });
+                    }
+                } else if (part.inlineData) {
+                    userContent.push({
+                        type: 'image_url',
+                        image_url: { url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` }
+                    });
+                }
+            }
+
+            if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
+            if (userContent.length > 0) messages.push({ role: 'user', content: userContent });
+
+            const result = await callApi(messages);
+
+            return {
+                response: {
+                    text: () => result.content
+                },
+                raw: result.raw
+            };
+        },
+
+        // For compatibility with chat-style interfaces
+        chat: async (messages) => {
+            return await callApi(messages);
+        }
+    };
+}
+
+/**
+ * Creates an OpenAI streaming LLM instance
+ * @param {object} opts - Configuration options
+ * @param {string} opts.apiKey - OpenAI API key
+ * @param {string} [opts.model='gpt-4.1'] - Model name
+ * @param {number} [opts.temperature=0.7] - Temperature
+ * @param {number} [opts.maxTokens=2048] - Max tokens
+ * @param {boolean} [opts.usePortkey=false] - Whether to use Portkey
+ * @param {string} [opts.portkeyVirtualKey] - Portkey virtual key
+ * @returns {object} Streaming LLM instance
+ */
+function createStreamingLLM({ apiKey, model = 'gpt-4.1', temperature = 0.7, maxTokens = 2048, usePortkey = false, portkeyVirtualKey, ...config }) {
+    return {
+        streamChat: async (messages) => {
+            const fetchUrl = usePortkey
+                ? 'https://api.portkey.ai/v1/chat/completions'
+                : 'https://api.openai.com/v1/chat/completions';
+
+            const headers = usePortkey
+                ? {
+                    'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
+                    'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
+                    'Content-Type': 'application/json',
+                }
+                : {
+                    Authorization: `Bearer ${apiKey}`,
+                    'Content-Type': 'application/json',
+                };
+
+            const response = await fetch(fetchUrl, {
+                method: 'POST',
+                headers,
+                body: JSON.stringify({
+                    model: model,
+                    messages,
+                    temperature,
+                    max_tokens: maxTokens,
+                    stream: true,
+                }),
+            });
+
+            if (!response.ok) {
+                throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
+            }
+
+            return response;
+        }
+    };
+}
+
+module.exports = {
+    createSTT,
+    createLLM,
+    createStreamingLLM
+};
\ No newline at end of file
diff --git a/src/common/services/aiProviderService.js b/src/common/services/aiProviderService.js
deleted file mode 100644
index fe24e47..0000000
--- a/src/common/services/aiProviderService.js
+++ /dev/null
@@ -1,377 +0,0 @@
-const { createOpenAiGenerativeClient, getOpenAiGenerativeModel } = require('./openAiClient.js');
-const { createGeminiClient, getGeminiGenerativeModel, createGeminiChat } = require('./googleGeminiClient.js');
-
-/**
- * Creates an AI client based on the provider
- * @param {string} apiKey - The API key
- * @param {string} provider - The provider ('openai' or 'gemini')
- * @returns {object} The AI client
- */
-function createAIClient(apiKey, provider = 'openai') {
-    switch (provider) {
-        case 'openai':
-            return createOpenAiGenerativeClient(apiKey);
-        case 'gemini':
-            return createGeminiClient(apiKey);
-        default:
-            throw new Error(`Unsupported AI provider: ${provider}`);
-    }
-}
-
-/**
- * Gets a generative model based on the provider
- * @param {object} client - The AI client
- * @param {string} provider - The provider ('openai' or 'gemini')
- * @param {string} model - The model name (optional)
- * @returns {object} The model object
- */
-function getGenerativeModel(client, provider = 'openai', model) {
-    switch (provider) {
-        case 'openai':
-            return getOpenAiGenerativeModel(client, model || 'gpt-4.1');
-        case 'gemini':
-            return getGeminiGenerativeModel(client, model || 'gemini-2.5-flash');
-        default:
-            throw new Error(`Unsupported AI provider: ${provider}`);
-    }
-}
-
-/**
- * Makes a chat completion request based on the provider
- * @param {object} params - Request parameters
- * @returns {Promise} The completion response
- */
-async function makeChatCompletion({ apiKey, provider = 'openai', messages, temperature = 0.7, maxTokens = 1024, model, stream = false }) {
-    if (provider === 'openai') {
-        const fetchUrl = 'https://api.openai.com/v1/chat/completions';
-        const response = await fetch(fetchUrl, {
-            method: 'POST',
-            headers: {
-                Authorization: `Bearer ${apiKey}`,
-                'Content-Type': 'application/json',
-            },
-            body: JSON.stringify({
-                model: model || 'gpt-4.1',
-                messages,
-                temperature,
-                max_tokens: maxTokens,
-                stream,
-            }),
-        });
-
-        if (!response.ok) {
-            throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
-        }
-
-        if (stream) {
-            return response;
-        }
-
-        const result = await response.json();
-        return {
-            content: result.choices[0].message.content.trim(),
-            raw: result
-        };
-    } else if (provider === 'gemini') {
-        const client = createGeminiClient(apiKey);
-        const genModel = getGeminiGenerativeModel(client, model || 'gemini-2.5-flash');
-
-        // Convert OpenAI format messages to Gemini format
-        const parts = [];
-        for (const message of messages) {
-            if (message.role === 'system') {
-                parts.push(message.content);
-            } else if (message.role === 'user') {
-                if (typeof message.content === 'string') {
-                    parts.push(message.content);
-                } else if (Array.isArray(message.content)) {
-                    // Handle multimodal content
-                    for (const part of message.content) {
-                        if (part.type === 'text') {
-                            parts.push(part.text);
-                        } else if (part.type === 'image_url' && part.image_url?.url) {
-                            // Extract base64 data from data URL
-                            const base64Match = part.image_url.url.match(/^data:(.+);base64,(.+)$/);
-                            if (base64Match) {
-                                parts.push({
-                                    inlineData: {
-                                        mimeType: base64Match[1],
-                                        data: base64Match[2]
-                                    }
-                                });
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        const result = await genModel.generateContent(parts);
-        return {
-            content: result.response.text(),
-            raw: result
-        };
-    } else {
-        throw new Error(`Unsupported AI provider: ${provider}`);
-    }
-}
-
-/**
- * Makes a chat completion request with Portkey support
- * @param {object} params - Request parameters including Portkey options
- * @returns {Promise} The completion response
- */
-async function makeChatCompletionWithPortkey({
-    apiKey,
-    provider = 'openai',
-    messages,
-    temperature = 0.7,
-    maxTokens = 1024,
-    model,
-    usePortkey = false,
-    portkeyVirtualKey = null
-}) {
-    if (!usePortkey) {
-        return makeChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
-    }
-
-    // Portkey is only supported for OpenAI currently
-    if (provider !== 'openai') {
-        console.warn('Portkey is only supported for OpenAI provider, falling back to direct API');
-        return makeChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
-    }
-
-    const fetchUrl = 'https://api.portkey.ai/v1/chat/completions';
-    const response = await fetch(fetchUrl, {
-        method: 'POST',
-        headers: {
-            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
-            'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
-            'Content-Type': 'application/json',
-        },
-        body: JSON.stringify({
-            model: model || 'gpt-4.1',
-            messages,
-            temperature,
-            max_tokens: maxTokens,
-        }),
-    });
-
-    if (!response.ok) {
-        throw new Error(`Portkey API error: ${response.status} ${response.statusText}`);
-    }
-
-    const result = await response.json();
-    return {
-        content: result.choices[0].message.content.trim(),
-        raw: result
-    };
-}
-
-/**
- * Makes a streaming chat completion request
- * @param {object} params - Request parameters
- * @returns {Promise} The streaming response
- */
-async function makeStreamingChatCompletion({ apiKey, provider = 'openai', messages, temperature = 0.7, maxTokens = 1024, model }) {
-    if (provider === 'openai') {
-        const fetchUrl = 'https://api.openai.com/v1/chat/completions';
-        const response = await fetch(fetchUrl, {
-            method: 'POST',
-            headers: {
-                Authorization: `Bearer ${apiKey}`,
-                'Content-Type': 'application/json',
-            },
-            body: JSON.stringify({
-                model: model || 'gpt-4.1',
-                messages,
-                temperature,
-                max_tokens: maxTokens,
-                stream: true,
-            }),
-        });
-
-        if (!response.ok) {
-            throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
-        }
-
-        return response;
-    } else if (provider === 'gemini') {
-        console.log('[AIProviderService] Starting Gemini streaming request');
-        // Gemini streaming requires a different approach
-        // We'll create a ReadableStream that mimics OpenAI's SSE format
-        const geminiClient = createGeminiClient(apiKey);
-
-        // Extract system instruction if present
-        let systemInstruction = '';
-        const nonSystemMessages = [];
-
-        for (const msg of messages) {
-            if (msg.role === 'system') {
-                systemInstruction = msg.content;
-            } else {
-                nonSystemMessages.push(msg);
-            }
-        }
-
-        const chat = createGeminiChat(geminiClient, model || 'gemini-2.0-flash-exp', {
-            temperature,
-            maxOutputTokens: maxTokens || 8192,
-            systemInstruction: systemInstruction || undefined
-        });
-
-        // Create a ReadableStream to handle Gemini's streaming
-        const stream = new ReadableStream({
-            async start(controller) {
-                try {
-                    console.log('[AIProviderService] Processing messages for Gemini:', nonSystemMessages.length, 'messages (excluding system)');
-
-                    // Get the last user message
-                    const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
-                    let lastUserMessage = lastMessage.content;
-
-                    // Handle case where content might be an array (multimodal)
-                    if (Array.isArray(lastUserMessage)) {
-                        // Extract text content from array
-                        const textParts = lastUserMessage.filter(part =>
-                            typeof part === 'string' || (part && part.type === 'text')
-                        );
-                        lastUserMessage = textParts.map(part =>
-                            typeof part === 'string' ? part : part.text
-                        ).join(' ');
-                    }
-
-                    console.log('[AIProviderService] Sending message to Gemini:',
-                        typeof lastUserMessage === 'string' ? lastUserMessage.substring(0, 100) + '...' : 'multimodal content');
-
-                    // Prepare the message content for Gemini
-                    let geminiContent = [];
-
-                    // Handle multimodal content properly
-                    if (Array.isArray(lastMessage.content)) {
-                        for (const part of lastMessage.content) {
-                            if (typeof part === 'string') {
-                                geminiContent.push(part);
-                            } else if (part.type === 'text') {
-                                geminiContent.push(part.text);
-                            } else if (part.type === 'image_url' && part.image_url) {
-                                // Convert base64 image to Gemini format
-                                const base64Data = part.image_url.url.split(',')[1];
-                                geminiContent.push({
-                                    inlineData: {
-                                        mimeType: 'image/png',
-                                        data: base64Data
-                                    }
-                                });
-                            }
-                        }
-                    } else {
-                        geminiContent = [lastUserMessage];
-                    }
-
-                    console.log('[AIProviderService] Prepared Gemini content:',
-                        geminiContent.length, 'parts');
-
-                    // Stream the response
-                    let chunkCount = 0;
-                    let totalContent = '';
-
-                    for await (const chunk of chat.sendMessageStream(geminiContent)) {
-                        chunkCount++;
-                        const chunkText = chunk.text || '';
-                        totalContent += chunkText;
-
-                        // Format as SSE data
-                        const data = JSON.stringify({
-                            choices: [{
-                                delta: {
-                                    content: chunkText
-                                }
-                            }]
-                        });
-                        controller.enqueue(new TextEncoder().encode(`data: ${data}\n\n`));
-                    }
-
-                    console.log(`[AIProviderService] Streamed ${chunkCount} chunks, total length: ${totalContent.length} chars`);
-
-                    // Send the final done message
-                    controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
-                    controller.close();
-                    console.log('[AIProviderService] Gemini streaming completed successfully');
-                } catch (error) {
-                    console.error('[AIProviderService] Gemini streaming error:', error);
-                    controller.error(error);
-                }
-            }
-        });
-
-        // Create a Response object with the stream
-        return new Response(stream, {
-            headers: {
-                'Content-Type': 'text/event-stream',
-                'Cache-Control': 'no-cache',
-                'Connection': 'keep-alive'
-            }
-        });
-    } else {
-        throw new Error(`Unsupported AI provider: ${provider}`);
-    }
-}
-
-/**
- * Makes a streaming chat completion request with Portkey support
- * @param {object} params - Request parameters
- * @returns {Promise} The streaming response
- */
-async function makeStreamingChatCompletionWithPortkey({
-    apiKey,
-    provider = 'openai',
-    messages,
-    temperature = 0.7,
-    maxTokens = 1024,
-    model,
-    usePortkey = false,
-    portkeyVirtualKey = null
-}) {
-    if (!usePortkey) {
-        return makeStreamingChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
-    }
-
-    // Portkey is only supported for OpenAI currently
-    if (provider !== 'openai') {
-        console.warn('Portkey is only supported for OpenAI provider, falling back to direct API');
-        return makeStreamingChatCompletion({ apiKey, provider, messages, temperature, maxTokens, model });
-    }
-
-    const fetchUrl = 'https://api.portkey.ai/v1/chat/completions';
-    const response = await fetch(fetchUrl, {
-        method: 'POST',
-        headers: {
-            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
-            'x-portkey-virtual-key': portkeyVirtualKey || apiKey,
-            'Content-Type': 'application/json',
-        },
-        body: JSON.stringify({
-            model: model || 'gpt-4.1',
-            messages,
-            temperature,
-            max_tokens: maxTokens,
-            stream: true,
-        }),
-    });
-
-    if (!response.ok) {
-        throw new Error(`Portkey API error: ${response.status} ${response.statusText}`);
-    }
-
-    return response;
-}
-
-module.exports = {
-    createAIClient,
-    getGenerativeModel,
-    makeChatCompletion,
-    makeChatCompletionWithPortkey,
-    makeStreamingChatCompletion,
-    makeStreamingChatCompletionWithPortkey
-};
\ No newline at end of file
diff --git a/src/common/services/googleGeminiClient.js b/src/common/services/googleGeminiClient.js
deleted file mode 100644
index 877c82e..0000000
--- a/src/common/services/googleGeminiClient.js
+++ /dev/null
@@ -1,171 +0,0 @@
-const { GoogleGenerativeAI } = require('@google/generative-ai');
-const { GoogleGenAI } = require('@google/genai');
-
-/**
- * Creates and returns a Google Gemini client instance for generative AI.
- * @param {string} apiKey - The API key for authentication.
- * @returns {GoogleGenerativeAI} The initialized Gemini client.
- */
-function createGeminiClient(apiKey) {
-    return new GoogleGenerativeAI(apiKey);
-}
-
-/**
- * Gets a Gemini model for text/image generation.
- * @param {GoogleGenerativeAI} client - The Gemini client instance.
- * @param {string} [model='gemini-2.5-flash'] - The name for the text/vision model.
- * @returns {object} Model object with generateContent method
- */
-function getGeminiGenerativeModel(client, model = 'gemini-2.5-flash') {
-    const genAI = client;
-    const geminiModel = genAI.getGenerativeModel({ model: model });
-
-    return {
-        generateContent: async (parts) => {
-            let systemPrompt = '';
-            let userContent = [];
-
-            for (const part of parts) {
-                if (typeof part === 'string') {
-                    if (systemPrompt === '' && part.includes('You are')) {
-                        systemPrompt = part;
-                    } else {
-                        userContent.push(part);
-                    }
-                } else if (part.inlineData) {
-                    // Convert base64 image data to Gemini format
-                    userContent.push({
-                        inlineData: {
-                            mimeType: part.inlineData.mimeType,
-                            data: part.inlineData.data
-                        }
-                    });
-                }
-            }
-
-            // Prepare content array
-            const content = [];
-
-            // Add system instruction if present
-            if (systemPrompt) {
-                // For Gemini, we'll prepend system prompt to user content
-                content.push(systemPrompt + '\n\n' + userContent[0]);
-                content.push(...userContent.slice(1));
-            } else {
-                content.push(...userContent);
-            }
-
-            try {
-                const result = await geminiModel.generateContent(content);
-                const response = await result.response;
-
-                return {
-                    response: {
-                        text: () => response.text()
-                    }
-                };
-            } catch (error) {
-                console.error('Gemini API error:', error);
-                throw error;
-            }
-        }
-    };
-}
-
-/**
- * Creates a Gemini chat session for multi-turn conversations.
- * @param {GoogleGenerativeAI} client - The Gemini client instance.
- * @param {string} [model='gemini-2.5-flash'] - The model to use.
- * @param {object} [config={}] - Configuration options.
- * @returns {object} Chat session object
- */
-function createGeminiChat(client, model = 'gemini-2.5-flash', config = {}) {
-    const genAI = client;
-    const geminiModel = genAI.getGenerativeModel({
-        model: model,
-        systemInstruction: config.systemInstruction
-    });
-
-    const chat = geminiModel.startChat({
-        history: config.history || [],
-        generationConfig: {
-            temperature: config.temperature || 0.7,
-            maxOutputTokens: config.maxOutputTokens || 8192,
-        }
-    });
-
-    return {
-        sendMessage: async (message) => {
-            const result = await chat.sendMessage(message);
-            const response = await result.response;
-            return {
-                text: response.text()
-            };
-        },
-        sendMessageStream: async function* (message) {
-            const result = await chat.sendMessageStream(message);
-            for await (const chunk of result.stream) {
-                yield {
-                    text: chunk.text()
-                };
-            }
-        },
-        getHistory: () => chat.getHistory()
-    };
-}
-
-// async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
-//     const liveClient = new GoogleGenAI({
-//         vertexai: false, // not using Vertex AI
-//         apiKey,
-//     });
-
-//     // Open a live STT session
-//     const session = await liveClient.live.connect({
-//         model: 'gemini-live-2.5-flash-preview',
-//         callbacks,
-//         config: {
-//             inputAudioTranscription: {}, // required for realtime STT
-//             speechConfig: { languageCode: language },
-//         },
-//     });
-
-//     return {
-//         sendRealtimeInput: async data => session.send({
-//             audio: { data, mimeType: 'audio/pcm;rate=24000' }
-//         }),
-//         close: async () => session.close(),
-//     };
-// }
-
-async function connectToGeminiSession(apiKey, { language = 'en-US', callbacks = {} } = {}) {
-    // (1) Reuse the old-style helper
-    const liveClient = new GoogleGenAI({ vertexai: false, apiKey });
-
-    // (2) Force the language code into BCP-47 form
-    const lang = language.includes('-') ? language : `${language}-US`;
-
-    const session = await liveClient.live.connect({
-        model: 'gemini-live-2.5-flash-preview',
-        callbacks,
-        config: {
-            inputAudioTranscription: {},
-            speechConfig: { languageCode: lang },
-        },
-    });
-
-    // (3) SDK 0.5+: sendRealtimeInput is the official method name
-    return {
-        sendRealtimeInput: async payload => session.sendRealtimeInput(payload),
-        close: async () => session.close(),
-    };
-}
-
-
-
-module.exports = {
-    createGeminiClient,
-    getGeminiGenerativeModel,
-    createGeminiChat,
-    connectToGeminiSession,
-};
\ No newline at end of file
diff --git a/src/common/services/openAiClient.js b/src/common/services/openAiClient.js
deleted file mode 100644
index 56a9141..0000000
--- a/src/common/services/openAiClient.js
+++ /dev/null
@@ -1,177 +0,0 @@
-const OpenAI = require('openai');
-const WebSocket = require('ws');
-
-/**
- * Creates and returns an OpenAI client instance for STT (Speech-to-Text).
- * @param {string} apiKey - The API key for authentication.
- * @returns {OpenAI} The initialized OpenAI client.
- */
-function createOpenAiClient(apiKey) {
-    return new OpenAI({
-        apiKey: apiKey,
-    });
-}
-
-/**
- * Creates and returns an OpenAI client instance for text/image generation.
- * @param {string} apiKey - The API key for authentication.
- * @returns {OpenAI} The initialized OpenAI client.
- */
-function createOpenAiGenerativeClient(apiKey) {
-    return new OpenAI({
-        apiKey: apiKey,
-    });
-}
-
-/**
- * Connects to an OpenAI Realtime WebSocket session for STT.
- * @param {string} key - Portkey vKey or OpenAI apiKey.
- * @param {object} config - The configuration object for the realtime session.
- * @param {'apiKey'|'vKey'} keyType - key type ('apiKey' | 'vKey').
- * @returns {Promise} A promise that resolves to the session object with send and close methods.
- */
-async function connectToOpenAiSession(key, config, keyType) {
-    if (keyType !== 'apiKey' && keyType !== 'vKey') {
-        throw new Error('keyType must be either "apiKey" or "vKey".');
-    }
-
-    const wsUrl = keyType === 'apiKey'
-        ? 'wss://api.openai.com/v1/realtime?intent=transcription'
-        : 'wss://api.portkey.ai/v1/realtime?intent=transcription';
-
-    const headers = keyType === 'apiKey'
-        ? {
-            'Authorization': `Bearer ${key}`,
-            'OpenAI-Beta': 'realtime=v1',
-        }
-        : {
-            'x-portkey-api-key': 'gRv2UGRMq6GGLJ8aVEB4e7adIewu',
-            'x-portkey-virtual-key': key,
-            'OpenAI-Beta': 'realtime=v1',
-        };
-
-    const ws = new WebSocket(wsUrl, { headers });
-
-    return new Promise((resolve, reject) => {
-        ws.onopen = () => {
-            console.log("WebSocket session opened.");
-
-            const sessionConfig = {
-                type: 'transcription_session.update',
-                session: {
-                    input_audio_format: 'pcm16',
-                    input_audio_transcription: {
-                        model: 'gpt-4o-mini-transcribe',
-                        prompt: config.prompt || '',
-                        language: config.language || 'en'
-                    },
-                    turn_detection: {
-                        type: 'server_vad',
-                        threshold: 0.5,
-                        prefix_padding_ms: 50,
-                        silence_duration_ms: 25,
-                    },
-                    input_audio_noise_reduction: {
-                        type: 'near_field'
-                    }
-                }
-            };
-
-            ws.send(JSON.stringify(sessionConfig));
-
-            resolve({
-                sendRealtimeInput: (audioData) => {
-                    if (ws.readyState === WebSocket.OPEN) {
-                        const message = {
-                            type: 'input_audio_buffer.append',
-                            audio: audioData
-                        };
-                        ws.send(JSON.stringify(message));
-                    }
-                },
-                close: () => {
-                    if (ws.readyState === WebSocket.OPEN) {
-                        ws.send(JSON.stringify({ type: 'session.close' }));
-                        ws.close(1000, 'Client initiated close.');
-                    }
-                }
-            });
-        };
-
-        ws.onmessage = (event) => {
-            const message = JSON.parse(event.data);
-            if (config.callbacks && config.callbacks.onmessage) {
-                config.callbacks.onmessage(message);
-            }
-        };
-
-        ws.onerror = (error) => {
-            console.error('WebSocket error:', error.message);
-            if (config.callbacks && config.callbacks.onerror) {
-                config.callbacks.onerror(error);
-            }
-            reject(error);
-        };
-
-        ws.onclose = (event) => {
-            console.log(`WebSocket closed: ${event.code} ${event.reason}`);
-            if (config.callbacks && config.callbacks.onclose) {
-                config.callbacks.onclose(event);
-            }
-        };
-    });
-}
-
-/**
- * Gets a GPT model for text/image generation.
- * @param {OpenAI} client - The OpenAI client instance.
- * @param {string} [model='gpt-4.1'] - The name for the text/vision model.
- * @returns {object} Model object with generateContent method
- */
-function getOpenAiGenerativeModel(client, model = 'gpt-4.1') {
-    return {
-        generateContent: async (parts) => {
-            const messages = [];
-            let systemPrompt = '';
-            let userContent = [];
-
-            for (const part of parts) {
-                if (typeof part === 'string') {
-                    if (systemPrompt === '' && part.includes('You are')) {
-                        systemPrompt = part;
-                    } else {
-                        userContent.push({ type: 'text', text: part });
-                    }
-                } else if (part.inlineData) {
-                    userContent.push({
-                        type: 'image_url',
-                        image_url: { url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` }
-                    });
-                }
-            }
-
-            if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
-            if (userContent.length > 0) messages.push({ role: 'user', content: userContent });
-
-            const response = await client.chat.completions.create({
-                model: model,
-                messages: messages,
-                temperature: 0.7,
-                max_tokens: 2048
-            });
-
-            return {
-                response: {
-                    text: () => response.choices[0].message.content
-                }
-            };
-        }
-    };
-}
-
-module.exports = {
-    createOpenAiClient,
-    connectToOpenAiSession,
-    createOpenAiGenerativeClient,
-    getOpenAiGenerativeModel,
-};
\ No newline at end of file
diff --git a/src/features/ask/askService.js b/src/features/ask/askService.js
index d07d79d..03e4ed0 100644
--- a/src/features/ask/askService.js
+++ b/src/features/ask/askService.js
@@ -1,172 +1,10 @@
 const { ipcMain, BrowserWindow } = require('electron');
-const { makeStreamingChatCompletionWithPortkey } = require('../../common/services/aiProviderService');
+const { createStreamingLLM } = require('../../common/ai/factory');
 const { getStoredApiKey, getStoredProvider, windowPool, captureScreenshot } = require('../../electron/windowManager');
 const authService = require('../../common/services/authService');
 const sessionRepository = require('../../common/repositories/session');
 const askRepository = require('./repositories');
-
-const PICKLE_GLASS_SYSTEM_PROMPT = `
-You are Pickle-Glass, developed and created by Pickle-Glass, and you are the user's live-meeting co-pilot.
-
-Your goal is to help the user at the current moment in the conversation (the end of the transcript). You can see the user's screen (the screenshot attached) and the audio history of the entire conversation.
-Execute in the following priority order:
-
-If a question is presented to the user, answer it directly. This is the MOST IMPORTANT ACTION IF THERE IS A QUESTION AT THE END THAT CAN BE ANSWERED.
-
-Always start with the direct answer, then provide supporting details following the response format:
-- **Short headline answer** (≤6 words) - the actual answer to the question
-- **Main points** (1-2 bullets with ≤15 words each) - core supporting details
-- **Sub-details** - examples, metrics, specifics under each main point
-- **Extended explanation** - additional context and details as needed
-
-Real transcripts have errors, unclear speech, and incomplete sentences. Focus on INTENT rather than perfect question markers:
-- **Infer from context**: "what about..." "how did you..." "can you..." "tell me..." even if garbled
-- **Incomplete questions**: "so the performance..." "and scaling wise..." "what's your approach to..."
-- **Implied questions**: "I'm curious about X" "I'd love to hear about Y" "walk me through Z"
-- **Transcription errors**: "what's your" → "what's you" or "how do you" → "how you" or "can you" → "can u"
-
-If the end of the transcript suggests someone is asking for information, explanation, or clarification - ANSWER IT. Don't get distracted by earlier content.
-
-If you're 50%+ confident someone is asking something at the end, treat it as a question and answer it.
-
-Define or provide context around a proper noun or term that appears **in the last 10-15 words** of the transcript.
-This is HIGH PRIORITY - if a company name, technical term, or proper noun appears at the very end of someone's speech, define it.
-
-Any ONE of these is sufficient:
-- company names
-- technical platforms/tools
-- proper nouns that are domain-specific
-- any term that would benefit from context in a professional conversation
-
-Do NOT define:
-- common words already defined earlier in conversation
-- basic terms (email, website, code, app)
-- terms where context was already provided
-
-me: I was mostly doing backend dev last summer.
-them: Oh nice, what tech stack were you using?
-me: A lot of internal tools, but also some Azure.
-them: Yeah I've heard Azure is huge over there.
-me: Yeah, I used to work at Microsoft last summer but now I...
-
-**Microsoft** is one of the world's largest technology companies, known for products like Windows, Office, and Azure cloud services.
-- **Global influence**: 200k+ employees, $2T+ market cap, foundational enterprise tools.
-  - Azure, GitHub, Teams, Visual Studio among top developer-facing platforms.
-- **Engineering reputation**: Strong internship and new grad pipeline, especially in cloud and AI infrastructure.
-
-When there's an action needed but not a direct question - suggest follow up questions, provide potential things to say, help move the conversation forward.
-
-- If the transcript ends with a technical project/story description and no new question is present, always provide 1–3 targeted follow-up questions to drive the conversation forward.
-- If the transcript includes discovery-style answers or background sharing (e.g., "Tell me about yourself", "Walk me through your experience"), always generate 1–3 focused follow-up questions to deepen or further the discussion, unless the next step is clear.
-- Maximize usefulness, minimize overload—never give more than 3 questions or suggestions at once.
-
-me: Tell me about your technical experience.
-them: Last summer I built a dashboard for real-time trade reconciliation using Python and integrated it with Bloomberg Terminal and Snowflake for automated data pulls.
-
-Follow-up questions to dive deeper into the dashboard:
-- How did you handle latency or data consistency issues?
-- What made the Bloomberg integration challenging?
-- Did you measure the impact on operational efficiency?
-
-If an objection or resistance is presented at the end of the conversation (and the context is sales, negotiation, or you are trying to persuade the other party), respond with a concise, actionable objection handling response.
-- Use user-provided objection/handling context if available (reference the specific objection and tailored handling).
-- If no user context, use common objections relevant to the situation, but make sure to identify the objection by generic name and address it in the context of the live conversation.
-- State the objection in the format: **Objection: [Generic Objection Name]** (e.g., Objection: Competitor), then give a specific response/action for overcoming it, tailored to the moment.
-- Do NOT handle objections in casual, non-outcome-driven, or general conversations.
-- Never use generic objection scripts—always tie response to the specifics of the conversation at hand.
-
-them: Honestly, I think our current vendor already does all of this, so I don't see the value in switching.
-
-- **Objection: Competitor**
-  - Current vendor already covers this.
-  - Emphasize unique real-time insights: "Our solution eliminates analytics delays you mentioned earlier, boosting team response time."
-
-Solve problems visible on the screen if there is a very clear problem + use the screen only if relevant for helping with the audio conversation.
-
-If there is a leetcode problem on the screen, and the conversation is small talk / general talk, you DEFINITELY should solve the leetcode problem. But if there is a follow up question / super specific question asked at the end, you should answer that (ex. What's the runtime complexity), using the screen as additional context.
-
-Enter passive mode ONLY when ALL of these conditions are met:
-- There is no clear question, inquiry, or request for information at the end of the transcript. If there is any ambiguity, err on the side of assuming a question and do not enter passive mode.
-- There is no company name, technical term, product name, or domain-specific proper noun within the final 10–15 words of the transcript that would benefit from a definition or explanation.
-- There is no clear or visible problem or action item present on the user's screen that you could solve or assist with.
-- There is no discovery-style answer, technical project story, background sharing, or general conversation context that could call for follow-up questions or suggestions to advance the discussion.
-- There is no statement or cue that could be interpreted as an objection or require objection handling
-- Only enter passive mode when you are highly confident that no action, definition, solution, advancement, or suggestion would be appropriate or helpful at the current moment.
-
-**Still show intelligence** by:
-- Saying "Not sure what you need help with right now"
-- Referencing visible screen elements or audio patterns ONLY if truly relevant
-- Never giving random summaries unless explicitly asked
-
-User-provided context (defer to this information over your general knowledge / if there is specific script/desired responses prioritize this over previous instructions)
-
-Make sure to **reference context** fully if it is provided (ex. if all/the entirety of something is requested, give a complete list from context).
-
-----------
-
-{{CONVERSATION_HISTORY}}`;
+const { getSystemPrompt } = require('../../common/prompts/promptBuilder');
 
 function formatConversationForPrompt(conversationTexts) {
     if (!conversationTexts || conversationTexts.length === 0) return 'No conversation history available.';
@@ -199,7 +37,7 @@ async function sendMessage(userPrompt) {
     const conversationHistoryRaw = getConversationHistory();
     const conversationHistory = formatConversationForPrompt(conversationHistoryRaw);
 
-    const systemPrompt = PICKLE_GLASS_SYSTEM_PROMPT.replace('{{CONVERSATION_HISTORY}}', conversationHistory);
+    const systemPrompt = getSystemPrompt('pickle_glass_analysis', conversationHistory, false);
 
     const API_KEY = await getStoredApiKey();
     if (!API_KEY) {
@@ -225,21 +63,20 @@ async function sendMessage(userPrompt) {
 
         const provider = await getStoredProvider();
         const { isLoggedIn } = authService.getCurrentUser();
-        const usePortkey = isLoggedIn && provider === 'openai';
 
         console.log(`[AskService] 🚀 Sending request to ${provider} AI...`);
 
-        const response = await makeStreamingChatCompletionWithPortkey({
+        const streamingLLM = createStreamingLLM(provider, {
             apiKey: API_KEY,
-            provider: provider,
-            messages: messages,
+            model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
             temperature: 0.7,
             maxTokens: 2048,
-            model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
-            usePortkey: usePortkey,
-            portkeyVirtualKey: usePortkey ? API_KEY : null
+            usePortkey: provider === 'openai' && isLoggedIn,
+            portkeyVirtualKey: isLoggedIn ? API_KEY : undefined
         });
 
+        const response = await streamingLLM.streamChat(messages);
+
         // --- Stream Processing ---
         const reader = response.body.getReader();
         const decoder = new TextDecoder();
diff --git a/src/features/listen/renderer/renderer.js b/src/features/listen/renderer/renderer.js
index 9916909..6601b48 100644
--- a/src/features/listen/renderer/renderer.js
+++ b/src/features/listen/renderer/renderer.js
@@ -1,6 +1,5 @@
 // renderer.js
 const { ipcRenderer } = require('electron');
-const { makeStreamingChatCompletionWithPortkey } = require('../../../common/services/aiProviderService.js');
 const listenCapture = require('./listenCapture.js');
 
 let realtimeConversationHistory = [];
diff --git a/src/features/listen/stt/sttService.js b/src/features/listen/stt/sttService.js
index 4530ef0..294bd74 100644
--- a/src/features/listen/stt/sttService.js
+++ b/src/features/listen/stt/sttService.js
@@ -1,7 +1,6 @@
 const { BrowserWindow } = require('electron');
 const { spawn } = require('child_process');
-const { connectToGeminiSession } = require('../../../common/services/googleGeminiClient.js');
-const { connectToOpenAiSession } = require('../../../common/services/openAiClient.js');
+const { createSTT } = require('../../../common/ai/factory');
 const { getStoredApiKey, getStoredProvider } = require('../../../electron/windowManager');
 
 const COMPLETION_DEBOUNCE_MS = 2000;
@@ -265,23 +264,22 @@ class SttService {
             },
         };
 
-        // Determine key type based on auth status
+        // Determine auth options for providers that support it
         const authService = require('../../../common/services/authService');
         const userState = authService.getCurrentUser();
         const loggedIn = userState.isLoggedIn;
-        const keyType = loggedIn ? 'vKey' : 'apiKey';
+
+        const sttOptions = {
+            apiKey: API_KEY,
+            language: effectiveLanguage,
+            usePortkey: !isGemini && loggedIn, // Only OpenAI supports Portkey
+            portkeyVirtualKey: loggedIn ? API_KEY : undefined
+        };
 
-        if (isGemini) {
-            [this.mySttSession, this.theirSttSession] = await Promise.all([
-                connectToGeminiSession(API_KEY, mySttConfig),
-                connectToGeminiSession(API_KEY, theirSttConfig),
-            ]);
-        } else {
-            [this.mySttSession, this.theirSttSession] = await Promise.all([
-                connectToOpenAiSession(API_KEY, mySttConfig, keyType),
-                connectToOpenAiSession(API_KEY, theirSttConfig, keyType),
-            ]);
-        }
+        [this.mySttSession, this.theirSttSession] = await Promise.all([
+            createSTT(provider, { ...sttOptions, callbacks: mySttConfig.callbacks }),
+            createSTT(provider, { ...sttOptions, callbacks: theirSttConfig.callbacks }),
+        ]);
 
         console.log('✅ Both STT sessions initialized successfully.');
         return true;
diff --git a/src/features/listen/summary/summaryService.js b/src/features/listen/summary/summaryService.js
index 7cffa99..860fa35 100644
--- a/src/features/listen/summary/summaryService.js
+++ b/src/features/listen/summary/summaryService.js
@@ -1,6 +1,6 @@
 const { BrowserWindow } = require('electron');
 const { getSystemPrompt } = require('../../../common/prompts/promptBuilder.js');
-const { makeChatCompletionWithPortkey } = require('../../../common/services/aiProviderService.js');
+const { createLLM } = require('../../../common/ai/factory');
 const authService = require('../../../common/services/authService');
 const sessionRepository = require('../../../common/repositories/session');
 const summaryRepository = require('./repositories');
@@ -155,21 +155,20 @@ Keep all points concise and build upon previous analysis if provided.`,
 
         const provider = getStoredProvider ? await getStoredProvider() : 'openai';
         const loggedIn = authService.getCurrentUser().isLoggedIn;
-        const usePortkey = loggedIn && provider === 'openai';
 
-        console.log(`[SummaryService] provider: ${provider}, usePortkey: ${usePortkey}`);
+        console.log(`[SummaryService] provider: ${provider}, loggedIn: ${loggedIn}`);
 
-        const completion = await makeChatCompletionWithPortkey({
+        const llm = createLLM(provider, {
             apiKey: API_KEY,
-            provider: provider,
-            messages: messages,
+            model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
             temperature: 0.7,
             maxTokens: 1024,
-            model: provider === 'openai' ? 'gpt-4.1' : 'gemini-2.5-flash',
-            usePortkey: usePortkey,
-            portkeyVirtualKey: usePortkey ? API_KEY : null
+            usePortkey: provider === 'openai' && loggedIn,
+            portkeyVirtualKey: loggedIn ? API_KEY : undefined
        });
 
+        const completion = await llm.chat(messages);
+
         const responseText = completion.content;
         console.log(`✅ Analysis response received: ${responseText}`);
         const structuredData = this.parseResponseText(responseText, this.previousAnalysisResult);
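
For reference, a minimal usage sketch of the new factory API, assuming Node 18+ and an OpenAI key in the OPENAI_API_KEY environment variable (the script path, key handling, and prompts here are illustrative, not part of the diff):

const { createLLM, getAvailableProviders } = require('./src/common/ai/factory');

async function demo() {
    // Both providers in this diff export all three creators.
    console.log(getAvailableProviders()); // { stt: ['openai', 'gemini'], llm: ['openai', 'gemini'] }

    const llm = createLLM('openai', {
        apiKey: process.env.OPENAI_API_KEY,
        model: 'gpt-4.1',
        temperature: 0.7,
        maxTokens: 2048,
    });

    // chat() returns { content, raw } for both providers.
    const { content } = await llm.chat([
        { role: 'system', content: 'You are a terse assistant.' },
        { role: 'user', content: 'Summarize this refactor in one sentence.' },
    ]);
    console.log(content);
}

demo().catch(console.error);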
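Both providers' streamChat() resolve to a fetch-style Response whose body is an OpenAI-shaped SSE stream, which is what lets askService.js consume either provider with one reader loop. A simplified consumer sketch (it assumes each chunk contains whole `data:` lines, the same simplification the reader loop in askService.js makes):

const { createStreamingLLM } = require('./src/common/ai/factory');

async function streamAnswer(provider, apiKey, messages) {
    const llm = createStreamingLLM(provider, { apiKey, temperature: 0.7, maxTokens: 2048 });
    const response = await llm.streamChat(messages);

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let fullText = '';

    while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        for (const line of decoder.decode(value, { stream: true }).split('\n')) {
            if (!line.startsWith('data: ') || line.includes('[DONE]')) continue;
            const delta = JSON.parse(line.slice(6)).choices[0]?.delta?.content;
            if (delta) fullText += delta; // a real UI would render each delta incrementally
        }
    }
    return fullText;
}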
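The providers table in factory.js makes adding a vendor a drop-in change: a module only needs to export whichever creators it supports, and getAvailableProviders() reports capability per export. A hypothetical skeleton for a new src/common/ai/providers/<name>.js (the names, defaults, and stub return values are placeholders):

module.exports = {
    // createSTT omitted: factory.createSTT() will then throw for this provider,
    // and getAvailableProviders() will list it under llm only.
    createLLM({ apiKey, model = 'vendor-default-model', temperature = 0.7, maxTokens = 2048 }) {
        return {
            // Must mirror the shapes the other providers return:
            generateContent: async (parts) => ({ response: { text: () => 'stub' } }),
            chat: async (messages) => ({ content: 'stub', raw: null }),
        };
    },
    createStreamingLLM(opts) {
        return {
            // Must resolve to a Response streaming OpenAI-shaped SSE, ending with data: [DONE]
            streamChat: async (messages) => { throw new Error('not implemented'); },
        };
    },
};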