new endpoint settings
This commit is contained in:
@@ -8,12 +8,20 @@ from app.core.config import settings
|
||||
from pydantic import BaseModel
|
||||
from google import genai
|
||||
import asyncio
|
||||
from google.genai import types
|
||||
from app.core.prompts import GEMINI_SYSTEM_PROMPT
|
||||
|
||||
# Module-level FastAPI router; the module's endpoints (e.g. the Gemini chat
# handler further down) are registered on it, and it is presumably included
# into the main app elsewhere — not visible in this chunk.
router = APIRouter()
||||
class LLMRequest(BaseModel):
    """Request payload for the Gemini chat endpoint.

    The generation parameters (temperature, top_p, top_k,
    max_output_tokens) are forwarded unchanged into the
    ``types.GenerateContentConfig`` used by the handler.
    """

    prompt: str                        # user prompt (required)
    context: str = ""                  # when non-empty, prepended to the prompt as "Context: ..."
    system_prompt: str | None = None   # overrides GEMINI_SYSTEM_PROMPT when set
    knowledge_base: str | None = None  # when set, appended to the system instruction
    temperature: float = 0.7           # sampling temperature
    top_p: float = 0.95                # nucleus-sampling cutoff
    top_k: int = 40                    # top-k sampling cutoff
    max_output_tokens: int = 8192      # cap on generated tokens
|
||||
|
||||
# Shared google-genai client instance (global).
# NOTE(review): starts as None; presumably initialized lazily before the chat
# handler uses `client.aio.models.generate_content` — the initialization site
# is outside this chunk, confirm against the full module.
_client = None
|
||||
@@ -58,11 +66,23 @@ async def gemini_chat(
|
||||
if chat_data.context:
|
||||
prompt_content = f"Context: {chat_data.context}\n\nPrompt: {chat_data.prompt}"
|
||||
|
||||
# Prepare system instruction
|
||||
system_instruction = chat_data.system_prompt or GEMINI_SYSTEM_PROMPT
|
||||
if chat_data.knowledge_base:
|
||||
system_instruction += f"\n\nKnowledge Base:\n{chat_data.knowledge_base}"
|
||||
|
||||
# Using the async generation method provided by the new google-genai library
|
||||
# We use await to ensure we don't block the event loop
|
||||
response = await client.aio.models.generate_content(
|
||||
model="gemini-2.0-flash",
|
||||
contents=prompt_content
|
||||
model="gemini-2.5-flash",
|
||||
contents=prompt_content,
|
||||
config=types.GenerateContentConfig(
|
||||
system_instruction=system_instruction,
|
||||
temperature=chat_data.temperature,
|
||||
top_p=chat_data.top_p,
|
||||
top_k=chat_data.top_k,
|
||||
max_output_tokens=chat_data.max_output_tokens
|
||||
)
|
||||
)
|
||||
|
||||
# Track usage if valid module
|
||||
|
||||
Reference in New Issue
Block a user