new endpoint settings
This commit is contained in:
@@ -8,12 +8,20 @@ from app.core.config import settings
|
||||
from pydantic import BaseModel
|
||||
from google import genai
|
||||
import asyncio
|
||||
from google.genai import types
|
||||
from app.core.prompts import GEMINI_SYSTEM_PROMPT
|
||||
|
||||
# Module-level FastAPI router; the module's endpoints (e.g. the Gemini chat
# handler further down) are registered on it, and it is presumably included
# into the main app elsewhere — not visible in this chunk.
router = APIRouter()
||||
class LLMRequest(BaseModel):
    """Request payload for the Gemini chat endpoint.

    The generation parameters (temperature, top_p, top_k,
    max_output_tokens) are forwarded unchanged into the
    ``types.GenerateContentConfig`` used by the handler.
    """

    prompt: str                        # user prompt (required)
    context: str = ""                  # when non-empty, prepended to the prompt as "Context: ..."
    system_prompt: str | None = None   # overrides GEMINI_SYSTEM_PROMPT when set
    knowledge_base: str | None = None  # when set, appended to the system instruction
    temperature: float = 0.7           # sampling temperature
    top_p: float = 0.95                # nucleus-sampling cutoff
    top_k: int = 40                    # top-k sampling cutoff
    max_output_tokens: int = 8192      # cap on generated tokens
|
||||
|
||||
# Shared google-genai client instance (global).
# NOTE(review): starts as None; presumably initialized lazily before the chat
# handler uses `client.aio.models.generate_content` — the initialization site
# is outside this chunk, confirm against the full module.
_client = None
|
||||
@@ -58,11 +66,23 @@ async def gemini_chat(
|
||||
if chat_data.context:
|
||||
prompt_content = f"Context: {chat_data.context}\n\nPrompt: {chat_data.prompt}"
|
||||
|
||||
# Prepare system instruction
|
||||
system_instruction = chat_data.system_prompt or GEMINI_SYSTEM_PROMPT
|
||||
if chat_data.knowledge_base:
|
||||
system_instruction += f"\n\nKnowledge Base:\n{chat_data.knowledge_base}"
|
||||
|
||||
# Using the async generation method provided by the new google-genai library
|
||||
# We use await to ensure we don't block the event loop
|
||||
response = await client.aio.models.generate_content(
|
||||
model="gemini-2.0-flash",
|
||||
contents=prompt_content
|
||||
model="gemini-2.5-flash",
|
||||
contents=prompt_content,
|
||||
config=types.GenerateContentConfig(
|
||||
system_instruction=system_instruction,
|
||||
temperature=chat_data.temperature,
|
||||
top_p=chat_data.top_p,
|
||||
top_k=chat_data.top_k,
|
||||
max_output_tokens=chat_data.max_output_tokens
|
||||
)
|
||||
)
|
||||
|
||||
# Track usage if valid module
|
||||
|
||||
Reference in New Issue
Block a user