Added new endpoints for the gemini live feature

2026-04-02 14:50:03 +08:00
parent 622cf89211
commit cfec9c9bf5
4 changed files with 340 additions and 1 deletions
@@ -63,6 +63,91 @@ Verify if the gateway is online.
 ---
 ## 🎙️ Gemini Live Endpoints (Live Practice)
 Used by the Live Practice plugin template for real-time voice conversations.
 ### 4. Generate Live Ephemeral Token
 Exchanges the module API key for a short-lived token the browser can use to open a direct WebSocket to Gemini Live. The server API key is never exposed to the client.
 - **URL**: `http://localhost:8191/api/v1/gemini/live-token`
 - **Method**: `POST`
 - **Headers**: `X-API-Key: <your_key>`
 **Request Payload:**
 ```json
 {
  "model": "gemini-2.0-flash-live-001",
  "system_instruction": "You are a customer service representative named Alex...",
  "voice_name": "Puck"
 }
 ```
 **Response:**
 ```json
 {
  "token": "ephemeral-token-value",
  "expires_at": "2026-04-02T01:00:00Z",
  "websocket_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"
 }
 ```
 The client connects to the WebSocket URI with `?key={token}` appended.
 ---
 ### 5. Score Conversation
 Evaluates a completed conversation transcript against a scorecard using Gemini Flash with structured JSON output.
 - **URL**: `http://localhost:8191/api/v1/gemini/score`
 - **Method**: `POST`
 - **Headers**: `X-API-Key: <your_key>`
 **Request Payload:**
 ```json
 {
  "transcript": [
    { "role": "assistant", "text": "Hi, how can I help you today?" },
    { "role": "user", "text": "I have a problem with my order." }
  ],
  "scorecard": {
    "criteria": [
      {
        "name": "Greeting Quality",
        "weight": 25,
        "description": "Did the learner greet the customer warmly?",
        "good_example": "Hi there! Thanks for reaching out, I'm happy to help.",
        "poor_example": "What do you want?"
      }
    ]
  },
  "pass_threshold": 70
 }
 ```
 **Response:**
 ```json
 {
  "overall_score": 82.5,
  "passed": true,
  "criteria_scores": [
    {
      "name": "Greeting Quality",
      "score": 90,
      "feedback": "The learner greeted warmly and set a positive tone."
    }
  ],
  "positives": [
    "Maintained a professional tone throughout.",
    "Actively listened and acknowledged the customer's concern."
  ],
  "improvements": [
    "Could offer a specific resolution timeline earlier.",
    "Consider using the customer's name for a more personal touch."
  ]
 }
 ```
 ---
 ## 🛠️ Management Endpoints (Internal)
 These endpoints power the Admin Dashboard. They require a Bearer token or Master Key depending on implementation.
@@ -0,0 +1,85 @@
 from fastapi import APIRouter, Depends, Request, HTTPException
 from app.api.deps import get_current_module
 from app.models.module import Module
 from sqlalchemy.orm import Session
 from app.core.database import get_db
 from app.core.limiter import limiter
 from app.core.config import settings
 from pydantic import BaseModel
 import httpx
 # Router that the live-token endpoint in this module is registered on.
 router = APIRouter()
 # Base URL of the Gemini REST API (v1beta); the token request URL is built from it.
 GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
 # WebSocket URI handed back to clients in every live-token response
 # (both the mock response and the real one).
 GEMINI_LIVE_WSS_URI = (
    "wss://generativelanguage.googleapis.com/ws/"
    "google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"
 )
 # Request body accepted by POST /live-token.
 class LiveTokenRequest(BaseModel):
    # Model identifier; interpolated into the upstream token request URL.
    model: str = "gemini-2.0-flash-live-001"
    # Required. Sent upstream as the text of the system instruction.
    system_instruction: str
    # Prebuilt voice name. When None or empty, no speechConfig is sent upstream.
    voice_name: str | None = "Puck"
@router.post("/live-token")
@limiter.limit(settings.RATE_LIMIT)
async def generate_live_token(
    request: Request,
    body: LiveTokenRequest,
    module: Module = Depends(get_current_module),
    db: Session = Depends(get_db),
):
    """Mint a short-lived Gemini Live token for the authenticated module.

    Posts the system instruction and optional voice selection to the Gemini
    token endpoint using the server-side Google API key, then returns the
    token together with the WebSocket URI the client should connect to.

    Args:
        request: Incoming request. Not read in the body.
            NOTE(review): presumably required by the rate-limit decorator;
            confirm before removing.
        body: Model name, system instruction and optional voice name.
        module: Module resolved by ``get_current_module``; its ingress and
            total token counters are incremented by one per minted token.
        db: Session used to commit the counter update.

    Returns:
        A dict with ``token``, ``expires_at`` and ``websocket_uri``. When no
        real Google API key is configured, a fixed mock response is returned
        and nothing is sent upstream.

    Raises:
        HTTPException: 502 when the upstream request cannot be completed,
            answers with a non-200 status, returns a body that is not a JSON
            object, or returns no token name.
    """
    # Local import keeps this block self-contained; quote() is stdlib.
    from urllib.parse import quote

    api_key = settings.GOOGLE_API_KEY
    if not api_key or api_key == "your-google-api-key":
        # No usable key (unset or still the placeholder): answer with a canned
        # response so the endpoint keeps working without upstream access.
        return {
            "token": "mock-ephemeral-token",
            "expires_at": "2026-04-02T02:00:00Z",
            "websocket_uri": GEMINI_LIVE_WSS_URI,
        }

    # NOTE(review): the Live API is documented as accepting a single response
    # modality per session; confirm that ["AUDIO", "TEXT"] is accepted here.
    generation_config = {"responseModalities": ["AUDIO", "TEXT"]}
    if body.voice_name:
        generation_config["speechConfig"] = {
            "voiceConfig": {
                "prebuiltVoiceConfig": {"voiceName": body.voice_name}
            }
        }
    payload = {
        "systemInstruction": {
            "parts": [{"text": body.system_instruction}]
        },
        "generationConfig": generation_config,
    }

    # body.model is caller-controlled: escape it so it cannot inject extra
    # path segments or a query string into the upstream URL.
    model_segment = quote(body.model, safe="")

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{GEMINI_API_BASE}/models/{model_segment}:generateEphemeralToken",
                json=payload,
                # Header instead of ?key=... so the server key never appears
                # in request URLs (and therefore not in HTTP client logs).
                headers={"x-goog-api-key": api_key},
            )
    except httpx.RequestError as exc:
        # Timeouts, DNS and connection failures are upstream problems (502),
        # not internal errors. Only the exception type is reported.
        raise HTTPException(
            status_code=502,
            detail=f"Gemini token API request failed: {type(exc).__name__}",
        ) from exc

    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"Gemini token API error {response.status_code}: {response.text}",
        )

    try:
        data = response.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=502,
            detail="Gemini token API returned invalid JSON",
        ) from exc
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=502,
            detail="Gemini token API returned an unexpected response",
        )

    # Response name is "ephemeralTokens/{token_value}"
    token_value = str(data.get("name") or "").rsplit("/", 1)[-1]
    if not token_value:
        # Never hand the client a 200 with an empty token.
        raise HTTPException(
            status_code=502,
            detail="Gemini token API response did not include a token",
        )
    expires_at = data.get("expireTime", "")

    if module:
        # Each minted token is accounted as one unit of usage.
        module.ingress_tokens += 1
        module.total_tokens += 1
        db.commit()

    return {
        "token": token_value,
        "expires_at": expires_at,
        "websocket_uri": GEMINI_LIVE_WSS_URI,
    }
@@ -0,0 +1,167 @@
 from fastapi import APIRouter, Depends, Request, HTTPException
 from app.api.deps import get_current_module
 from app.models.module import Module
 from sqlalchemy.orm import Session
 from app.core.database import get_db
 from app.core.limiter import limiter
 from app.core.config import settings
 from pydantic import BaseModel
 from typing import List
 from google import genai
 from google.genai import types
 import json
 # Router that the scoring endpoint in this module is registered on.
 router = APIRouter()
 # Module-level Gemini client; stays None until get_gemini_client() creates it.
 _client = None
 def get_gemini_client():
     """Return the shared Gemini client, creating it on first use.

     Returns None when GOOGLE_API_KEY is unset or still holds the
     "your-google-api-key" placeholder; callers treat that as mock mode.
     """
     global _client

     # Reuse the client built by an earlier call.
     if _client is not None:
         return _client

     api_key = settings.GOOGLE_API_KEY
     if not api_key or api_key == "your-google-api-key":
         return None

     _client = genai.Client(
         api_key=api_key,
         http_options={"api_version": "v1alpha"},
     )
     return _client
 # One turn of the conversation being scored.
 class TranscriptEntry(BaseModel):
    # Speaker label; upper-cased when rendered into the prompt. Any string is
    # accepted here, the expected values are only documented, not validated.
    role: str  # "user" or "assistant"
    # What was said in this turn.
    text: str
 # A single scoring criterion as rendered into the evaluation prompt.
 class ScorecardCriterion(BaseModel):
    name: str
    # Rendered into the prompt as a percentage. The prompt states that weights
    # must sum to 100%, but nothing in this model enforces that.
    weight: float
    description: str
    # Optional examples; each is added to the prompt only when non-empty.
    good_example: str | None = None
    poor_example: str | None = None
 # The set of criteria a transcript is scored against.
 class Scorecard(BaseModel):
    # Rendered into the prompt in list order; an empty list is not rejected here.
    criteria: List[ScorecardCriterion]
 # Request body accepted by POST /score.
 class ScoreRequest(BaseModel):
    # Conversation turns, rendered into the prompt in list order.
    transcript: List[TranscriptEntry]
    scorecard: Scorecard
    # Minimum overall_score for a pass; interpolated into the prompt instructions.
    pass_threshold: int = 70
 # Structured-output schema passed as response_schema on the Gemini scoring
 # request. All five top-level fields are required; the per-criterion objects
 # inside criteria_scores declare no "required" list of their own.
 SCORE_RESPONSE_SCHEMA = {
    "type": "OBJECT",
    "properties": {
        "overall_score": {"type": "NUMBER"},
        "passed": {"type": "BOOLEAN"},
        # One entry per scored criterion.
        "criteria_scores": {
            "type": "ARRAY",
            "items": {
                "type": "OBJECT",
                "properties": {
                    "name": {"type": "STRING"},
                    "score": {"type": "NUMBER"},
                    "feedback": {"type": "STRING"},
                },
            },
        },
        "positives": {"type": "ARRAY", "items": {"type": "STRING"}},
        "improvements": {"type": "ARRAY", "items": {"type": "STRING"}},
    },
    "required": ["overall_score", "passed", "criteria_scores", "positives", "improvements"],
 }
 def _build_prompt(body: ScoreRequest) -> str:
     """Render the evaluation prompt for a scoring request.

     The transcript becomes one "ROLE: text" line per turn and the scorecard
     one bullet per criterion, with good/poor examples appended only when they
     are non-empty. The pass threshold is embedded in the instructions.
     """
     dialogue = "\n".join(
         [f"{turn.role.upper()}: {turn.text}" for turn in body.transcript]
     )

     rubric_entries = []
     for criterion in body.scorecard.criteria:
         entry = f"- {criterion.name} (weight: {criterion.weight}%): {criterion.description}"
         if criterion.good_example:
             entry += f"\n  Good example: {criterion.good_example}"
         if criterion.poor_example:
             entry += f"\n  Poor example: {criterion.poor_example}"
         rubric_entries.append(entry)
     rubric = "\n".join(rubric_entries)

     # The template below is runtime text sent to the model; keep it verbatim.
     return f"""You are an expert conversation evaluator for workplace learning simulations.
 Score the following conversation transcript against the provided scorecard criteria.
 TRANSCRIPT:
 {dialogue}
 SCORING CRITERIA (weights must sum to 100%):
 {rubric}
 Instructions:
 - Score each criterion from 0 to 100 based on evidence in the transcript.
 - Calculate overall_score as the weighted average of all criteria scores.
 - Set passed to true if overall_score >= {body.pass_threshold}.
 - Write specific, evidence-based feedback for each criterion (1-2 sentences).
 - List exactly 2-3 positives (specific things done well with transcript evidence).
 - List exactly 2-3 improvements (specific, actionable suggestions).
 Return a single JSON object following the response schema."""
def _mock_score_result(body: ScoreRequest) -> dict:
    """Build the canned result returned when no Gemini client is configured."""
    mock_score = 75.0
    return {
        "overall_score": mock_score,
        # Derived from the request's threshold so mock mode honours it too.
        "passed": mock_score >= body.pass_threshold,
        "criteria_scores": [
            {
                "name": c.name,
                "score": mock_score,
                "feedback": f"Mock feedback for {c.name}.",
            }
            for c in body.scorecard.criteria
        ],
        "positives": [
            "Maintained a professional tone throughout.",
            "Responded clearly to the main questions.",
        ],
        "improvements": [
            "Could provide more specific examples.",
            "Consider addressing the customer's emotional state earlier.",
        ],
    }


@router.post("/score")
@limiter.limit(settings.RATE_LIMIT)
async def score_conversation(
    request: Request,
    body: ScoreRequest,
    module: Module = Depends(get_current_module),
    db: Session = Depends(get_db),
):
    """Score a conversation transcript against a scorecard using Gemini.

    Args:
        request: Incoming request. Not read in the body.
            NOTE(review): presumably required by the rate-limit decorator;
            confirm before removing.
        body: Transcript, scorecard and pass threshold.
        module: Module resolved by ``get_current_module``; its token counters
            are increased by the usage of this call.
        db: Session used to commit the counter update.

    Returns:
        The parsed JSON object produced by the model (shape described by
        ``SCORE_RESPONSE_SCHEMA``), with ``passed`` recomputed from
        ``overall_score`` and ``body.pass_threshold``. When no Gemini client
        is configured, a mock result is returned instead.

    Raises:
        HTTPException: 502 when the Gemini call fails, returns no text, or
            returns text that is not valid JSON.
    """
    # Local import keeps this block self-contained; logging is stdlib.
    import logging

    client = get_gemini_client()
    if not client:
        return _mock_score_result(body)

    prompt = _build_prompt(body)

    # Only the SDK call is guarded by the broad handler: this is the boundary
    # to a third-party service, and anything it raises is an upstream failure.
    try:
        response = await client.aio.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=SCORE_RESPONSE_SCHEMA,
                temperature=0.2,
            ),
        )
    except Exception as exc:
        # Keep the details in the server log; do not echo SDK internals to
        # the API client.
        logging.getLogger(__name__).exception("Gemini scoring request failed")
        raise HTTPException(
            status_code=502,
            detail="Gemini scoring request failed",
        ) from exc

    # response.text is empty/None when the model produced no text output;
    # json.loads(None) would otherwise fail with a TypeError.
    raw_text = response.text
    if not raw_text:
        raise HTTPException(
            status_code=502,
            detail="Gemini returned an empty response",
        )

    try:
        result = json.loads(raw_text)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=502,
            detail=f"Gemini returned invalid JSON: {str(e)}",
        ) from e

    # The pass/fail decision is deterministic, so derive it here rather than
    # trusting the model to have applied the threshold correctly.
    if isinstance(result, dict):
        overall = result.get("overall_score")
        if isinstance(overall, (int, float)) and not isinstance(overall, bool):
            result["passed"] = overall >= body.pass_threshold

    if module:
        usage = response.usage_metadata
        # Individual counts may be missing even when usage metadata exists;
        # fall back to the ~4 characters per token estimate in that case.
        prompt_tokens = getattr(usage, "prompt_token_count", None)
        if prompt_tokens is None:
            prompt_tokens = len(prompt) // 4
        completion_tokens = getattr(usage, "candidates_token_count", None)
        if completion_tokens is None:
            completion_tokens = len(raw_text) // 4
        module.ingress_tokens += prompt_tokens
        module.egress_tokens += completion_tokens
        module.total_tokens += prompt_tokens + completion_tokens
        db.commit()

    return result
@@ -1,7 +1,9 @@
 from fastapi import APIRouter
-from app.api.endpoints import storyline, gemini, openai
+from app.api.endpoints import storyline, gemini, openai, gemini_live, gemini_score
 # Top-level router that aggregates every endpoint module under its URL prefix.
 api_router = APIRouter()
 api_router.include_router(storyline.router, prefix="/storyline", tags=["storyline"])
 api_router.include_router(gemini.router, prefix="/gemini", tags=["gemini"])
 api_router.include_router(openai.router, prefix="/openai", tags=["openai"])
 # The live-token and scoring routers share the /gemini prefix with the core
 # Gemini router and are both grouped under the "gemini-live" tag.
 api_router.include_router(gemini_live.router, prefix="/gemini", tags=["gemini-live"])
 api_router.include_router(gemini_score.router, prefix="/gemini", tags=["gemini-live"])