diff --git a/api_documentation.md b/api_documentation.md
index b5ea94b..ff135ef 100644
--- a/api_documentation.md
+++ b/api_documentation.md
@@ -63,6 +63,91 @@ Verify if the gateway is online.
 
 ---
 
+## 🎙️ Gemini Live Endpoints (Live Practice)
+
+Used by the Live Practice plugin template for real-time voice conversations.
+
+### 4. Generate Live Ephemeral Token
+Exchanges the module API key for a short-lived token the browser can use to open a direct WebSocket to Gemini Live. The server API key is never exposed to the client.
+- **URL**: `http://localhost:8191/api/v1/gemini/live-token`
+- **Method**: `POST`
+- **Headers**: `X-API-Key: <MODULE_API_KEY>`
+
+**Request Payload:**
+```json
+{
+  "model": "gemini-2.0-flash-live-001",
+  "system_instruction": "You are a customer service representative named Alex...",
+  "voice_name": "Puck"
+}
+```
+
+**Response:**
+```json
+{
+  "token": "ephemeral-token-value",
+  "expires_at": "2026-04-02T01:00:00Z",
+  "websocket_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"
+}
+```
+
+The client connects to the WebSocket URI with `?key={token}` appended.
+
+---
+
+### 5. Score Conversation
+Evaluates a completed conversation transcript against a scorecard using Gemini Flash with structured JSON output.
+- **URL**: `http://localhost:8191/api/v1/gemini/score`
+- **Method**: `POST`
+- **Headers**: `X-API-Key: <MODULE_API_KEY>`
+
+**Request Payload:**
+```json
+{
+  "transcript": [
+    { "role": "assistant", "text": "Hi, how can I help you today?" },
+    { "role": "user", "text": "I have a problem with my order." }
+  ],
+  "scorecard": {
+    "criteria": [
+      {
+        "name": "Greeting Quality",
+        "weight": 25,
+        "description": "Did the learner greet the customer warmly?",
+        "good_example": "Hi there! Thanks for reaching out, I'm happy to help.",
+        "poor_example": "What do you want?"
+      }
+    ]
+  },
+  "pass_threshold": 70
+}
+```
+
+**Response:**
+```json
+{
+  "overall_score": 82.5,
+  "passed": true,
+  "criteria_scores": [
+    {
+      "name": "Greeting Quality",
+      "score": 90,
+      "feedback": "The learner greeted warmly and set a positive tone."
+    }
+  ],
+  "positives": [
+    "Maintained a professional tone throughout.",
+    "Actively listened and acknowledged the customer's concern."
+  ],
+  "improvements": [
+    "Could offer a specific resolution timeline earlier.",
+    "Consider using the customer's name for a more personal touch."
+  ]
+}
+```
+
+---
+
 ## 🛠️ Management Endpoints (Internal)
 
 These endpoints power the Admin Dashboard. They require a Bearer token or Master Key depending on implementation.
diff --git a/app/api/endpoints/gemini_live.py b/app/api/endpoints/gemini_live.py
new file mode 100644
index 0000000..9c3708a
--- /dev/null
+++ b/app/api/endpoints/gemini_live.py
@@ -0,0 +1,121 @@
+"""Gemini Live endpoints: exchange the server API key for short-lived client tokens."""
+from datetime import datetime, timedelta, timezone
+from urllib.parse import quote
+
+from fastapi import APIRouter, Depends, Request, HTTPException
+from app.api.deps import get_current_module
+from app.models.module import Module
+from sqlalchemy.orm import Session
+from app.core.database import get_db
+from app.core.limiter import limiter
+from app.core.config import settings
+from pydantic import BaseModel
+import httpx
+
+router = APIRouter()
+
+GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
+GEMINI_LIVE_WSS_URI = (
+    "wss://generativelanguage.googleapis.com/ws/"
+    "google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"
+)
+# Lifetime advertised for the placeholder token served when no API key is configured.
+MOCK_TOKEN_TTL = timedelta(hours=1)
+
+
+class LiveTokenRequest(BaseModel):
+    """Request body for ``POST /live-token``."""
+
+    model: str = "gemini-2.0-flash-live-001"
+    system_instruction: str
+    voice_name: str | None = "Puck"
+
+
+@router.post("/live-token")
+@limiter.limit(settings.RATE_LIMIT)
+async def generate_live_token(
+    request: Request,
+    body: LiveTokenRequest,
+    module: Module = Depends(get_current_module),
+    db: Session = Depends(get_db),
+):
+    """Return a short-lived Gemini Live token and the WebSocket URI to use it with.
+
+    When no real ``GOOGLE_API_KEY`` is configured, a mock token is returned so the
+    client flow can be exercised without calling Google.
+
+    Raises:
+        HTTPException: 504 if the Gemini request times out; 502 if Gemini is
+            unreachable, answers with a non-200 status, or returns no usable token.
+    """
+    if not settings.GOOGLE_API_KEY or settings.GOOGLE_API_KEY == "your-google-api-key":
+        # Derive the expiry from "now" so the mock token never looks already expired.
+        mock_expiry = datetime.now(timezone.utc) + MOCK_TOKEN_TTL
+        return {
+            "token": "mock-ephemeral-token",
+            "expires_at": mock_expiry.strftime("%Y-%m-%dT%H:%M:%SZ"),
+            "websocket_uri": GEMINI_LIVE_WSS_URI,
+        }
+
+    payload = {
+        "systemInstruction": {
+            "parts": [{"text": body.system_instruction}]
+        },
+        "generationConfig": {
+            "responseModalities": ["AUDIO", "TEXT"],
+        },
+    }
+
+    if body.voice_name:
+        payload["generationConfig"]["speechConfig"] = {
+            "voiceConfig": {
+                "prebuiltVoiceConfig": {"voiceName": body.voice_name}
+            }
+        }
+
+    # NOTE(review): confirm this route against the current Gemini API reference;
+    # the published ephemeral-token flow may use a different endpoint/API version.
+    # The model id is caller-supplied, so escape it before it enters the URL path.
+    url = f"{GEMINI_API_BASE}/models/{quote(body.model, safe='')}:generateEphemeralToken"
+
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                url,
+                json=payload,
+                # Header auth keeps the server key out of URLs, logs and tracebacks.
+                headers={"x-goog-api-key": settings.GOOGLE_API_KEY},
+            )
+    except httpx.TimeoutException as exc:
+        raise HTTPException(status_code=504, detail="Gemini token API timed out") from exc
+    except httpx.HTTPError as exc:
+        raise HTTPException(status_code=502, detail="Gemini token API unreachable") from exc
+
+    if response.status_code != 200:
+        raise HTTPException(
+            status_code=502,
+            detail=f"Gemini token API error {response.status_code}: {response.text}",
+        )
+
+    try:
+        data = response.json()
+    except ValueError as exc:
+        raise HTTPException(status_code=502, detail="Gemini token API returned invalid JSON") from exc
+
+    # Response name is "ephemeralTokens/{token_value}"
+    token_value = data.get("name", "").split("/")[-1]
+    if not token_value:
+        # An empty token would only fail later, and less clearly, in the browser.
+        raise HTTPException(status_code=502, detail="Gemini token API returned no token")
+    expires_at = data.get("expireTime", "")
+
+    if module:
+        module.ingress_tokens += 1
+        module.total_tokens += 1
+        db.commit()
+
+    return {
+        "token": token_value,
+        "expires_at": expires_at,
+        "websocket_uri": GEMINI_LIVE_WSS_URI,
+    }
diff --git a/app/api/endpoints/gemini_score.py b/app/api/endpoints/gemini_score.py
new file mode 100644
index 0000000..9bab36c
--- /dev/null
+++ b/app/api/endpoints/gemini_score.py
@@ -0,0 +1,226 @@
+"""Conversation scoring endpoint backed by Gemini structured JSON output."""
+from fastapi import APIRouter, Depends, Request, HTTPException
+from app.api.deps import get_current_module
+from app.models.module import Module
+from sqlalchemy.orm import Session
+from app.core.database import get_db
+from app.core.limiter import limiter
+from app.core.config import settings
+from pydantic import BaseModel
+from typing import List
+from google import genai
+from google.genai import types
+import json
+
+router = APIRouter()
+
+# Process-wide Gemini client, created on first use by get_gemini_client().
+_client = None
+
+# Score reported for every criterion when no API key is configured (mock mode).
+MOCK_SCORE = 75.0
+
+
+def get_gemini_client():
+    """Return the shared Gemini client, or None when no real API key is configured."""
+    global _client
+    if _client is None and settings.GOOGLE_API_KEY and settings.GOOGLE_API_KEY != "your-google-api-key":
+        _client = genai.Client(
+            api_key=settings.GOOGLE_API_KEY,
+            http_options={"api_version": "v1alpha"},
+        )
+    return _client
+
+
+class TranscriptEntry(BaseModel):
+    """One turn of the conversation being scored."""
+
+    role: str  # "user" or "assistant"
+    text: str
+
+
+class ScorecardCriterion(BaseModel):
+    """A weighted scoring criterion with optional good/poor calibration examples."""
+
+    name: str
+    weight: float
+    description: str
+    good_example: str | None = None
+    poor_example: str | None = None
+
+
+class Scorecard(BaseModel):
+    """The list of criteria a transcript is evaluated against."""
+
+    criteria: List[ScorecardCriterion]
+
+
+class ScoreRequest(BaseModel):
+    """Request body for ``POST /score``."""
+
+    transcript: List[TranscriptEntry]
+    scorecard: Scorecard
+    pass_threshold: int = 70
+
+
+SCORE_RESPONSE_SCHEMA = {
+    "type": "OBJECT",
+    "properties": {
+        "overall_score": {"type": "NUMBER"},
+        "passed": {"type": "BOOLEAN"},
+        "criteria_scores": {
+            "type": "ARRAY",
+            "items": {
+                "type": "OBJECT",
+                "properties": {
+                    "name": {"type": "STRING"},
+                    "score": {"type": "NUMBER"},
+                    "feedback": {"type": "STRING"},
+                },
+            },
+        },
+        "positives": {"type": "ARRAY", "items": {"type": "STRING"}},
+        "improvements": {"type": "ARRAY", "items": {"type": "STRING"}},
+    },
+    "required": ["overall_score", "passed", "criteria_scores", "positives", "improvements"],
+}
+
+
+def _build_prompt(body: ScoreRequest) -> str:
+    """Render the transcript and scorecard criteria into the evaluator prompt."""
+    transcript_text = "\n".join(
+        f"{e.role.upper()}: {e.text}" for e in body.transcript
+    )
+    criteria_text = "\n".join(
+        f"- {c.name} (weight: {c.weight}%): {c.description}"
+        + (f"\n  Good example: {c.good_example}" if c.good_example else "")
+        + (f"\n  Poor example: {c.poor_example}" if c.poor_example else "")
+        for c in body.scorecard.criteria
+    )
+    return f"""You are an expert conversation evaluator for workplace learning simulations.
+Score the following conversation transcript against the provided scorecard criteria.
+
+TRANSCRIPT:
+{transcript_text}
+
+SCORING CRITERIA (weights must sum to 100%):
+{criteria_text}
+
+Instructions:
+- Score each criterion from 0 to 100 based on evidence in the transcript.
+- Calculate overall_score as the weighted average of all criteria scores.
+- Set passed to true if overall_score >= {body.pass_threshold}.
+- Write specific, evidence-based feedback for each criterion (1-2 sentences).
+- List exactly 2-3 positives (specific things done well with transcript evidence).
+- List exactly 2-3 improvements (specific, actionable suggestions).
+
+Return a single JSON object following the response schema."""
+
+
+def _apply_scoring_rules(result: dict, body: ScoreRequest) -> dict:
+    """Recompute ``overall_score`` and ``passed`` rather than trusting model arithmetic.
+
+    The overall score becomes the weight-normalised average of the per-criterion
+    scores whose ``name`` matches a scorecard criterion. If nothing matches (or the
+    matched weights sum to zero) the model's own ``overall_score`` is kept.
+    ``passed`` is always derived from the final score and ``body.pass_threshold``.
+    """
+    weights = {c.name: c.weight for c in body.scorecard.criteria}
+    weighted_sum = 0.0
+    total_weight = 0.0
+    for entry in result.get("criteria_scores") or []:
+        if not isinstance(entry, dict):
+            continue
+        weight = weights.get(entry.get("name"))
+        score = entry.get("score")
+        if weight is None or not isinstance(score, (int, float)):
+            continue
+        weighted_sum += weight * score
+        total_weight += weight
+
+    if total_weight > 0:
+        result["overall_score"] = round(weighted_sum / total_weight, 2)
+    overall = result.get("overall_score")
+    result["passed"] = isinstance(overall, (int, float)) and overall >= body.pass_threshold
+    return result
+
+
+@router.post("/score")
+@limiter.limit(settings.RATE_LIMIT)
+async def score_conversation(
+    request: Request,
+    body: ScoreRequest,
+    module: Module = Depends(get_current_module),
+    db: Session = Depends(get_db),
+):
+    """Score a conversation transcript against a scorecard.
+
+    Without a configured API key a deterministic mock result is returned.
+
+    Raises:
+        HTTPException: 500 if the Gemini call fails; 502 if Gemini returns
+            empty, malformed, or non-object JSON.
+    """
+    client = get_gemini_client()
+
+    if not client:
+        return {
+            "overall_score": MOCK_SCORE,
+            # Honour the caller's threshold instead of always reporting a pass.
+            "passed": MOCK_SCORE >= body.pass_threshold,
+            "criteria_scores": [
+                {
+                    "name": c.name,
+                    "score": MOCK_SCORE,
+                    "feedback": f"Mock feedback for {c.name}.",
+                }
+                for c in body.scorecard.criteria
+            ],
+            "positives": [
+                "Maintained a professional tone throughout.",
+                "Responded clearly to the main questions.",
+            ],
+            "improvements": [
+                "Could provide more specific examples.",
+                "Consider addressing the customer's emotional state earlier.",
+            ],
+        }
+
+    prompt = _build_prompt(body)
+
+    # Keep the try body to the remote call so unrelated errors are not relabelled.
+    try:
+        response = await client.aio.models.generate_content(
+            model="gemini-2.5-flash",
+            contents=prompt,
+            config=types.GenerateContentConfig(
+                response_mime_type="application/json",
+                response_schema=SCORE_RESPONSE_SCHEMA,
+                temperature=0.2,
+            ),
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    # response.text can be None when the reply carries no text part.
+    raw_text = response.text or ""
+    try:
+        result = json.loads(raw_text)
+    except json.JSONDecodeError as e:
+        raise HTTPException(status_code=502, detail=f"Gemini returned invalid JSON: {str(e)}") from e
+    if not isinstance(result, dict):
+        raise HTTPException(status_code=502, detail="Gemini returned a non-object JSON payload")
+
+    result = _apply_scoring_rules(result, body)
+
+    if module:
+        usage = response.usage_metadata
+        # Counts may be absent; fall back to a rough 4-characters-per-token estimate.
+        prompt_tokens = (usage.prompt_token_count if usage else None) or len(prompt) // 4
+        completion_tokens = (usage.candidates_token_count if usage else None) or len(raw_text) // 4
+        module.ingress_tokens += prompt_tokens
+        module.egress_tokens += completion_tokens
+        module.total_tokens += prompt_tokens + completion_tokens
+        db.commit()
+
+    return result
diff --git a/app/api/router.py b/app/api/router.py
index efc9698..33932de 100644
--- a/app/api/router.py
+++ b/app/api/router.py
@@ -1,7 +1,9 @@
 from fastapi import APIRouter
-from app.api.endpoints import storyline, gemini, openai
+from app.api.endpoints import storyline, gemini, openai, gemini_live, gemini_score
 
 api_router = APIRouter()
 api_router.include_router(storyline.router, prefix="/storyline", tags=["storyline"])
 api_router.include_router(gemini.router, prefix="/gemini", tags=["gemini"])
 api_router.include_router(openai.router, prefix="/openai", tags=["openai"])
+api_router.include_router(gemini_live.router, prefix="/gemini", tags=["gemini-live"])
+api_router.include_router(gemini_score.router, prefix="/gemini", tags=["gemini-live"])