Feat: Implement token tracking, soft delete, and Admin UI improvements
This commit is contained in:
@@ -26,6 +26,9 @@ class ModuleResponse(BaseModel):
|
||||
is_active: bool
|
||||
created_at: datetime
|
||||
last_rotated_at: datetime
|
||||
ingress_tokens: int
|
||||
egress_tokens: int
|
||||
total_tokens: int
|
||||
|
||||
class Config:
    """Pydantic v2 compatibility setting: populate response fields
    directly from SQLAlchemy ORM model attributes."""
    from_attributes = True
|
||||
@@ -55,10 +58,14 @@ def create_module(
|
||||
|
||||
@router.get("/modules", response_model=List[ModuleResponse])
def get_modules(
    include_archived: bool = False,
    db: Session = Depends(get_db),
    current_user: str = Depends(get_current_user)
):
    """List modules for the admin UI.

    By default only active (non-archived) modules are returned; pass
    ``include_archived=True`` to also include soft-deleted modules.

    Fix: removed the stale pre-edit line ``return db.query(Module).all()``
    that was left above the filtering logic — it returned early on every
    request and made the ``include_archived`` filter unreachable.
    """
    query = db.query(Module)
    if not include_archived:
        # `== True` (not `is True`) is intentional: SQLAlchemy overloads
        # `==` on columns to build the SQL WHERE clause.
        query = query.filter(Module.is_active == True)  # noqa: E712
    return query.all()
|
||||
|
||||
@router.post("/modules/{module_id}/rotate", response_model=ModuleResponse)
|
||||
def rotate_module_key(
|
||||
@@ -79,12 +86,35 @@ def rotate_module_key(
|
||||
@router.delete("/modules/{module_id}")
def delete_module(
    module_id: int,
    hard_delete: bool = False,
    db: Session = Depends(get_db),
    current_user: str = Depends(get_current_user)
):
    """Archive (soft-delete) a module, or permanently delete it.

    Default behavior flips ``is_active`` to ``False`` so the module can be
    restored later; ``hard_delete=True`` removes the row entirely.

    Raises:
        HTTPException: 404 if no module with ``module_id`` exists.

    Fix: removed the stale pre-edit lines left by the diff — an
    unconditional ``db.delete(db_module)`` before the branch (which
    deleted the row even on soft-delete requests, defeating the archive
    feature) and the old ``return {"status": "success"}`` without the
    message field.
    """
    db_module = db.query(Module).filter(Module.id == module_id).first()
    if not db_module:
        raise HTTPException(status_code=404, detail="Module not found")

    if hard_delete:
        db.delete(db_module)
        message = "Module permanently deleted"
    else:
        # Soft delete: keep the row so usage history and restore work.
        db_module.is_active = False
        message = "Module archived"

    db.commit()
    return {"status": "success", "message": message}
|
||||
|
||||
@router.post("/modules/{module_id}/restore", response_model=ModuleResponse)
def restore_module(
    module_id: int,
    db: Session = Depends(get_db),
    current_user: str = Depends(get_current_user)
):
    """Un-archive a previously soft-deleted module.

    Flips ``is_active`` back to ``True`` and returns the refreshed row.

    Raises:
        HTTPException: 404 if no module with ``module_id`` exists.
    """
    module = db.query(Module).filter(Module.id == module_id).first()
    if module is None:
        raise HTTPException(status_code=404, detail="Module not found")

    module.is_active = True
    db.commit()
    db.refresh(module)
    return module
|
||||
|
||||
@@ -33,11 +33,35 @@ async def get_api_key(
|
||||
# 3. Check Database for Module key (Database round-trip)
|
||||
module = db.query(Module).filter(Module.secret_key == api_key_header, Module.is_active == True).first()
|
||||
if module:
|
||||
# Save to cache for next time
|
||||
auth_cache[api_key_header] = True
|
||||
return api_key_header
|
||||
# Save module ID to cache for next time
|
||||
auth_cache[api_key_header] = module.id
|
||||
return module
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Could not validate credentials or API Key is inactive"
|
||||
)
|
||||
|
||||
async def get_current_module(
    api_key_header: str = Security(api_key_header),
    db: Session = Depends(get_db)
):
    """Resolve the calling module from its API key for usage tracking.

    Returns ``None`` for the global admin key (no module tracking),
    the ``Module`` row for a valid module key, and raises 403 otherwise.

    Fix: the cache-hit path previously returned
    ``db.query(Module).filter(Module.id == module_id).first()`` directly,
    which could return ``None`` (module hard-deleted after being cached)
    or an archived module — bypassing both the ``is_active`` check and
    the 403. A stale cache entry is now evicted and the request falls
    through to the full lookup / rejection path.
    """
    # 1. Global static admin key — not tied to any module.
    if api_key_header == settings.API_KEY:
        return None

    # 2. Cache hit: re-validate against the DB so deactivated/deleted
    #    modules are not served from a stale cache entry.
    if api_key_header in auth_cache:
        module_id = auth_cache[api_key_header]
        module = db.query(Module).filter(
            Module.id == module_id, Module.is_active == True  # noqa: E712
        ).first()
        if module:
            return module
        # Stale entry — evict and fall through to the full lookup.
        del auth_cache[api_key_header]

    # 3. Full DB lookup by secret key (active modules only).
    module = db.query(Module).filter(
        Module.secret_key == api_key_header, Module.is_active == True  # noqa: E712
    ).first()
    if module:
        auth_cache[api_key_header] = module.id
        return module

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Could not validate credentials"
    )
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from app.api.deps import get_api_key
|
||||
from app.api.deps import get_api_key, get_current_module
|
||||
from app.models.module import Module
|
||||
from sqlalchemy.orm import Session
|
||||
from app.core.database import get_db
|
||||
from app.core.limiter import limiter
|
||||
from app.core.config import settings
|
||||
from pydantic import BaseModel
|
||||
@@ -26,16 +29,29 @@ def get_gemini_client():
|
||||
async def gemini_chat(
|
||||
request: Request,
|
||||
chat_data: LLMRequest,
|
||||
api_key: str = Depends(get_api_key)
|
||||
api_key: str = Depends(get_api_key),
|
||||
module: Module = Depends(get_current_module),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
client = get_gemini_client()
|
||||
|
||||
try:
|
||||
if not client:
|
||||
# Mock response
|
||||
response_text = f"MOCK: Gemini response to '{chat_data.prompt}'"
|
||||
if module:
|
||||
# Estimate tokens for mock
|
||||
prompt_tokens = len(chat_data.prompt) // 4
|
||||
completion_tokens = len(response_text) // 4
|
||||
module.ingress_tokens += prompt_tokens
|
||||
module.egress_tokens += completion_tokens
|
||||
module.total_tokens += (prompt_tokens + completion_tokens)
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"status": "mock",
|
||||
"model": "gemini",
|
||||
"response": f"MOCK: Gemini response to '{chat_data.prompt}'"
|
||||
"response": response_text
|
||||
}
|
||||
|
||||
# Using the async generation method provided by the new google-genai library
|
||||
@@ -45,6 +61,20 @@ async def gemini_chat(
|
||||
contents=chat_data.prompt
|
||||
)
|
||||
|
||||
# Track usage if valid module
|
||||
if module:
|
||||
# Estimate tokens since metadata might vary
|
||||
# 1 char ~= 0.25 tokens (rough estimate if exact count not returned)
|
||||
# Gemini response usually has usage_metadata
|
||||
usage = response.usage_metadata
|
||||
prompt_tokens = usage.prompt_token_count if usage else len(chat_data.prompt) // 4
|
||||
completion_tokens = usage.candidates_token_count if usage else len(response.text) // 4
|
||||
|
||||
module.ingress_tokens += prompt_tokens
|
||||
module.egress_tokens += completion_tokens
|
||||
module.total_tokens += (prompt_tokens + completion_tokens)
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"model": "gemini",
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from app.api.deps import get_api_key
|
||||
from app.api.deps import get_api_key, get_current_module
|
||||
from app.models.module import Module
|
||||
from sqlalchemy.orm import Session
|
||||
from app.core.database import get_db
|
||||
from app.core.limiter import limiter
|
||||
from app.core.config import settings
|
||||
from pydantic import BaseModel
|
||||
@@ -22,14 +25,27 @@ if settings.OPENAI_API_KEY and settings.OPENAI_API_KEY != "your-openai-api-key":
|
||||
async def openai_chat(
|
||||
request: Request,
|
||||
chat_data: LLMRequest,
|
||||
api_key: str = Depends(get_api_key)
|
||||
api_key: str = Depends(get_api_key),
|
||||
module: Module = Depends(get_current_module),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
try:
|
||||
if not client:
|
||||
# Mock response
|
||||
response_text = f"MOCK: OpenAI response to '{chat_data.prompt}'"
|
||||
if module:
|
||||
# Estimate tokens for mock
|
||||
prompt_tokens = len(chat_data.prompt) // 4
|
||||
completion_tokens = len(response_text) // 4
|
||||
module.ingress_tokens += prompt_tokens
|
||||
module.egress_tokens += completion_tokens
|
||||
module.total_tokens += (prompt_tokens + completion_tokens)
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"status": "mock",
|
||||
"model": "openai",
|
||||
"response": f"MOCK: OpenAI response to '{chat_data.prompt}'"
|
||||
"response": response_text
|
||||
}
|
||||
|
||||
# Perform Async call to OpenAI
|
||||
@@ -37,6 +53,16 @@ async def openai_chat(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": chat_data.prompt}]
|
||||
)
|
||||
|
||||
# Track usage
|
||||
if module:
|
||||
usage = response.usage
|
||||
if usage:
|
||||
module.ingress_tokens += usage.prompt_tokens
|
||||
module.egress_tokens += usage.completion_tokens
|
||||
module.total_tokens += usage.total_tokens
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"model": "openai",
|
||||
|
||||
Reference in New Issue
Block a user