"""Mock OpenAI-compatible chat-completions API built on FastAPI.

Echoes the caller's last message back -- either as a single JSON response
or as an OpenAI-style server-sent-event stream -- and guards the endpoint
with a single hard-coded bearer token.
"""

import asyncio
import json
import time
from typing import List, Optional

from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import HTTPBearer
from pydantic import BaseModel
from starlette.responses import StreamingResponse


class ChatMessage(BaseModel):
    """One chat turn: who spoke (`role`) and what was said (`content`)."""

    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    """Subset of the OpenAI /v1/chat/completions request body."""

    model: str = "mock-gpt-model"
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.5
    stream: Optional[bool] = False


app = FastAPI(title="OpenAI-compatible API")

# auto_error=False lets a missing/invalid Authorization header reach our
# dependency so we can return a uniform 401 instead of FastAPI's default 403.
bearer_scheme = HTTPBearer(auto_error=False)

# NOTE(review): hard-coded mock credential -- replace with real key
# management before any non-demo deployment.
_API_KEY = "sk-1234"


async def credentials(authorization=Depends(bearer_scheme)):
    """Validate the Bearer token.

    Returns the raw credential string when it matches the mock key;
    raises HTTP 401 otherwise (including when the header is absent).
    """
    if authorization and authorization.credentials == _API_KEY:
        return authorization.credentials
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key",
    )


async def _resp_async_generator(text_resp: str):
    """Yield *text_resp* word-by-word as OpenAI-style SSE chunks.

    Pretends every whitespace-separated word is one token, emitting each
    after a short delay to mimic incremental generation, then the
    terminal ``data: [DONE]`` sentinel.
    """
    tokens = text_resp.split(" ")
    for i, token in enumerate(tokens):
        chunk = {
            "id": i,
            "object": "chat.completion.chunk",
            # OpenAI's wire format uses an integer Unix timestamp.
            "created": int(time.time()),
            "model": "mock-gpt-model",
            "choices": [{"delta": {"content": token + " "}}],
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        await asyncio.sleep(0.1)
    yield "data: [DONE]\n\n"


@app.post("/v1/chat/completions", dependencies=[Depends(credentials)])
async def chat_completions(request: ChatCompletionRequest):
    """Echo the last message back, streamed or as a single response."""
    if request.messages and request.messages[0].role == "user":
        resp_content = (
            "As a mock AI Assistant, I only can echo your last message:"
            + request.messages[-1].content
        )
    else:
        resp_content = (
            "As a mock AI Assistant, I only can echo your last message, "
            "but there were no messages!"
        )

    if request.stream:
        return StreamingResponse(
            _resp_async_generator(resp_content),
            media_type="text/event-stream",
        )
    return {
        # BUG FIX: the original used time.gmtime().tm_year, which produced
        # the same "id" (the current year) for every response; use a
        # millisecond timestamp so ids are effectively unique per request.
        "id": int(time.time() * 1000),
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [
            {"message": ChatMessage(role="assistant", content=resp_content)}
        ],
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)