mock-openai.py
import time
from typing import List, Optional

from pydantic import BaseModel


class ChatMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str = "mock-gpt-model"
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.5
    stream: Optional[bool] = False
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.security import HTTPBearer

app = FastAPI(title="OpenAI-compatible API")

bearer_scheme = HTTPBearer(auto_error=False)


async def credentials(authorization=Depends(bearer_scheme)):
    # Accept the one hard-coded API key; any other credentials
    # (or a missing Authorization header) get a 401 back.
    if authorization and authorization.credentials == "sk-1234":
        return authorization.credentials

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key",
    )
import asyncio
import json


async def _resp_async_generator(text_resp: str):
    # Pretend every word is a token and stream the words out over time,
    # using the same server-sent-events framing as the real API.
    tokens = text_resp.split(" ")

    for i, token in enumerate(tokens):
        chunk = {
            "id": i,
            "object": "chat.completion.chunk",
            "created": time.time(),
            "model": "mock-gpt-model",
            "choices": [{"delta": {"content": token + " "}}],
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        await asyncio.sleep(0.1)
    yield "data: [DONE]\n\n"
from starlette.responses import StreamingResponse


@app.post("/v1/chat/completions", dependencies=[Depends(credentials)])
async def chat_completions(request: ChatCompletionRequest):
    if request.messages:
        resp_content = (
            "As a mock AI Assistant, I can only echo your last message: "
            + request.messages[-1].content
        )
    else:
        resp_content = (
            "As a mock AI Assistant, I can only echo your last message, "
            "but there were no messages!"
        )

    if request.stream:
        return StreamingResponse(
            _resp_async_generator(resp_content), media_type="text/event-stream"
        )

    return {
        "id": str(time.gmtime().tm_year),  # arbitrary placeholder id
        "object": "chat.completion",
        "created": time.time(),
        "model": request.model,
        "choices": [{"message": ChatMessage(role="assistant", content=resp_content)}],
    }
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
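
A quick way to sanity-check the server is to point the official openai Python client (v1+) at it. This is a minimal sketch, assuming the server is running locally on port 8000 with the hard-coded key sk-1234 from above.

from openai import OpenAI

# point the client at the mock server instead of api.openai.com
client = OpenAI(api_key="sk-1234", base_url="http://localhost:8000/v1")

# non-streaming: the mock echoes the last message back in one response
resp = client.chat.completions.create(
    model="mock-gpt-model",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(resp.choices[0].message.content)

# streaming: chunks arrive as server-sent events, one word at a time
stream = client.chat.completions.create(
    model="mock-gpt-model",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")

Because the endpoint path, request body, and SSE framing match the real API, the stock client works unmodified; only base_url and api_key change.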