19 Commits

Author SHA1 Message Date
495f32610b refactor: pipe the read and write process
this refactor simplifies the process logic and fixes several bugs and
performance issues.

bug fixed:
- cors headers not being sent in some situations
performance:
- perform upstream request while client is uploading content
2024-05-27 14:47:00 +08:00
45bba95f5d Merge remote-tracking branch 'comp/master' 2024-04-09 19:07:45 +08:00
75ff8fbc2e Refactor CORS handling and remove response's CORS headers 2024-04-09 19:07:28 +08:00
66758e0008 更新.gitlab-ci.yml文件 2024-04-08 10:29:08 +00:00
40fc2067a5 Merge remote-tracking branch 'comp/master' 2024-04-08 18:22:10 +08:00
1a56101ca8 add dockerignore 2024-04-08 18:21:55 +08:00
e373e3ac63 更新.gitlab-ci.yml文件 2024-04-08 07:48:05 +00:00
24a2e609f8 更新.gitlab-ci.yml文件 2024-04-08 05:11:47 +00:00
e442303847 更新.gitlab-ci.yml文件 2024-04-08 03:52:19 +00:00
8b95fbb5da 更新.gitlab-ci.yml文件 2024-04-08 03:40:32 +00:00
34aa4babc4 更新.gitlab-ci.yml文件 2024-04-08 03:21:24 +00:00
e6ff1f5ca4 更新.gitlab-ci.yml文件 2024-04-08 03:10:24 +00:00
6b6f245e45 Update README.md with complex configuration example 2024-04-08 11:04:29 +08:00
995eea9d67 Update README.md 2024-02-18 17:28:16 +08:00
db7f0eb316 timeout 2024-02-18 16:45:37 +08:00
990628b455 bro gooooo 2024-02-17 00:30:27 +08:00
e8b89fc41a record all json request body 2024-02-16 22:42:48 +08:00
46ee30ced7 use path as default model name 2024-02-16 22:31:32 +08:00
f2e32340e3 fix typo 2024-02-16 17:47:40 +08:00
10 changed files with 405 additions and 400 deletions

View File

@@ -1,3 +1,4 @@
openai-api-route openai-api-route
db.sqlite db.sqlite
/config.yaml /config.yaml
/.*

52
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,52 @@
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Docker.gitlab-ci.yml
# Build a Docker image with CI/CD and push to the GitLab registry.
# Docker-in-Docker documentation: https://docs.gitlab.com/ee/ci/docker/using_docker_build.html
#
# This template uses one generic job with conditional builds
# for the default branch and all other (MR) branches.
docker-build:
# Use the official docker image.
image: docker:cli
stage: build
services:
- docker:dind
variables:
CI_REGISTRY: registry.waykey.net:7999
CI_REGISTRY_IMAGE: $CI_REGISTRY/spiderman/datamining/openai-api-route
DOCKER_IMAGE_NAME: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
before_script:
- docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
# All branches are tagged with $DOCKER_IMAGE_NAME (defaults to commit ref slug)
# Default branch is also tagged with `latest`
script:
- docker build --pull -t "$DOCKER_IMAGE_NAME" .
- docker push "$DOCKER_IMAGE_NAME"
- |
if [[ "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH" ]]; then
docker tag "$DOCKER_IMAGE_NAME" "$CI_REGISTRY_IMAGE:latest"
docker push "$CI_REGISTRY_IMAGE:latest"
fi
# Run this job in a branch where a Dockerfile exists
rules:
- if: $CI_COMMIT_BRANCH
exists:
- Dockerfile
deploy:
environment: production
image: kroniak/ssh-client
stage: deploy
before_script:
- chmod 600 $CI_SSH_PRIVATE_KEY
script:
- ssh -o StrictHostKeyChecking=no -i $CI_SSH_PRIVATE_KEY root@192.168.1.13 "cd /mnt/data/srv/openai-api-route && podman-compose pull && podman-compose down && podman-compose up -d"
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
exists:
- Dockerfile

206
README.md
View File

@@ -7,16 +7,134 @@
- 自定义 Authorization 验证头 - 自定义 Authorization 验证头
- 支持所有类型的接口 (`/v1/*`) - 支持所有类型的接口 (`/v1/*`)
- 提供 Prometheus Metrics 统计接口 (`/v1/metrics`) - 提供 Prometheus Metrics 统计接口 (`/v1/metrics`)
- 按照定义顺序请求 OpenAI 上游 - 按照定义顺序请求 OpenAI 上游,出错或超时自动按顺序尝试下一个
- 识别 ChatCompletions Stream 请求,针对 Stream 请求使用 5 秒超时。具体超时策略请参阅 [超时策略](#超时策略) 一节 - 识别 ChatCompletions Stream 请求,针对 Stream 请求使用更短的超时。具体超时策略请参阅 [超时策略](#超时策略) 一节
- 记录完整的请求内容、使用的上游、IP 地址、响应时间以及 GPT 回复文本 - 有选择地记录请求内容、请求头、使用的上游、IP 地址、响应时间以及响应等内容。具体记录策略请参阅 [记录策略](#记录策略) 一节
- 请求出错时发送 飞书 或 Matrix 消息通知 - 请求出错时发送 飞书 或 Matrix 平台的消息通知
- 支持 Replicate 平台上的模型 - 支持 Replicate 平台上的 mistral 模型beta
本文档详细介绍了如何使用负载均衡和能力 API 的方法和端点。 本文档详细介绍了如何使用负载均衡和能力 API 的方法和端点。
## 配置文件
默认情况下程序会使用当前目录下的 `config.yaml` 文件,您可以通过使用 `-config your-config.yaml` 参数指定配置文件路径。
以下是一个配置文件示例,你可以在 `config.sample.yaml` 文件中找到同样的内容
```yaml
authorization: woshimima
# 默认超时时间,默认 120 秒,流式请求是 10 秒
timeout: 120
stream_timeout: 10
# 使用 sqlite 作为数据库储存请求记录
dbtype: sqlite
dbaddr: ./db.sqlite
# 使用 postgres 作为数据库储存请求记录
# dbtype: postgres
# dbaddr: "host=127.0.0.1 port=5432 user=postgres dbname=openai_api_route sslmode=disable password=woshimima"
upstreams:
- sk: hahaha
endpoint: "https://localhost:8888/v1"
allow:
# whisper 等非 JSON API 识别不到 model则使用 URL 路径作为模型名称
- /v1/audio/transcriptions
- sk: "secret_key_1"
endpoint: "https://api.openai.com/v2"
timeout: 120 # 请求超时时间默认120秒
stream_timeout: 10 # 如果识别到 stream: true, 则使用该超时时间
allow: # 可选的模型白名单
- gpt-3.5-trubo
- gpt-3.5-trubo-0613
# 您可以设置很多个上游,程序将依次按顺序尝试
- sk: "secret_key_2"
endpoint: "https://api.openai.com/v1"
timeout: 30
deny:
- gpt-4
- sk: "key_for_replicate"
type: replicate
allow:
- mistralai/mixtral-8x7b-instruct-v0.1
```
### 配置多个验证头
您可以使用英文逗号 `,` 分割多个验证头。每个验证头都是有效的,程序会记录每个请求使用的验证头
```yaml
authorization: woshimima,iampassword
```
您也可以为上游单独设置验证头
```yaml
authorization: woshimima,iampassword
upstreams:
- sk: key
authorization: woshimima
```
如此,只有携带 `woshimima` 验证头的用户可以使用该上游。
### 复杂配置示例
```yaml
# 默认验证头
authorization: woshimima
upstreams:
# 允许所有人使用的文字转语音
- sk: xxx
endpoint: http://localhost:5000/v1
noauth: true
allow:
- /v1/audio/transcriptions
# guest 专用的 gpt-3.5-turbo-0125 模型
- sk:
endpoint: https://api.xxx.local/v1
authorization: guest
allow:
- gpt-3.5-turbo-0125
```
## 部署方法 ## 部署方法
有两种推荐的部署方法:
1. 使用预先构建好的容器 `docker.io/heimoshuiyu/openai-api-route:latest`
2. 自行编译
### 使用容器运行
> 注意,如果您使用 sqlite 数据库,您可能还需要修改配置文件以将 SQLite 数据库文件放置在数据卷中。
```bash
docker run -d --name openai-api-route -v /path/to/config.yaml:/config.yaml docker.io/heimoshuiyu/openai-api-route:latest
```
使用 Docker Compose
```yaml
version: '3'
services:
openai-api-route:
image: docker.io/heimoshuiyu/openai-api-route:latest
ports:
- 8888:8888
volumes:
- ./config.yaml:/config.yaml
```
### 编译 ### 编译
以下是编译和运行该负载均衡 API 的步骤: 以下是编译和运行该负载均衡 API 的步骤:
@@ -41,78 +159,6 @@
./openai-api-route ./openai-api-route
``` ```
默认情况下API 将会在本地的 8888 端口进行监听。
如果您希望使用不同的监听地址,可以使用 `-addr` 参数来指定,例如:
```
./openai-api-route -addr 0.0.0.0:8080
```
这将会将监听地址设置为 0.0.0.0:8080。
6. 如果数据库不存在,系统会自动创建一个名为 `db.sqlite` 的数据库文件。
如果您希望使用不同的数据库地址,可以使用 `-database` 参数来指定,例如:
```
./openai-api-route -database /path/to/database.db
```
这将会将数据库地址设置为 `/path/to/database.db`。
7. 现在,您已经成功编译并运行了负载均衡和能力 API。您可以根据需要添加上游、管理上游并使用 API 进行相关操作。
### 运行
以下是运行命令的用法:
```
Usage of ./openai-api-route:
-addr string
监听地址(默认为 ":8888"
-upstreams string
上游配置文件(默认为 "./upstreams.yaml"
-dbtype
数据库类型 (sqlite 或 postgres默认为 sqlite)
-database string
数据库地址(默认为 "./db.sqlite"
如果数据库为 postgres ,则此值应 PostgreSQL DSN 格式
例如 "host=127.0.0.1 port=5432 user=postgres dbname=openai_api_route sslmode=disable password=woshimima"
-list
列出所有上游
-noauth
不检查传入的授权头
```
以下是一个 `./upstreams.yaml` 文件配置示例
```yaml
authorization: woshimima
# 使用 sqlite 作为数据库储存请求记录
dbtype: sqlite
dbaddr: ./db.sqlite
# 使用 postgres 作为数据库储存请求记录
# dbtype: postgres
# dbaddr: "host=127.0.0.1 port=5432 user=postgres dbname=openai_api_route sslmode=disable password=woshimima"
upstreams:
- sk: "key_for_replicate"
type: replicate
allow: ["mistralai/mixtral-8x7b-instruct-v0.1"]
- sk: "secret_key_1"
endpoint: "https://api.openai.com/v2"
- sk: "secret_key_2"
endpoint: "https://api.openai.com/v1"
timeout: 30
```
请注意,程序会根据情况修改 timeout 的值
您可以直接运行 `./openai-api-route` 命令,如果数据库不存在,系统会自动创建。
## 模型允许与屏蔽列表 ## 模型允许与屏蔽列表
如果对某个上游设置了 allow 或 deny 列表,则负载均衡只允许或禁用用户使用这些模型。负载均衡程序会先判断白名单,再判断黑名单。 如果对某个上游设置了 allow 或 deny 列表,则负载均衡只允许或禁用用户使用这些模型。负载均衡程序会先判断白名单,再判断黑名单。
@@ -137,8 +183,4 @@ upstreams:
1. **默认超时时间**:如果没有特殊条件,服务将使用默认的超时时间,即 60 秒。 1. **默认超时时间**:如果没有特殊条件,服务将使用默认的超时时间,即 60 秒。
2. **流式请求**:如果请求体被识别为流式(`requestBody.Stream` 为 `true`),并且请求体检查(`requestBodyOK`)没有发现问题,超时时间将被设置为 5 秒。这适用于那些预期会快速响应的流式请求。 2. **流式请求**:如果请求体被识别为流式(`requestBody.Stream` 为 `true`),并且请求体检查(`requestBodyOK`)没有发现问题,超时时间将被设置为 5 秒。这适用于那些预期会快速响应的流式请求。
3. **大请求体**:如果请求体的大小超过 128KB即 `len(inBody) > 1024*128`),超时时间将被设置为 20 秒。这考虑到了处理大型数据可能需要更长的时间。
4. **上游超时配置**:如果上游服务器在配置中指定了超时时间(`upstream.Timeout` 大于 0服务将使用该值作为超时时间。这个值是以秒为单位的。

26
auth.go
View File

@@ -2,30 +2,14 @@ package main
import ( import (
"errors" "errors"
"log"
"strings" "strings"
"github.com/gin-gonic/gin"
) )
func handleAuth(c *gin.Context) error { func checkAuth(authorization string, config string) error {
var err error for _, auth := range strings.Split(config, ",") {
if authorization == strings.Trim(auth, " ") {
authorization := c.Request.Header.Get("Authorization") return nil
if !strings.HasPrefix(authorization, "Bearer") {
err = errors.New("authorization header should start with 'Bearer'")
return err
}
authorization = strings.Trim(authorization[len("Bearer"):], " ")
log.Println("Received authorization", authorization)
for _, auth := range strings.Split(config.Authorization, ",") {
if authorization != strings.Trim(auth, " ") {
err = errors.New("wrong authorization header")
return err
} }
} }
return errors.New("wrong authorization header")
return nil
} }

32
cors.go
View File

@@ -1,28 +1,20 @@
package main package main
import ( import (
"log"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
) )
// this function is aborded func sendCORSHeaders(c *gin.Context) {
func corsMiddleware() gin.HandlerFunc { log.Println("sendCORSHeaders")
return func(c *gin.Context) { if c.Writer.Header().Get("Access-Control-Allow-Origin") == "" {
// set cors header c.Header("Access-Control-Allow-Origin", "*")
header := c.Request.Header }
if header.Get("Access-Control-Allow-Origin") == "" { if c.Writer.Header().Get("Access-Control-Allow-Methods") == "" {
c.Header("Access-Control-Allow-Origin", "*") c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE, PATCH")
} }
if header.Get("Access-Control-Allow-Methods") == "" { if c.Writer.Header().Get("Access-Control-Allow-Headers") == "" {
c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE, PATCH") c.Header("Access-Control-Allow-Headers", "Origin, Authorization, Content-Type")
}
if header.Get("Access-Control-Allow-Headers") == "" {
c.Header("Access-Control-Allow-Headers", "Origin, Authorization, Content-Type")
}
} }
} }
func sendCORSHeaders(c *gin.Context) {
c.Header("Access-Control-Allow-Origin", "*")
c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE, PATCH")
c.Header("Access-Control-Allow-Headers", "Origin, Authorization, Content-Type")
}

122
main.go
View File

@@ -1,8 +1,11 @@
package main package main
import ( import (
"bytes"
"encoding/json"
"flag" "flag"
"fmt" "fmt"
"io"
"log" "log"
"net/http" "net/http"
"os" "os"
@@ -12,6 +15,7 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/penglongli/gin-metrics/ginmetrics" "github.com/penglongli/gin-metrics/ginmetrics"
"gorm.io/driver/postgres" "gorm.io/driver/postgres"
"gorm.io/driver/sqlite" "gorm.io/driver/sqlite"
"gorm.io/gorm" "gorm.io/gorm"
) )
@@ -85,7 +89,7 @@ func main() {
} }
errText := strings.Join(c.Errors.Errors(), "\n") errText := strings.Join(c.Errors.Errors(), "\n")
c.JSON(-1, gin.H{ c.JSON(-1, gin.H{
"error": errText, "openai-api-route error": errText,
}) })
}) })
@@ -99,7 +103,8 @@ func main() {
}) })
engine.POST("/v1/*any", func(c *gin.Context) { engine.POST("/v1/*any", func(c *gin.Context) {
hostname, err := os.Hostname() var err error
hostname, _ := os.Hostname()
if config.Hostname != "" { if config.Hostname != "" {
hostname = config.Hostname hostname = config.Hostname
} }
@@ -109,32 +114,68 @@ func main() {
CreatedAt: time.Now(), CreatedAt: time.Now(),
Authorization: c.Request.Header.Get("Authorization"), Authorization: c.Request.Header.Get("Authorization"),
UserAgent: c.Request.Header.Get("User-Agent"), UserAgent: c.Request.Header.Get("User-Agent"),
Model: c.Request.URL.Path,
} }
// check authorization header authorization := c.Request.Header.Get("Authorization")
if !*noauth { if strings.HasPrefix(authorization, "Bearer") {
err := handleAuth(c) authorization = strings.Trim(authorization[len("Bearer"):], " ")
if err != nil { } else {
c.Header("Content-Type", "application/json") authorization = strings.Trim(authorization, " ")
sendCORSHeaders(c) log.Println("[auth] Warning: authorization header should start with 'Bearer'")
c.AbortWithError(403, err)
return
}
} }
log.Println("Received authorization '" + authorization + "'")
for index, upstream := range config.Upstreams { availUpstreams := make([]OPENAI_UPSTREAM, 0)
for _, upstream := range config.Upstreams {
if upstream.SK == "" { if upstream.SK == "" {
sendCORSHeaders(c) sendCORSHeaders(c)
c.AbortWithError(500, fmt.Errorf("[processRequest.begin]: invaild SK (secret key) '%s'", upstream.SK)) c.AbortWithError(500, fmt.Errorf("[processRequest.begin]: invaild SK (secret key) %s", upstream.SK))
continue continue
} }
if !*noauth && !upstream.Noauth {
if checkAuth(authorization, upstream.Authorization) != nil {
continue
}
}
availUpstreams = append(availUpstreams, upstream)
}
if len(availUpstreams) == 0 {
sendCORSHeaders(c)
c.AbortWithError(500, fmt.Errorf("[processRequest.begin]: no available upstream for your token"))
}
log.Println("[processRequest.begin]: availUpstreams", len(availUpstreams))
bufIO := bytes.NewBuffer(make([]byte, 0, 1024))
wrapedBody := false
for index, _upstream := range availUpstreams {
// copy
upstream := _upstream
record.UpstreamEndpoint = upstream.Endpoint
record.UpstreamSK = upstream.SK
shouldResponse := index == len(config.Upstreams)-1 shouldResponse := index == len(config.Upstreams)-1
if len(config.Upstreams) == 1 { if len(availUpstreams) == 1 {
// [todo] copy problem
upstream.Timeout = 120 upstream.Timeout = 120
} }
// buffer for incoming request
if !wrapedBody {
log.Println("[processRequest.begin]: wrap request body")
c.Request.Body = io.NopCloser(io.TeeReader(c.Request.Body, bufIO))
wrapedBody = true
} else {
log.Println("[processRequest.begin]: reuse request body")
c.Request.Body = io.NopCloser(bytes.NewReader(bufIO.Bytes()))
}
if upstream.Type == "replicate" { if upstream.Type == "replicate" {
err = processReplicateRequest(c, &upstream, &record, shouldResponse) err = processReplicateRequest(c, &upstream, &record, shouldResponse)
} else if upstream.Type == "openai" { } else if upstream.Type == "openai" {
@@ -143,31 +184,52 @@ func main() {
err = fmt.Errorf("[processRequest.begin]: unsupported upstream type '%s'", upstream.Type) err = fmt.Errorf("[processRequest.begin]: unsupported upstream type '%s'", upstream.Type)
} }
if err != nil { if err == nil {
if err == http.ErrAbortHandler { log.Println("[processRequest.done]: Success from upstream", upstream.Endpoint)
abortErr := "[processRequest.done]: AbortHandler, client's connection lost?, no upstream will try, stop here" break
log.Println(abortErr)
record.Response += abortErr
record.Status = 500
break
}
log.Println("[processRequest.done]: Error from upstream", upstream.Endpoint, "should retry", err)
continue
} }
break if err == http.ErrAbortHandler {
abortErr := "[processRequest.done]: AbortHandler, client's connection lost?, no upstream will try, stop here"
log.Println(abortErr)
record.Response += abortErr
record.Status = 500
break
}
log.Println("[processRequest.done]: Error from upstream", upstream.Endpoint, "should retry", err, "should response:", shouldResponse)
// error process, break
if shouldResponse {
c.Header("Content-Type", "application/json")
sendCORSHeaders(c)
c.AbortWithError(500, err)
}
}
// parse and record request body
requestBodyBytes := bufIO.Bytes()
if len(requestBodyBytes) < 1024*1024 && (strings.HasPrefix(c.Request.Header.Get("Content-Type"), "application/json") ||
strings.HasPrefix(c.Request.Header.Get("Content-Type"), "text/")) {
record.Body = string(requestBodyBytes)
}
requestBody, err := ParseRequestBody(requestBodyBytes)
if err != nil {
log.Println("[processRequest.done]: Error to parse request body:", err)
} else {
record.Model = requestBody.Model
} }
log.Println("[final]: Record result:", record.Status, record.Response) log.Println("[final]: Record result:", record.Status, record.Response)
record.ElapsedTime = time.Now().Sub(record.CreatedAt) record.ElapsedTime = time.Since(record.CreatedAt)
// async record request // async record request
go func() { go func() {
// encoder headers to record.Headers in json string
headers, _ := json.Marshal(c.Request.Header)
record.Headers = string(headers)
// turncate request if too long // turncate request if too long
if len(record.Body) > 1024*128 { log.Println("[async.record]: body length:", len(record.Body))
log.Println("[async.record]: Warning: Truncate request body")
record.Body = record.Body[:1024*128]
}
if db.Create(&record).Error != nil { if db.Create(&record).Error != nil {
log.Println("[async.record]: Error to save record:", record) log.Println("[async.record]: Error to save record:", record)
} }

View File

@@ -8,22 +8,14 @@ import (
"io" "io"
"log" "log"
"net/http" "net/http"
"net/http/httputil"
"net/url" "net/url"
"strings" "strings"
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"golang.org/x/net/context"
) )
func processRequest(c *gin.Context, upstream *OPENAI_UPSTREAM, record *Record, shouldResponse bool) error { func processRequest(c *gin.Context, upstream *OPENAI_UPSTREAM, record *Record, shouldResponse bool) error {
var errCtx []error
record.UpstreamEndpoint = upstream.Endpoint
record.UpstreamSK = upstream.SK
record.Response = ""
// [TODO] record request body
// reverse proxy // reverse proxy
remote, err := url.Parse(upstream.Endpoint) remote, err := url.Parse(upstream.Endpoint)
@@ -32,246 +24,102 @@ func processRequest(c *gin.Context, upstream *OPENAI_UPSTREAM, record *Record, s
} }
path := strings.TrimPrefix(c.Request.URL.Path, "/v1") path := strings.TrimPrefix(c.Request.URL.Path, "/v1")
// recoognize whisper url
if strings.HasPrefix(path, "/audio/transcriptions") || strings.HasPrefix(path, "/audio/translations") {
record.Model = "whisper"
}
remote.Path = upstream.URL.Path + path remote.Path = upstream.URL.Path + path
log.Println("[proxy.begin]:", remote) log.Println("[proxy.begin]:", remote)
log.Println("[proxy.begin]: shouldResposne:", shouldResponse) log.Println("[proxy.begin]: shouldResposne:", shouldResponse)
haveResponse := false client := &http.Client{}
request := &http.Request{}
request.ContentLength = c.Request.ContentLength
request.Method = c.Request.Method
request.URL = remote
proxy := httputil.NewSingleHostReverseProxy(remote) // process header
proxy.Director = nil if upstream.KeepHeader {
var inBody []byte request.Header = c.Request.Header
proxy.Rewrite = func(proxyRequest *httputil.ProxyRequest) {
in := proxyRequest.In
ctx, cancel := context.WithCancel(context.Background())
proxyRequest.Out = proxyRequest.Out.WithContext(ctx)
out := proxyRequest.Out
// read request body
inBody, err = io.ReadAll(in.Body)
if err != nil {
errCtx = append(errCtx, errors.New("[proxy.rewrite]: reverse proxy middleware failed to read request body "+err.Error()))
return
}
// record chat message from user
record.Body = string(inBody)
requestBody, requestBodyOK := ParseRequestBody(inBody)
// record if parse success
if requestBodyOK == nil && record.Model != "" {
record.Model = requestBody.Model
}
// check allow list
if len(upstream.Allow) > 0 {
isAllow := false
for _, allow := range upstream.Allow {
if allow == record.Model {
isAllow = true
break
}
}
if !isAllow {
errCtx = append(errCtx, errors.New("[proxy.rewrite]: model not allowed"))
return
}
}
// check block list
if len(upstream.Deny) > 0 {
for _, deny := range upstream.Deny {
if deny == record.Model {
errCtx = append(errCtx, errors.New("[proxy.rewrite]: model denied"))
return
}
}
}
// set timeout, default is 60 second
timeout := 60 * time.Second
if requestBodyOK == nil && requestBody.Stream {
timeout = 5 * time.Second
}
if len(inBody) > 1024*128 {
timeout = 20 * time.Second
}
if upstream.Timeout > 0 {
// convert upstream.Timeout(second) to nanosecond
timeout = time.Duration(upstream.Timeout) * time.Second
}
// timeout out request
go func() {
time.Sleep(timeout)
if !haveResponse {
log.Println("[proxy.timeout]: Timeout upstream", upstream.Endpoint, timeout)
errTimeout := errors.New("[proxy.timeout]: Timeout upstream")
errCtx = append(errCtx, errTimeout)
if shouldResponse {
c.Header("Content-Type", "application/json")
sendCORSHeaders(c)
c.AbortWithError(502, errTimeout)
}
cancel()
}
}()
out.Body = io.NopCloser(bytes.NewReader(inBody))
out.Host = remote.Host
out.URL.Scheme = remote.Scheme
out.URL.Host = remote.Host
out.Header = http.Header{}
out.Header.Set("Host", remote.Host)
if upstream.SK == "asis" {
out.Header.Set("Authorization", c.Request.Header.Get("Authorization"))
} else {
out.Header.Set("Authorization", "Bearer "+upstream.SK)
}
out.Header.Set("Content-Type", c.Request.Header.Get("Content-Type"))
}
var buf bytes.Buffer
var contentType string
proxy.ModifyResponse = func(r *http.Response) error {
haveResponse = true
record.ResponseTime = time.Now().Sub(record.CreatedAt)
record.Status = r.StatusCode
// handle reverse proxy cors header if upstream do not set that
if r.Header.Get("Access-Control-Allow-Origin") == "" {
c.Header("Access-Control-Allow-Origin", "*")
}
if r.Header.Get("Access-Control-Allow-Methods") == "" {
c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE, PATCH")
}
if r.Header.Get("Access-Control-Allow-Headers") == "" {
c.Header("Access-Control-Allow-Headers", "Origin, Authorization, Content-Type")
}
if !shouldResponse && r.StatusCode != 200 {
log.Println("[proxy.modifyResponse]: upstream return not 200 and should not response", r.StatusCode)
return errors.New("upstream return not 200 and should not response")
}
if r.StatusCode != 200 {
body, err := io.ReadAll(r.Body)
if err != nil {
errRet := errors.New("[proxy.modifyResponse]: failed to read response from upstream " + err.Error())
return errRet
}
errRet := errors.New(fmt.Sprintf("[error]: openai-api-route upstream return '%s' with '%s'", r.Status, string(body)))
log.Println(errRet)
record.Status = r.StatusCode
return errRet
}
// count success
r.Body = io.NopCloser(io.TeeReader(r.Body, &buf))
contentType = r.Header.Get("content-type")
return nil
}
proxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) {
haveResponse = true
record.ResponseTime = time.Now().Sub(record.CreatedAt)
log.Println("[proxy.errorHandler]", err, upstream.SK, upstream.Endpoint, errCtx)
errCtx = append(errCtx, err)
// abort to error handle
if shouldResponse {
c.Header("Content-Type", "application/json")
sendCORSHeaders(c)
for _, err := range errCtx {
c.AbortWithError(502, err)
}
}
log.Println("[proxy.errorHandler]: response is", r.Response)
if record.Status == 0 {
record.Status = 502
}
record.Response += "[proxy.ErrorHandler]: " + err.Error()
if r.Response != nil {
record.Status = r.Response.StatusCode
}
}
err = ServeHTTP(proxy, c.Writer, c.Request)
if err != nil {
log.Println("[proxy.serve]: error from ServeHTTP:", err)
// panic means client has abort the http connection
// since the connection is lost, we return
// and the reverse process should not try the next upsteam
return http.ErrAbortHandler
}
// return context error
if len(errCtx) > 0 {
log.Println("[proxy.serve]: error from ServeHTTP:", errCtx)
// fix inrequest body
c.Request.Body = io.NopCloser(bytes.NewReader(inBody))
return errCtx[len(errCtx)-1]
}
resp, err := io.ReadAll(io.NopCloser(&buf))
if err != nil {
record.Response = "failed to read response from upstream " + err.Error()
log.Println(record.Response)
} else { } else {
request.Header = http.Header{}
}
// record response // process header authorization
// stream mode if upstream.SK == "asis" {
if strings.HasPrefix(contentType, "text/event-stream") { request.Header.Set("Authorization", c.Request.Header.Get("Authorization"))
for _, line := range strings.Split(string(resp), "\n") { } else {
chunk := StreamModeChunk{} request.Header.Set("Authorization", "Bearer "+upstream.SK)
line = strings.TrimPrefix(line, "data:") }
line = strings.TrimSpace(line) request.Header.Set("Content-Type", c.Request.Header.Get("Content-Type"))
if line == "" { request.Header.Set("Host", remote.Host)
continue request.Header.Set("Content-Length", c.Request.Header.Get("Content-Length"))
}
err := json.Unmarshal([]byte(line), &chunk) request.Body = c.Request.Body
if err != nil {
log.Println("[proxy.parseChunkError]:", err)
continue
}
if len(chunk.Choices) == 0 { resp, err := client.Do(request)
continue if err != nil {
} body := []byte{}
record.Response += chunk.Choices[0].Delta.Content if resp != nil && resp.Body != nil {
} body, _ = io.ReadAll(resp.Body)
} else if strings.HasPrefix(contentType, "text") && strings.HasPrefix(record.Model, "whisper") {
// whisper model response
record.Response = string(resp)
record.Body = ""
} else if strings.HasPrefix(contentType, "application/json") {
var fetchResp FetchModeResponse
err := json.Unmarshal(resp, &fetchResp)
if err != nil {
log.Println("[proxy.parseJSONError]: error parsing fetch response:", err)
return nil
}
if !strings.HasPrefix(fetchResp.Model, "gpt-") {
log.Println("[proxy.record]: Not GPT model, skip recording response:", fetchResp.Model)
return nil
}
if len(fetchResp.Choices) == 0 {
log.Println("[proxy.record]: Error: fetch response choice length is 0")
return nil
}
record.Response = fetchResp.Choices[0].Message.Content
} else {
log.Println("[proxy.record]: Unknown content type", contentType)
} }
return errors.New(err.Error() + " " + string(body))
}
defer resp.Body.Close()
record.Status = resp.StatusCode
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
record.Status = resp.StatusCode
errRet := fmt.Errorf("[error]: openai-api-route upstream return '%s' with '%s'", resp.Status, string(body))
log.Println(errRet)
return errRet
}
// copy response header
for k, v := range resp.Header {
c.Header(k, v[0])
}
sendCORSHeaders(c)
respBodyBuffer := bytes.NewBuffer(make([]byte, 0, 4*1024))
respBodyTeeReader := io.TeeReader(resp.Body, respBodyBuffer)
record.ResponseTime = time.Since(record.CreatedAt)
io.Copy(c.Writer, respBodyTeeReader)
record.ElapsedTime = time.Since(record.CreatedAt)
// parse and record response
if strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json") {
var fetchResp FetchModeResponse
err := json.NewDecoder(respBodyBuffer).Decode(&fetchResp)
if err == nil {
if len(fetchResp.Choices) > 0 {
record.Response = fetchResp.Choices[0].Message.Content
}
}
} else if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/event-stream") {
lines := bytes.Split(respBodyBuffer.Bytes(), []byte("\n"))
for _, line := range lines {
line = bytes.TrimSpace(line)
line = bytes.TrimPrefix(line, []byte("data:"))
line = bytes.TrimSpace(line)
if len(line) == 0 {
continue
}
chunk := StreamModeChunk{}
err = json.Unmarshal(line, &chunk)
if err != nil {
log.Println("[proxy.parseChunkError]:", err)
break
}
if len(chunk.Choices) == 0 {
continue
}
record.Response += chunk.Choices[0].Delta.Content
}
} else if strings.HasPrefix(resp.Header.Get("Content-Type"), "text") {
body, _ := io.ReadAll(respBodyBuffer)
record.Response = string(body)
} else {
log.Println("[proxy.record]: Unknown content type", resp.Header.Get("Content-Type"))
} }
return nil return nil

View File

@@ -19,6 +19,7 @@ type Record struct {
Status int Status int
Authorization string // the autorization header send by client Authorization string // the autorization header send by client
UserAgent string UserAgent string
Headers string
} }
type StreamModeChunk struct { type StreamModeChunk struct {

View File

@@ -35,7 +35,6 @@ func _processReplicateRequest(c *gin.Context, upstream *OPENAI_UPSTREAM, record
} }
// record request body // record request body
record.Body = string(inBody)
// parse request body // parse request body
inRequest := &OpenAIChatRequest{} inRequest := &OpenAIChatRequest{}
@@ -357,7 +356,7 @@ func _processReplicateRequest(c *gin.Context, upstream *OPENAI_UPSTREAM, record
FinishReason: "stop", FinishReason: "stop",
}) })
record.Body = strings.Join(result.Output, "") record.Response = strings.Join(result.Output, "")
record.Status = 200 record.Status = 200
// gin return // gin return

View File

@@ -14,16 +14,22 @@ type Config struct {
DBType string `yaml:"dbtype"` DBType string `yaml:"dbtype"`
DBAddr string `yaml:"dbaddr"` DBAddr string `yaml:"dbaddr"`
Authorization string `yaml:"authorization"` Authorization string `yaml:"authorization"`
Timeout int64 `yaml:"timeout"`
StreamTimeout int64 `yaml:"stream_timeout"`
Upstreams []OPENAI_UPSTREAM `yaml:"upstreams"` Upstreams []OPENAI_UPSTREAM `yaml:"upstreams"`
} }
type OPENAI_UPSTREAM struct { type OPENAI_UPSTREAM struct {
SK string `yaml:"sk"` SK string `yaml:"sk"`
Endpoint string `yaml:"endpoint"` Endpoint string `yaml:"endpoint"`
Timeout int64 `yaml:"timeout"` Timeout int64 `yaml:"timeout"`
Allow []string `yaml:"allow"` StreamTimeout int64 `yaml:"stream_timeout"`
Deny []string `yaml:"deny"` Allow []string `yaml:"allow"`
Type string `yaml:"type"` Deny []string `yaml:"deny"`
URL *url.URL Type string `yaml:"type"`
KeepHeader bool `yaml:"keep_header"`
Authorization string `yaml:"authorization"`
Noauth bool `yaml:"noauth"`
URL *url.URL
} }
func readConfig(filepath string) Config { func readConfig(filepath string) Config {
@@ -54,6 +60,14 @@ func readConfig(filepath string) Config {
log.Println("DBAddr not set, use default value: ./db.sqlite") log.Println("DBAddr not set, use default value: ./db.sqlite")
config.DBAddr = "./db.sqlite" config.DBAddr = "./db.sqlite"
} }
if config.Timeout == 0 {
log.Println("Timeout not set, use default value: 120")
config.Timeout = 120
}
if config.StreamTimeout == 0 {
log.Println("StreamTimeout not set, use default value: 10")
config.StreamTimeout = 10
}
for i, upstream := range config.Upstreams { for i, upstream := range config.Upstreams {
// parse upstream endpoint URL // parse upstream endpoint URL
@@ -68,6 +82,16 @@ func readConfig(filepath string) Config {
if (config.Upstreams[i].Type != "openai") && (config.Upstreams[i].Type != "replicate") { if (config.Upstreams[i].Type != "openai") && (config.Upstreams[i].Type != "replicate") {
log.Fatalf("Unsupported upstream type '%s'", config.Upstreams[i].Type) log.Fatalf("Unsupported upstream type '%s'", config.Upstreams[i].Type)
} }
// apply authorization from global config if not set
if config.Upstreams[i].Authorization == "" && !config.Upstreams[i].Noauth {
config.Upstreams[i].Authorization = config.Authorization
}
if config.Upstreams[i].Timeout == 0 {
config.Upstreams[i].Timeout = config.Timeout
}
if config.Upstreams[i].StreamTimeout == 0 {
config.Upstreams[i].StreamTimeout = config.StreamTimeout
}
} }
return config return config