support whisper stt

2023-08-25 18:19:10 +08:00
parent 687ebf790c
commit cdb0c9a1b5
3 changed files with 115 additions and 3 deletions
--- a/src/app.tsx
+++ b/src/app.tsx
@@ -33,6 +33,8 @@ export interface ChatStore {
  presence_penalty: number;
  frequency_penalty: number;
  develop_mode: boolean;
+  whisper_api: string;
+  whisper_key: string;
 }

 const _defaultAPIEndpoint = "https://api.openai.com/v1/chat/completions";
@@ -43,7 +45,9 @@ const newChatStore = (
  streamMode = true,
  model = "gpt-3.5-turbo-0613",
  temperature = 1.0,
-  dev = false
+  dev = false,
+  whisper_api = "",
+  whisper_key = ""
 ): ChatStore => {
  return {
    chatgpt_api_web_version: CHATGPT_API_WEB_VERSION,
@@ -64,6 +68,8 @@ const newChatStore = (
    presence_penalty: 0,
    frequency_penalty: 0,
    develop_mode: getDefaultParams("dev", dev),
+    whisper_api: getDefaultParams("whisper-api", whisper_api),
+    whisper_key: getDefaultParams("whisper-key", whisper_key),
  };
 };

@@ -194,7 +200,9 @@ export function App() {
          chatStore.streamMode,
          chatStore.model,
          chatStore.temperature,
-          !!chatStore.develop_mode
+          !!chatStore.develop_mode,
+          chatStore.whisper_api,
+          chatStore.whisper_key
        )
      )
    );
--- a/src/chatbox.tsx
+++ b/src/chatbox.tsx
@@ -29,6 +29,8 @@ export default function ChatBOX(props: {
  const [showGenerating, setShowGenerating] = useState(false);
  const [generatingMessage, setGeneratingMessage] = useState("");
  const [showRetry, setShowRetry] = useState(false);
+  const [isRecording, setIsRecording] = useState("Mic");
+  const mediaRef = createRef();

  const messagesEndRef = createRef();
  useEffect(() => {
@@ -504,6 +506,98 @@ export default function ChatBOX(props: {
        >
          Send
        </button>
+        {chatStore.whisper_api &&
+          (chatStore.whisper_key || chatStore.apiKey) && (
+            <button
+              className="disabled:line-through disabled:bg-slate-500 rounded m-1 p-1 border-2 bg-cyan-400 hover:bg-cyan-600"
+              disabled={isRecording === "Transcribing"}
+              ref={mediaRef}
+              onClick={async () => {
+                // Second click: stop recording; the "stop" listener uploads.
+                if (isRecording === "Recording") {
+                  // @ts-ignore
+                  window.mediaRecorder.stop();
+                  setIsRecording("Transcribing");
+                  return;
+                }
+
+                // Visible history plus the draft is sent as the Whisper prompt.
+                const prompt = (
+                  chatStore.history
+                    .filter(({ hide }) => !hide)
+                    .slice(chatStore.postBeginIndex)
+                    .map(({ content }) => content)
+                    .join(" ") +
+                  " " +
+                  inputMsg
+                ).trim();
+
+                setIsRecording("Recording");
+                let stream: MediaStream;
+                try {
+                  stream = await navigator.mediaDevices.getUserMedia({
+                    audio: true,
+                  });
+                } catch (error) {
+                  // Permission denied or no microphone: return to idle.
+                  console.error("getUserMedia failed", error);
+                  setIsRecording("Mic");
+                  return;
+                }
+                const mediaRecorder = new MediaRecorder(stream, {
+                  audioBitsPerSecond: 64 * 1000,
+                });
+
+                // Kept on window so the next click's handler can stop it.
+                // @ts-ignore
+                window.mediaRecorder = mediaRecorder;
+                mediaRecorder.start();
+                const audioChunks: Blob[] = [];
+                mediaRecorder.addEventListener("dataavailable", (event) => {
+                  audioChunks.push(event.data);
+                });
+                mediaRecorder.addEventListener("stop", async () => {
+                  setIsRecording("Transcribing");
+                  // Release the microphone once recording has ended.
+                  stream.getTracks().forEach((track) => track.stop());
+                  try {
+                    const blob = new Blob(audioChunks, {
+                      type: "application/octet-stream",
+                    });
+                    const formData = new FormData();
+                    formData.append("file", blob, "audio.ogx");
+                    formData.append("model", "whisper-1");
+                    formData.append("response_format", "text");
+                    formData.append("prompt", prompt);
+                    const key = chatStore.whisper_key || chatStore.apiKey;
+                    const response = await fetch(chatStore.whisper_api, {
+                      method: "POST",
+                      headers: { Authorization: `Bearer ${key}` },
+                      body: formData,
+                    });
+                    if (!response.ok) {
+                      throw new Error(`whisper HTTP ${response.status}`);
+                    }
+                    // Plain text expected; a JSON {"text": ...} body also works.
+                    const raw = await response.text();
+                    let text: string = raw;
+                    try {
+                      text = JSON.parse(raw).text ?? raw;
+                    } catch {
+                      // not JSON: keep the raw transcript
+                    }
+                    setInputMsg(inputMsg + text);
+                  } catch (error) {
+                    console.error("transcription failed", error);
+                  } finally {
+                    setIsRecording("Mic");
+                  }
+                });
+              }}
+            >
+              {isRecording}
+            </button>
+          )}
        {chatStore.develop_mode && (
          <button
            className="disabled:line-through disabled:bg-slate-500 rounded m-1 p-1 border-2 bg-cyan-400 hover:bg-cyan-600"
--- a/src/settings.tsx
+++ b/src/settings.tsx
@@ -69,7 +69,7 @@ const LongInput = (props: {
 const Input = (props: {
  chatStore: ChatStore;
  setChatStore: (cs: ChatStore) => void;
-  field: "apiKey" | "apiEndpoint";
+  field: "apiKey" | "apiEndpoint" | "whisper_api" | "whisper_key";
  help: string;
 }) => {
  const [hideInput, setHideInput] = useState(true);
@@ -315,6 +315,16 @@ export default (props: {
            readOnly={false}
            {...props}
          />
+          <Input
+            field="whisper_api"
+            help="Whisper 语音转文字服务，填入此api才会开启，例如 https://api.openai.com/v1/audio/transcriptions"
+            {...props}
+          />
+          <Input
+            field="whisper_key"
+            help="用于 Whisper 服务的 key，默认为 上方使用的OPENAI key，可在此单独配置专用key"
+            {...props}
+          />
          <div className="flex justify-between">
            <p className="m-2 p-2">
              Accumulated cost in all sessions ${totalCost.toFixed(4)}