Support Whisper STT (speech-to-text) input

This commit is contained in:
2023-08-25 18:19:10 +08:00
parent 687ebf790c
commit cdb0c9a1b5
3 changed files with 115 additions and 3 deletions

View File

@@ -33,6 +33,8 @@ export interface ChatStore {
presence_penalty: number;
frequency_penalty: number;
develop_mode: boolean;
whisper_api: string;
whisper_key: string;
}
const _defaultAPIEndpoint = "https://api.openai.com/v1/chat/completions";
@@ -43,7 +45,9 @@ const newChatStore = (
streamMode = true,
model = "gpt-3.5-turbo-0613",
temperature = 1.0,
dev = false
dev = false,
whisper_api = "",
whisper_key = ""
): ChatStore => {
return {
chatgpt_api_web_version: CHATGPT_API_WEB_VERSION,
@@ -64,6 +68,8 @@ const newChatStore = (
presence_penalty: 0,
frequency_penalty: 0,
develop_mode: getDefaultParams("dev", dev),
whisper_api: getDefaultParams("whisper-api", whisper_api),
whisper_key: getDefaultParams("whisper-key", whisper_key),
};
};
@@ -194,7 +200,9 @@ export function App() {
chatStore.streamMode,
chatStore.model,
chatStore.temperature,
!!chatStore.develop_mode
!!chatStore.develop_mode,
chatStore.whisper_api,
chatStore.whisper_key
)
)
);

View File

@@ -29,6 +29,8 @@ export default function ChatBOX(props: {
const [showGenerating, setShowGenerating] = useState(false);
const [generatingMessage, setGeneratingMessage] = useState("");
const [showRetry, setShowRetry] = useState(false);
const [isRecording, setIsRecording] = useState("Mic");
const mediaRef = createRef();
const messagesEndRef = createRef();
useEffect(() => {
@@ -504,6 +506,98 @@ export default function ChatBOX(props: {
>
Send
</button>
{/* Whisper speech-to-text record button.
    Rendered only when a whisper endpoint is configured AND some key is
    available (a dedicated whisper_key, or the chat apiKey as fallback).
    The label doubles as the state machine: "Mic" -> "Recording" ->
    "Transcribing" -> "Mic". */}
{chatStore.whisper_api &&
  (chatStore.whisper_key || chatStore.apiKey) && (
    <button
      className="disabled:line-through disabled:bg-slate-500 rounded m-1 p-1 border-2 bg-cyan-400 hover:bg-cyan-600"
      disabled={isRecording === "Transcribing"}
      ref={mediaRef}
      onClick={async () => {
        // Second click while recording: stop the recorder; the "stop"
        // listener registered below performs the transcription request.
        if (isRecording === "Recording") {
          // @ts-ignore
          window.mediaRecorder.stop();
          setIsRecording("Transcribing");
          return;
        }
        // Build a context prompt from the visible (non-hidden) history
        // plus the current input, to bias Whisper toward the
        // conversation's vocabulary.
        const prompt = (
          chatStore.history
            .filter(({ hide }) => !hide)
            .slice(chatStore.postBeginIndex)
            .map(({ content }) => content)
            .join(" ") +
          " " +
          inputMsg
        ).trim();
        console.log({ prompt });
        setIsRecording("Recording");
        console.log("start recording");
        const mediaRecorder = new MediaRecorder(
          await navigator.mediaDevices.getUserMedia({
            audio: true,
          }),
          { audioBitsPerSecond: 64 * 1000 }
        );
        // Stash the recorder on window so the stop-click above can
        // reach it across renders.
        // @ts-ignore
        window.mediaRecorder = mediaRecorder;
        mediaRecorder.start();
        const audioChunks: Blob[] = [];
        mediaRecorder.addEventListener("dataavailable", (event) => {
          audioChunks.push(event.data);
        });
        mediaRecorder.addEventListener("stop", async () => {
          setIsRecording("Transcribing");
          const audioBlob = new Blob(audioChunks);
          // Debug aid: a playable object URL for the captured audio.
          console.log({ audioUrl: URL.createObjectURL(audioBlob) });
          // Re-wrap as a generic binary file for the multipart upload.
          const blob = new Blob([audioBlob], {
            type: "application/octet-stream",
          });
          const formData = new FormData();
          formData.append("file", blob, "audio.ogx");
          formData.append("model", "whisper-1");
          formData.append("response_format", "text");
          formData.append("prompt", prompt);
          // FIX: the Authorization fallback previously used
          // chatStore.whisper_api (the endpoint URL) as the bearer
          // token; use the dedicated whisper_key, matching the
          // visibility gate above.
          const response = await fetch(chatStore.whisper_api, {
            method: "POST",
            headers: {
              Authorization: `Bearer ${
                chatStore.whisper_key || chatStore.apiKey
              }`,
            },
            body: formData,
          });
          // FIX: with response_format=text the endpoint returns a
          // plain-text body, so response.json() would throw; read it
          // as text instead.
          const text = await response.text();
          setInputMsg(inputMsg + text);
          setIsRecording("Mic");
        });
      }}
    >
      {isRecording}
    </button>
  )}
{chatStore.develop_mode && (
<button
className="disabled:line-through disabled:bg-slate-500 rounded m-1 p-1 border-2 bg-cyan-400 hover:bg-cyan-600"

View File

@@ -69,7 +69,7 @@ const LongInput = (props: {
const Input = (props: {
chatStore: ChatStore;
setChatStore: (cs: ChatStore) => void;
field: "apiKey" | "apiEndpoint";
field: "apiKey" | "apiEndpoint" | "whisper_api" | "whisper_key";
help: string;
}) => {
const [hideInput, setHideInput] = useState(true);
@@ -315,6 +315,16 @@ export default (props: {
readOnly={false}
{...props}
/>
{/* Whisper endpoint: setting it enables the STT button.
    FIX: the endpoint in the help text was misspelled
    "transcptions" -> "transcriptions". */}
<Input
  field="whisper_api"
  help="Whisper 语言转文字服务填入此api才会开启默认为 https://api.openai.com/v1/audio/transcriptions"
  {...props}
/>
<Input
field="whisper_key"
help="用于 Whisper 服务的 key默认为 上方使用的OPENAI key可在此单独配置专用key"
{...props}
/>
<div className="flex justify-between">
<p className="m-2 p-2">
Accumulated cost in all sessions ${totalCost.toFixed(4)}