This commit is contained in:
2024-11-10 00:43:33 +08:00
commit 37c409dcc1
5 changed files with 2755 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

2459
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

11
Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "voice-typing"
version = "0.1.0"
edition = "2021"
[dependencies]
clipboard = "0.5.0"
ctrlc = "3.4.5"
nix = "0.29.0"
notify-rust = "4.11.3"
reqwest = { "version" ="0.12.9", "features" = ["blocking", "multipart"] }

179
src/main.rs Normal file
View File

@@ -0,0 +1,179 @@
use std::process::exit;
/// Path of the PID file that marks a recording in progress. It is written by
/// `set_pid`, read by `read_pid` and removed by `kill_and_delete_pid`.
static PID_FILE: &str = "/tmp/ffmpeg_audio_recording.pid";
/// Path of the audio file that ffmpeg writes in `start_recording` and that
/// `upload_audio` attaches to the transcription request.
static TEMP_FILE: &str = "/tmp/ffmpeg_audio_recording.ogg";
/// URL that `upload_audio` POSTs the recording to.
static API_ENDPOINT: &str = "http://127.0.0.1:5000/v1/audio/transcriptions";
/// Token sent as `Authorization: Bearer <token>` with the upload request.
// NOTE(review): the token is hard-coded in source; consider loading it from
// the environment or a config file instead.
static AUTH_TOKEN: &str = "woshimima";
use ctrlc;
use nix::sys::signal;
use nix::sys::signal::Signal;
use nix::unistd::Pid;
use std::sync::mpsc::channel;
fn main() {
// 如果已经在录制音频,停止录制
match read_pid() {
Some(_) => {
kill_and_delete_pid();
send_notification("Recording stopped", "Recording audio stopped.");
exit(0)
}
None => {
set_pid();
}
}
// 开始录制音频
send_notification("Recording started", "Recording audio...");
start_recording();
let text = upload_audio();
set_clipboard_content(&text);
send_notification("Recording finished", text.as_str());
}
/// Uploads the recorded audio file to the transcription endpoint and returns
/// the response body as text.
///
/// The request is a multipart form containing the audio file, the response
/// format, the current clipboard content as `prompt`, and the model name.
///
/// # Panics
///
/// Panics if the audio file cannot be attached, if sending the request fails,
/// or if the response body cannot be read as text.
fn upload_audio() -> String {
    use reqwest::blocking::multipart::Form;
    use reqwest::blocking::Client;

    let http = Client::new();

    // Attach the audio first so a missing file aborts before anything else.
    let audio_only = Form::new().file("file", TEMP_FILE).unwrap();
    let payload = audio_only
        .text("response_format", "text")
        .text("prompt", get_clipboard_content())
        .text("model", "whisper-1");

    let request = http
        .post(API_ENDPOINT)
        .header("Authorization", format!("Bearer {}", AUTH_TOKEN))
        .multipart(payload);

    let response = request.send().unwrap();
    response.text().unwrap()
}
/// Records audio with ffmpeg until this process receives Ctrl-C (SIGINT).
///
/// ffmpeg is spawned to capture the default PulseAudio source as mono FLAC in
/// an Ogg container written to `TEMP_FILE`. Once the interrupt arrives, ffmpeg
/// is sent SIGINT and the function waits for it to exit before returning.
///
/// # Panics
///
/// Panics if ffmpeg cannot be spawned, the Ctrl-C handler cannot be installed,
/// the channel fails, or signalling / waiting for ffmpeg fails.
#[cfg(target_os = "linux")]
fn start_recording() {
    let mut recorder = std::process::Command::new("ffmpeg")
        .args([
            "-f", "pulse", "-i", "default", "-c:a", "flac", "-ac", "1", "-f", "ogg", "-y",
            TEMP_FILE,
        ])
        .spawn()
        .unwrap();

    // Install a Ctrl-C handler that forwards the interrupt over a channel.
    let (interrupt_tx, interrupt_rx) = channel();
    ctrlc::set_handler(move || {
        interrupt_tx
            .send(())
            .expect("Could not send signal on channel.")
    })
    .expect("Error setting Ctrl-C handler");

    // Block until the interrupt is delivered.
    println!("Waiting for Ctrl-C...");
    interrupt_rx
        .recv()
        .expect("Could not receive from channel.");
    println!("Got it! killing ffmpeg)");

    let recorder_pid = Pid::from_raw(recorder.id() as i32);
    signal::kill(recorder_pid, Signal::SIGINT).unwrap();

    // Wait for ffmpeg to exit.
    recorder.wait().unwrap();
    println!("ffmpeg exited");
}
/// Returns the raw contents of the PID file, or `None` if the file cannot be
/// opened or read as UTF-8 text.
fn read_pid() -> Option<String> {
    std::fs::read_to_string(PID_FILE).ok()
}
fn kill_and_delete_pid() {
use std::fs;
let pid = read_pid().unwrap();
let pid = pid.trim();
let pid = pid.parse::<i32>().unwrap();
match signal::kill(Pid::from_raw(pid), Signal::SIGINT) {
Ok(_) => {},
Err(_) => {
send_notification("Error to kill", "Error to kill pid, cleaning");
},
}
fs::remove_file(PID_FILE).unwrap();
}
/// Writes the current process ID to the PID file, creating or truncating it.
///
/// # Panics
///
/// Panics if the file cannot be created or written.
fn set_pid() {
    let own_pid = std::process::id().to_string();
    std::fs::write(PID_FILE, own_pid).unwrap();
}
/// Shows a desktop notification with the given title and body text, using a
/// two-second timeout.
///
/// A failure to show the notification is reported on stderr and otherwise
/// ignored.
fn send_notification(title: &str, content: &str) {
    let display_time = std::time::Duration::from_secs(2);

    let mut notification = notify_rust::Notification::new();
    notification
        .summary(title)
        .body(content)
        .timeout(display_time);

    if let Err(error) = notification.show() {
        eprintln!("Error sending notification: {}", error);
    }
}
/// Returns the current clipboard text.
///
/// If the clipboard cannot be opened or read, the error is shown in a
/// notification and an empty string is returned.
fn get_clipboard_content() -> String {
    use clipboard::{ClipboardContext, ClipboardProvider};

    let clipboard_read = ClipboardContext::new().and_then(|mut ctx| ctx.get_contents());
    clipboard_read.unwrap_or_else(|error| {
        send_notification(
            "Error getting clipboard content",
            &format!("Error: {}", error),
        );
        String::new()
    })
}
/// Replaces the clipboard text with `content`.
///
/// If the clipboard cannot be opened or written, the error is shown in a
/// notification and the clipboard is left as it was.
fn set_clipboard_content(content: &str) {
    use clipboard::{ClipboardContext, ClipboardProvider};

    let clipboard_write =
        ClipboardContext::new().and_then(|mut ctx| ctx.set_contents(content.to_owned()));
    if let Err(error) = clipboard_write {
        send_notification(
            "Error setting clipboard content",
            &format!("Error: {}", error),
        );
    }
}

105
src/voice-typing Executable file
View File

@@ -0,0 +1,105 @@
#!/bin/bash
# Voice typing helper: records audio with ffmpeg, sends the recording to a
# transcription API and copies the returned text to the clipboard. Running the
# script again while a recording is in progress stops that recording.
#
# Paths of the PID file (holds the PID of the running ffmpeg process) and of
# the temporary audio file that ffmpeg writes.
PID_FILE="/tmp/ffmpeg_audio_recording.pid"
TEMP_FILE="/tmp/audio_recording.ogg"
# Transcription API endpoint and the bearer token sent with each request.
# NOTE(review): the token is hard-coded here; consider reading it from the
# environment instead.
API_ENDPOINT="http://127.0.0.1:5000/v1/audio/transcriptions"
AUTH_TOKEN="woshimima"
# Show a desktop notification titled "Audio Script".
# Arguments:
#   $1 - notification body text
send_notification() {
    local message="$1"
    if [ "$(uname)" == "Darwin" ]; then
        # macOS notification. The message is handed to AppleScript as a run
        # argument instead of being spliced into the script source, so quotes
        # or backslashes in the text (e.g. in a transcription) cannot break or
        # alter the script.
        osascript \
            -e 'on run argv' \
            -e 'display notification (item 1 of argv) with title "Audio Script"' \
            -e 'end run' \
            -- "$message"
    else
        # Linux notification. "--" keeps a message that starts with "-" from
        # being parsed as a command-line option.
        notify-send -- "Audio Script" "$message"
    fi
}
# Stop a recording that is already in progress.
# If the PID file names a running process, that process is killed and the
# script exits with status 0. If the PID file is stale or does not contain a
# plain number, it is removed and the script carries on.
check_and_kill_ffmpeg() {
    local pid
    if [ -f "$PID_FILE" ]; then
        # PID file exists, read the PID
        pid=$(cat "$PID_FILE")
        # Accept only a plain decimal number so that garbage in the file is
        # never passed on to ps or kill.
        if [[ "$pid" =~ ^[0-9]+$ ]] && ps -p "$pid" > /dev/null 2>&1; then
            # Process is running, kill it
            echo "Stopping existing FFmpeg process (PID: $pid)..."
            kill "$pid"
            exit 0
        else
            # Process not running, remove the PID file
            echo "Removing stale PID file."
            rm -f "$PID_FILE"
        fi
    fi
}
# Print the text to use as the transcription prompt on stdout: the output of
# pbpaste on macOS, xclip -o on X11 or wl-paste on Wayland.
# On an unsupported session type a diagnostic goes to stderr and an empty line
# is printed, so callers capturing stdout never receive the diagnostic as text.
get_selected_text() {
    local selected_text=""
    if [ "$(uname)" == "Darwin" ]; then
        selected_text=$(pbpaste)
    elif [ "$XDG_SESSION_TYPE" == "x11" ]; then
        selected_text=$(xclip -o)
    elif [ "$XDG_SESSION_TYPE" == "wayland" ]; then
        selected_text=$(wl-paste)
    else
        echo "Unsupported session type for capturing selected text." >&2
    fi
    # printf instead of echo: text such as "-n" or "-e" is printed verbatim.
    printf '%s\n' "$selected_text"
}
# Copy text to the system clipboard.
# Arguments:
#   $1 - text to place on the clipboard
# Returns non-zero (with a diagnostic on stderr) on an unsupported session type.
copy_to_clipboard() {
    local text="$1"
    # printf '%s' instead of echo: no trailing newline is added to the
    # clipboard content, and text such as "-n" or "-e" is copied verbatim.
    if [ "$(uname)" == "Darwin" ]; then
        printf '%s' "$text" | pbcopy
    elif [ "$XDG_SESSION_TYPE" == "x11" ]; then
        printf '%s' "$text" | xclip -selection clipboard
    elif [ "$XDG_SESSION_TYPE" == "wayland" ]; then
        printf '%s' "$text" | wl-copy
    else
        echo "Unsupported session type for clipboard operations." >&2
        return 1
    fi
}
# Stop an already-running recording (and exit) if there is one.
check_and_kill_ffmpeg
# From here on this instance owns the PID file: make sure it is removed even
# when the script is interrupted, so a stale file is never left behind.
trap 'rm -f "$PID_FILE"' EXIT
# Start recording audio with ffmpeg, outputting to the temporary file
echo "Recording... Press Ctrl+C to stop."
send_notification "Recording..."
if [ "$(uname)" == "Darwin" ]; then
    ffmpeg -y -f avfoundation -i ":0" -c:a libopus -ac 1 -ar 16000 -f ogg "$TEMP_FILE" &
else
    ffmpeg -y -f pulse -i 0 -c:a flac -ac 1 -f ogg "$TEMP_FILE" &
fi
FFMPEG_PID=$!
# Save the FFmpeg PID to the PID file
echo "$FFMPEG_PID" > "$PID_FILE"
# Wait for FFmpeg to finish
wait "$FFMPEG_PID"
# Get the selected text to use as the prompt parameter
PROMPT=$(get_selected_text)
# Proceed to upload the audio file to the API endpoint using curl.
# The prompt is sent with --form-string: with -F, a value starting with "@" or
# "<" would be read from a local file and ";" would be parsed as field options,
# neither of which is acceptable for arbitrary selected text.
echo "Transcribing..."
send_notification "Transcribing..."
API_RESPONSE=$(curl "$API_ENDPOINT" -X POST -F file=@"$TEMP_FILE;filename=input.ogg" \
    -H "Authorization: Bearer $AUTH_TOKEN" -F response_format=text \
    --form-string "prompt=$PROMPT")
# Copy the API response to the clipboard
copy_to_clipboard "$API_RESPONSE"
# Remove the PID file at the end of the script
rm -f "$PID_FILE"
send_notification "$API_RESPONSE"
echo "Done."