feat(transcode): add a generic transcoding package for encoding/decoding/caching

This commit is contained in:
sentriz
2022-03-10 00:42:52 +00:00
parent fd211d706a
commit 165904c2bb
25 changed files with 713 additions and 388 deletions

BIN
server/transcode/testdata/10s.mp3 vendored Normal file

Binary file not shown.

BIN
server/transcode/testdata/5s.mp3 vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,3 @@
go test fuzz v1
byte('Y')
byte('\x05')

View File

@@ -0,0 +1,3 @@
go test fuzz v1
byte('\x15')
byte('}')

View File

@@ -0,0 +1,3 @@
go test fuzz v1
byte('\a')
byte('\x02')

View File

@@ -0,0 +1,129 @@
// author: spijet (https://github.com/spijet/)
// author: sentriz (https://github.com/sentriz/)
//nolint:gochecknoglobals
package transcode
import (
"context"
"fmt"
"io"
"os/exec"
"time"
"github.com/google/shlex"
)
// Transcoder produces a stream of audio for the file at path in, according to
// the given profile. The result is an io.ReadCloser, so the caller is expected
// to close it once it is done reading.
type Transcoder interface {
Transcode(ctx context.Context, profile Profile, in string) (io.ReadCloser, error)
}
// UserProfiles maps a profile name to one of the built-in profiles declared in
// this file. PCM16le is not part of this map.
// NOTE(review): presumably these are the names a user may select in their
// transcode preferences — confirm against the callers.
var UserProfiles = map[string]Profile{
"mp3": MP3,
"mp3_rg": MP3RG,
"opus_car": OpusCar,
"opus": Opus,
"opus_rg": OpusRG,
}
// Built-in transcoding profiles.
//
// Store as simple strings, since we may let the user provide their own profiles soon.
// Each command is split shell-style by parseProfile, which then substitutes the
// <file>, <seek> and <bitrate> tokens.
//
// The plain profiles (MP3, Opus) only re-encode. The "RG" variants additionally
// apply track ReplayGain through the volume filter and strip the now redundant
// ReplayGain tags from the output.
var (
	MP3   = NewProfile("audio/mpeg", 128, `ffmpeg -v 0 -i <file> -ss <seek> -map 0:a:0 -vn -b:a <bitrate> -c:a libmp3lame -f mp3 -`)
	MP3RG = NewProfile("audio/mpeg", 128, `ffmpeg -v 0 -i <file> -ss <seek> -map 0:a:0 -vn -b:a <bitrate> -c:a libmp3lame -af "volume=replaygain=track:replaygain_preamp=6dB:replaygain_noclip=0, alimiter=level=disabled, asidedata=mode=delete:type=REPLAYGAIN" -metadata replaygain_album_gain= -metadata replaygain_album_peak= -metadata replaygain_track_gain= -metadata replaygain_track_peak= -metadata r128_album_gain= -metadata r128_track_gain= -f mp3 -`)

	// this sets a baseline gain which results in the final track being +3~5dB louder than
	// Foobar2000's default ReplayGain target volume.
	// this makes it easier to listen to music in a car, where all other
	// sources are usually ten thousand times louder than RG-adjusted music.
	//
	// opus always forces output to 48kHz sampling rate, but we can still use upsampling
	// to increase RG and alimiter's peak limiting precision, which is desirable in some
	// cases. ffmpeg's `soxr` resampler is quite fast on x86-64: it takes around 5 seconds
	// on my Ryzen 3600 to transcode an 8-minute FLAC with 2x upsample and RG applied.
	//
	// -- @spijet
	OpusCar = NewProfile("audio/ogg", 96, `ffmpeg -v 0 -i <file> -ss <seek> -map 0:a:0 -vn -b:a <bitrate> -c:a libopus -vbr on -af "aresample=96000:resampler=soxr, volume=replaygain=track:replaygain_preamp=15dB:replaygain_noclip=0, alimiter=level=disabled, asidedata=mode=delete:type=REPLAYGAIN" -f opus -`)
	Opus    = NewProfile("audio/ogg", 96, `ffmpeg -v 0 -i <file> -ss <seek> -map 0:a:0 -vn -b:a <bitrate> -c:a libopus -vbr on -f opus -`)
	OpusRG  = NewProfile("audio/ogg", 96, `ffmpeg -v 0 -i <file> -ss <seek> -map 0:a:0 -vn -b:a <bitrate> -c:a libopus -vbr on -af "volume=replaygain=track:replaygain_preamp=6dB:replaygain_noclip=0, alimiter=level=disabled, asidedata=mode=delete:type=REPLAYGAIN" -metadata replaygain_album_gain= -metadata replaygain_album_peak= -metadata replaygain_track_gain= -metadata replaygain_track_peak= -metadata r128_album_gain= -metadata r128_track_gain= -f opus -`)

	// PCM16le decodes to raw signed 16-bit little-endian stereo samples.
	PCM16le = NewProfile("audio/wav", 0, `ffmpeg -v 0 -i <file> -ss <seek> -c:a pcm_s16le -ac 2 -f s16le -`)
)
// BitRate is an audio bitrate. parseProfile renders it with a "k" suffix when
// filling in the <bitrate> placeholder of a profile command.
type BitRate int // kb/s
// Profile describes one way of transcoding a track: the command template to
// run plus the values that are substituted into it. Create one with NewProfile
// and derive variants with WithBitrate and WithSeek.
type Profile struct {
bitrate BitRate // the default bitrate, but the user can request a different one
// seek replaces the <seek> placeholder of exec, formatted as microseconds
seek time.Duration
// mime is the value reported by MIME()
mime string
// exec is the command template; parseProfile splits it shell-style and
// replaces the <file>, <seek> and <bitrate> tokens
exec string
}
// BitRate reports the bitrate this profile encodes at.
func (p *Profile) BitRate() BitRate {
	return p.bitrate
}
// Seek reports the offset into the input at which transcoding starts.
func (p *Profile) Seek() time.Duration {
	return p.seek
}
// MIME reports the MIME type this profile was created with.
func (p *Profile) MIME() string {
	return p.mime
}
// NewProfile builds a Profile from a MIME type, a default bitrate and a command
// template. The seek offset is left at zero; use WithSeek to set it.
func NewProfile(mime string, bitrate BitRate, exec string) Profile {
	var p Profile
	p.mime = mime
	p.bitrate = bitrate
	p.exec = exec
	return p
}
// WithBitrate returns a copy of p that uses bitRate instead of p's own bitrate.
// p is passed by value, so the caller's profile is left untouched.
func WithBitrate(p Profile, bitRate BitRate) Profile {
	updated := p
	updated.bitrate = bitRate
	return updated
}
// WithSeek returns a copy of p whose seek offset is set to seek.
// p is passed by value, so the caller's profile is left untouched.
func WithSeek(p Profile, seek time.Duration) Profile {
	updated := p
	updated.seek = seek
	return updated
}
// ErrNoProfileParts is returned by parseProfile when splitting a profile's
// command yields no parts at all, so there is no program name to run.
var ErrNoProfileParts = fmt.Errorf("not enough profile parts")
// parseProfile turns profile's command template into a resolved program path
// and its argument list for the input file in.
//
// The template is split with shell-like quoting rules, the first part is
// looked up in PATH, and every remaining part that is exactly one of the
// placeholders is replaced:
//
//	<file>     the input path in
//	<seek>     the profile's seek offset in microseconds, e.g. "1500000us"
//	<bitrate>  the profile's bitrate with a "k" suffix, e.g. "128k"
//
// It returns ErrNoProfileParts when the template splits into nothing.
func parseProfile(profile Profile, in string) (string, []string, error) {
	tokens, err := shlex.Split(profile.exec)
	if err != nil {
		return "", nil, fmt.Errorf("split command: %w", err)
	}
	if len(tokens) == 0 {
		return "", nil, ErrNoProfileParts
	}

	bin, err := exec.LookPath(tokens[0])
	if err != nil {
		return "", nil, fmt.Errorf("find name: %w", err)
	}

	// only whole tokens are substituted; a placeholder embedded in a longer
	// argument is passed through unchanged
	placeholders := map[string]string{
		"<file>":    in,
		"<seek>":    fmt.Sprintf("%dus", profile.Seek().Microseconds()),
		"<bitrate>": fmt.Sprintf("%dk", profile.BitRate()),
	}

	var args []string
	for _, tok := range tokens[1:] {
		if value, ok := placeholders[tok]; ok {
			args = append(args, value)
			continue
		}
		args = append(args, tok)
	}
	return bin, args, nil
}
// GuessExpectedSize guesses how big the transcoded file will be in bytes.
// Handy if we want to send a Content-Length header to the client before
// the transcode has finished. This way, clients like DSub can render their
// scrub bar and duration as the track is streaming.
//
// The estimate should overshoot a bit (2s in this case) otherwise some HTTP
// clients will misbehave given some unexpected bytes.
//
// length is the duration of the whole track; the profile's seek offset is
// subtracted from it. A non-positive length means the duration is unknown and
// yields 0. A seek offset beyond the end of the track is treated as "nothing
// left to play", so the result is never negative.
func GuessExpectedSize(profile Profile, length time.Duration) int {
	if length <= 0 {
		return 0
	}

	// subtract as durations (exact integer arithmetic) and clamp, so seeking
	// past the end cannot produce a negative size
	remaining := length - profile.seek
	if remaining < 0 {
		remaining = 0
	}

	bytesPerSec := int(profile.BitRate() * 1000 / 8)

	var guess int
	guess += bytesPerSec * int(remaining.Seconds())
	guess += bytesPerSec * 2 // 2s padding
	guess += 10000           // 10kb byte padding
	return guess
}

View File

@@ -0,0 +1,47 @@
//go:build go1.18
// +build go1.18
package transcode_test
import (
	"context"
	"io"
	"sort"
	"testing"
	"time"

	"github.com/matryer/is"

	"go.senan.xyz/gonic/server/transcode"
)
// FuzzGuessExpectedSize makes sure all of our profile's estimated transcode
// file sizes are slightly bigger than the real thing.
//
// The two fuzzed bytes select a profile and a test track. Each combination is
// transcoded for real, so the command named by the profiles must be available
// in PATH.
func FuzzGuessExpectedSize(f *testing.F) {
	// map iteration order is random; index the profiles by sorted name so that
	// a given seed selects the same profile on every run and corpus entries
	// stay reproducible
	names := make([]string, 0, len(transcode.UserProfiles))
	for name := range transcode.UserProfiles {
		names = append(names, name)
	}
	sort.Strings(names)

	profiles := make([]transcode.Profile, 0, len(names))
	for _, name := range names {
		profiles = append(profiles, transcode.UserProfiles[name])
	}

	type track struct {
		path   string
		length time.Duration
	}
	tracks := []track{
		{"testdata/5s.mp3", 5 * time.Second},
		{"testdata/10s.mp3", 10 * time.Second},
	}

	tr := transcode.NewFFmpegTranscoder()
	f.Fuzz(func(t *testing.T, pseed uint8, tseed uint8) {
		is := is.New(t)

		profile := profiles[int(pseed)%len(profiles)]
		track := tracks[int(tseed)%len(tracks)]

		sizeGuess := transcode.GuessExpectedSize(profile, track.length)

		reader, err := tr.Transcode(context.Background(), profile, track.path)
		is.NoErr(err)
		// release the pipe even when a later assertion fails
		defer func() { _ = reader.Close() }()

		actual, err := io.ReadAll(reader)
		is.NoErr(err)
		is.True(sizeGuess > len(actual))
	})
}

View File

@@ -0,0 +1,65 @@
package transcode
import (
"context"
"crypto/md5"
"fmt"
"io"
"os"
"path/filepath"
"go.senan.xyz/gonic/iout"
)
// perm is the base permission mode of the transcode cache. The cache directory
// is created with these bits plus the execute (search) bits.
const perm = 0644
// CachingTranscoder is a Transcoder that wraps another Transcoder and keeps
// the transcoded output in files under cachePath. Later requests that resolve
// to the same command and arguments are served from those files.
type CachingTranscoder struct {
// cachePath is the directory in which cache files are created
cachePath string
// transcoder is the wrapped Transcoder, used when no cached output exists
transcoder Transcoder
}
// compile-time check that *CachingTranscoder implements Transcoder
var _ Transcoder = (*CachingTranscoder)(nil)
// NewCachingTranscoder returns a CachingTranscoder that delegates to t and
// stores its cache files in the directory cachePath.
func NewCachingTranscoder(t Transcoder, cachePath string) *CachingTranscoder {
	c := new(CachingTranscoder)
	c.transcoder = t
	c.cachePath = cachePath
	return c
}
// Transcode returns the transcoded audio for in, using the cache when it can.
//
// The cache file is named after a hash of the resolved command and arguments
// (see cacheKey). If that file already has content it is returned directly.
// Otherwise the wrapped transcoder is invoked and its output is returned
// through iout.NewTeeCloser together with the (empty) cache file.
// NOTE(review): presumably the tee writes everything read into the cache file
// and closes both on Close — confirm against package iout.
// NOTE(review): any file with a non-zero size counts as a hit, so a partially
// written file left by an interrupted transcode would be served as if it were
// complete.
func (t *CachingTranscoder) Transcode(ctx context.Context, profile Profile, in string) (io.ReadCloser, error) {
	// perm has no execute bits; a directory needs them to be traversable
	if err := os.MkdirAll(t.cachePath, perm|0111); err != nil {
		return nil, fmt.Errorf("make cache path: %w", err)
	}

	name, args, err := parseProfile(profile, in)
	if err != nil {
		return nil, fmt.Errorf("parse profile: %w", err)
	}

	path := filepath.Join(t.cachePath, cacheKey(name, args))

	cf, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, perm)
	if err != nil {
		return nil, fmt.Errorf("open cache file: %w", err)
	}

	// a Stat failure is treated like a miss and falls through to a transcode
	if i, err := cf.Stat(); err == nil && i.Size() > 0 {
		return cf, nil
	}

	out, err := t.transcoder.Transcode(ctx, profile, in)
	if err != nil {
		// nobody else will close the cache file on this path; the transcode
		// error is the one worth reporting, so the close error is dropped
		_ = cf.Close()
		return nil, fmt.Errorf("internal transcode: %w", err)
	}

	return iout.NewTeeCloser(out, cf), nil
}
// cacheKey derives the cache file name for a transcode command: the
// hex-encoded MD5 digest of the program path and all of its arguments.
//
// Every component is followed by a NUL byte before it is hashed. Without a
// delimiter, different argument lists with the same concatenation, such as
// ("ab", "c") and ("a", "bc"), would share one key. NUL cannot occur inside a
// process argument, so the encoding is unambiguous.
func cacheKey(cmd string, args []string) string {
	// the cache is invalid whenever transcode command (which includes the
	// absolute filepath, bit rate args, replay gain args, etc.) changes
	sum := md5.New()

	// writes to a hash.Hash never return an error
	_, _ = io.WriteString(sum, cmd)
	_, _ = io.WriteString(sum, "\x00")
	for _, arg := range args {
		_, _ = io.WriteString(sum, arg)
		_, _ = io.WriteString(sum, "\x00")
	}
	return fmt.Sprintf("%x", sum.Sum(nil))
}

View File

@@ -0,0 +1,39 @@
package transcode
import (
"context"
"fmt"
"io"
"os/exec"
)
// FFmpegTranscoder is a Transcoder that runs the profile's command as an
// external process and streams that process's standard output. It holds no state.
type FFmpegTranscoder struct{}
// compile-time check that *FFmpegTranscoder implements Transcoder
var _ Transcoder = (*FFmpegTranscoder)(nil)
// NewFFmpegTranscoder returns a ready-to-use FFmpegTranscoder.
func NewFFmpegTranscoder() *FFmpegTranscoder {
	return new(FFmpegTranscoder)
}
// ErrFFmpegExit describes ffmpeg exiting with a non-zero status code.
// NOTE(review): nothing in the visible source returns or wraps this error —
// confirm it is used elsewhere, or wire it into Transcode.
var ErrFFmpegExit = fmt.Errorf("ffmpeg exited with non 0 status code")
// Transcode starts the profile's command for the input file in and returns a
// reader over the command's standard output.
//
// The process is bound to ctx through exec.CommandContext. A background
// goroutine waits for the process and then closes the write side of the pipe
// with the wait error: a failed run surfaces to the reader as that error, a
// clean exit as io.EOF.
func (*FFmpegTranscoder) Transcode(ctx context.Context, profile Profile, in string) (io.ReadCloser, error) {
	bin, argv, err := parseProfile(profile, in)
	if err != nil {
		return nil, fmt.Errorf("split command: %w", err)
	}

	pr, pw := io.Pipe()

	proc := exec.CommandContext(ctx, bin, argv...)
	proc.Stdout = pw
	if startErr := proc.Start(); startErr != nil {
		return nil, fmt.Errorf("starting cmd: %w", startErr)
	}

	go func() {
		waitErr := proc.Wait()
		_ = pw.CloseWithError(waitErr)
	}()

	return pr, nil
}

View File

@@ -0,0 +1,19 @@
package transcode
import (
"context"
"io"
"os"
)
// NoneTranscoder is a Transcoder that performs no transcoding: it hands back
// the input file as it is. It holds no state.
type NoneTranscoder struct{}
// compile-time check that *NoneTranscoder implements Transcoder
var _ Transcoder = (*NoneTranscoder)(nil)
// NewNoneTranscoder returns a ready-to-use NoneTranscoder.
func NewNoneTranscoder() *NoneTranscoder {
	return new(NoneTranscoder)
}
// Transcode ignores the profile and the context and returns the file at path
// in, opened for reading, without converting it.
//
// The *os.File is only converted to io.ReadCloser on success. Returning
// os.Open's results directly would wrap a nil *os.File in a non-nil interface
// value whenever the open fails, which defeats a caller's nil check on the
// reader.
func (*NoneTranscoder) Transcode(ctx context.Context, _ Profile, in string) (io.ReadCloser, error) {
	f, err := os.Open(in)
	if err != nil {
		return nil, err
	}
	return f, nil
}