Files
gonic/server/scanner/scanner.go
2021-10-03 21:00:37 +01:00

559 lines
13 KiB
Go

package scanner
import (
"errors"
"fmt"
"log"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/jinzhu/gorm"
"github.com/karrick/godirwalk"
"github.com/rainycape/unidecode"
"go.senan.xyz/gonic/server/db"
"go.senan.xyz/gonic/server/mime"
"go.senan.xyz/gonic/server/scanner/stack"
"go.senan.xyz/gonic/server/scanner/tags"
)
// sentinel errors produced by the scanner.
var (
// ErrAlreadyScanning is returned by Start when the scanning flag is
// already set.
ErrAlreadyScanning = errors.New("already scanning")
// ErrStatingItem is wrapped (with %w) around the os.Stat failure for an
// item visited during the walk.
ErrStatingItem = errors.New("could not stat item")
// ErrReadingTags is returned by handleTrack when the tags of a track
// could not be read.
ErrReadingTags = errors.New("could not read tags")
)
// durSince reports the time elapsed since t, truncated toward zero to a
// multiple of 10 microseconds.
func durSince(t time.Time) time.Duration {
	const resolution = 10 * time.Microsecond
	elapsed := time.Since(t)
	return elapsed.Truncate(resolution)
}
// decoded converts a string to its latin equivalent using unidecode.
// the result feeds the model's *UDec fields, which are used for searching.
// an empty string is returned when the transliteration is identical to the
// input, so those fields are only set when they add something.
func decoded(in string) string {
	latin := unidecode.Unidecode(in)
	if latin == in {
		return ""
	}
	return latin
}
// isScanning is a package-wide flag, 1 while a scan is marked as running
// and 0 otherwise. it is only touched through sync/atomic. we don't want
// to have more than one scan going on at a time
var isScanning int32 //nolint:gochecknoglobals

// IsScanning reports whether the scanning flag is currently set.
func IsScanning() bool {
	state := atomic.LoadInt32(&isScanning)
	return state == 1
}

// SetScanning sets the scanning flag and returns a function that clears it
// again.
func SetScanning() func() {
	atomic.StoreInt32(&isScanning, 1)
	release := func() {
		atomic.StoreInt32(&isScanning, 0)
	}
	return release
}
// Scanner walks the music directory and writes what it finds to the
// database. db, musicPath and genreSplit are set by New; isFull, the seen*
// fields and curAlbums are reset at the beginning of every Start call.
type Scanner struct {
// handle used for lookups, album saves and the cleanup queries
db *db.DB
// root directory handed to godirwalk.Walk; item paths are made relative to it
musicPath string
// when true, itemUnchanged never reports an item as unchanged
isFull bool
// separator passed to strings.Split when splitting a track's genre tag
genreSplit string
// these two are for the transaction we do for every album.
// the boolean is there so we don't begin or commit multiple
// times in the handle album or post children callback
trTx *db.DB
trTxOpen bool
// these two are for keeping state between nodes in the tree.
// eg. keep track of a parent's album or the path to a cover
// we just saw that we need to commit in the post children
// callback
curAlbums *stack.Stack
curCover string
// then the rest are for stats and cleanup at the very end
seenTracks map[int]struct{} // set of p keys
seenAlbums map[int]struct{} // set of p keys
seenTracksNew int // incremented each time handleTrack saves a track
}
// New returns a Scanner configured with the given music directory, database
// handle and genre separator. the remaining fields are left at their zero
// values until Start is called.
func New(musicPath string, db *db.DB, genreSplit string) *Scanner {
	s := new(Scanner)
	s.db = db
	s.musicPath = musicPath
	s.genreSplit = genreSplit
	return s
}
// ## begin clean funcs
// ## begin clean funcs
// ## begin clean funcs
// cleanTracks deletes every track row whose id is not in s.seenTracks, ie.
// tracks that the scan which just finished did not come across. the deletes
// are issued in chunks through TransactionChunked.
func (s *Scanner) cleanTracks() error {
	began := time.Now()

	// collect the ids of all tracks currently in the database
	var known []int
	if err := s.db.Model(&db.Track{}).Pluck("id", &known).Error; err != nil {
		return fmt.Errorf("plucking ids: %w", err)
	}

	// anything we did not mark as seen is stale
	var stale []int64
	for _, id := range known {
		if _, seen := s.seenTracks[id]; seen {
			continue
		}
		stale = append(stale, int64(id))
	}

	deleteChunk := func(tx *gorm.DB, chunk []int64) error {
		return tx.Where(chunk).Delete(&db.Track{}).Error
	}
	if err := s.db.TransactionChunked(stale, deleteChunk); err != nil {
		return err
	}

	log.Printf("finished clean tracks in %s, %d removed", durSince(began), len(stale))
	return nil
}
// cleanAlbums deletes every album row whose id is not in s.seenAlbums, ie.
// folders that the scan which just finished did not come across. the deletes
// are issued in chunks through TransactionChunked.
func (s *Scanner) cleanAlbums() error {
	began := time.Now()

	// collect the ids of all albums currently in the database
	var known []int
	if err := s.db.Model(&db.Album{}).Pluck("id", &known).Error; err != nil {
		return fmt.Errorf("plucking ids: %w", err)
	}

	// anything we did not mark as seen is stale
	var stale []int64
	for _, id := range known {
		if _, seen := s.seenAlbums[id]; seen {
			continue
		}
		stale = append(stale, int64(id))
	}

	deleteChunk := func(tx *gorm.DB, chunk []int64) error {
		return tx.Where(chunk).Delete(&db.Album{}).Error
	}
	if err := s.db.TransactionChunked(stale, deleteChunk); err != nil {
		return err
	}

	log.Printf("finished clean albums in %s, %d removed", durSince(began), len(stale))
	return nil
}
// cleanArtists deletes artists that no album row refers to through
// albums.tag_artist_id, and logs how many rows were removed.
func (s *Scanner) cleanArtists() error {
	began := time.Now()

	// ids of artists for which the left join finds no album
	orphans := s.db.
		Select("artists.id").
		Model(&db.Artist{}).
		Joins("LEFT JOIN albums ON albums.tag_artist_id=artists.id").
		Where("albums.id IS NULL").
		SubQuery()

	res := s.db.Where("artists.id IN ?", orphans).Delete(&db.Artist{})
	if res.Error != nil {
		return res.Error
	}

	log.Printf("finished clean artists in %s, %d removed", durSince(began), res.RowsAffected)
	return nil
}
// cleanGenres deletes genres that are referenced by neither a track
// (track_genres) nor an album (album_genres), and logs how many rows were
// removed. it returns the database error if the delete fails.
func (s *Scanner) cleanGenres() error {
	start := time.Now()
	// ids of genres without any track_genres row
	subTrack := s.db.
		Select("genres.id").
		Model(&db.Genre{}).
		Joins("LEFT JOIN track_genres ON track_genres.genre_id=genres.id").
		Where("track_genres.genre_id IS NULL").
		SubQuery()
	// ids of genres without any album_genres row
	subAlbum := s.db.
		Select("genres.id").
		Model(&db.Genre{}).
		Joins("LEFT JOIN album_genres ON album_genres.genre_id=genres.id").
		Where("album_genres.genre_id IS NULL").
		SubQuery()
	// a genre is only an orphan when BOTH sub queries list it. combining
	// them with OR would also remove genres that are still used by tracks
	// but happen not to be attached to any album (or the other way around)
	q := s.db.
		Where("genres.id IN ? AND genres.id IN ?", subTrack, subAlbum).
		Delete(&db.Genre{})
	if err := q.Error; err != nil {
		return err
	}
	log.Printf("finished clean genres in %s, %d removed", durSince(start), q.RowsAffected)
	return nil
}
// ## begin entries
// ## begin entries
// ## begin entries
// ScanOptions holds the per-scan settings passed to Start.
type ScanOptions struct {
// IsFull is copied to the scanner's isFull field by Start. when set,
// itemUnchanged reports every item as changed.
IsFull bool
// TODO https://github.com/sentriz/gonic/issues/64
// NOTE(review): Path is not read by Start in this file; presumably it is
// reserved for the issue linked above. confirm before relying on it.
Path string
}
// Start walks the music directory, updating albums and tracks as it goes,
// then removes tracks, albums, artists and genres that are no longer
// present and records the scan time in the "last_scan_time" setting.
//
// it returns ErrAlreadyScanning if another scan holds the scanning flag, or
// a wrapped error if the walk or one of the cleanup steps fails. errors for
// individual items are logged and counted, and the item is skipped.
func (s *Scanner) Start(opts ScanOptions) error {
	// claim the flag in a single atomic step. checking IsScanning() and
	// then calling SetScanning() would let two concurrent callers both pass
	// the check and scan at the same time
	if !atomic.CompareAndSwapInt32(&isScanning, 0, 1) {
		return ErrAlreadyScanning
	}
	defer atomic.StoreInt32(&isScanning, 0)

	// reset state vars for the new scan
	s.isFull = opts.IsFull
	s.seenTracks = map[int]struct{}{}
	s.seenAlbums = map[int]struct{}{}
	s.curAlbums = &stack.Stack{}
	s.seenTracksNew = 0

	// begin walking
	log.Println("starting scan")
	var errCount int
	start := time.Now()
	err := godirwalk.Walk(s.musicPath, &godirwalk.Options{
		Callback:             s.callbackItem,
		PostChildrenCallback: s.callbackPost,
		Unsorted:             true,
		FollowSymbolicLinks:  true,
		ErrorCallback: func(path string, err error) godirwalk.ErrorAction {
			// a bad item should not abort the whole scan
			log.Printf("error processing `%s`: %v", path, err)
			errCount++
			return godirwalk.SkipNode
		},
	})
	if err != nil {
		return fmt.Errorf("walking filesystem: %w", err)
	}
	log.Printf("finished scan in %s, +%d/%d tracks (%d err)\n",
		durSince(start),
		s.seenTracksNew,
		len(s.seenTracks),
		errCount,
	)

	// remove whatever the walk did not see. tracks and albums go first
	// since the artist and genre cleanups look at what is left of them
	if err := s.cleanTracks(); err != nil {
		return fmt.Errorf("clean tracks: %w", err)
	}
	if err := s.cleanAlbums(); err != nil {
		return fmt.Errorf("clean albums: %w", err)
	}
	if err := s.cleanArtists(); err != nil {
		return fmt.Errorf("clean artists: %w", err)
	}
	if err := s.cleanGenres(); err != nil {
		return fmt.Errorf("clean genres: %w", err)
	}

	// finish up
	strNow := strconv.FormatInt(time.Now().Unix(), 10)
	s.db.SetSetting("last_scan_time", strNow)
	return nil
}
// items are passed to the handle*() functions. callbackItem builds one
// for each node it is called with.
type item struct {
fullPath string // path as handed to the walk callback
relPath string // fullPath made relative to the scanner's musicPath
directory string // directory part of relPath, as returned by path.Split
filename string // file part of relPath, as returned by path.Split
stat os.FileInfo // result of os.Stat on fullPath
}
// isCover reports whether filename is one of the recognised cover art file
// names. the comparison is case-insensitive and matches the whole name:
// cover, folder, album, albumart or front with a png, jpg or jpeg extension.
//
// a constant switch is used instead of a map literal so that nothing is
// allocated on each call; this runs once for every file in the walk.
func isCover(filename string) bool {
	switch strings.ToLower(filename) {
	case "cover.png", "cover.jpg", "cover.jpeg",
		"folder.png", "folder.jpg", "folder.jpeg",
		"album.png", "album.jpg", "album.jpeg",
		"albumart.png", "albumart.jpg", "albumart.jpeg",
		"front.png", "front.jpg", "front.jpeg":
		return true
	default:
		return false
	}
}
// ## begin callbacks
// ## begin callbacks
// ## begin callbacks
// callbackItem is registered as the godirwalk Callback in Start. it stats
// the node, builds an item for it and dispatches on what the node is:
// directories (or links to them) go to handleAlbum, cover images are
// remembered in s.curCover, and files whose extension is known to the mime
// package go to handleTrack. everything else is ignored.
func (s *Scanner) callbackItem(fullPath string, info *godirwalk.Dirent) error {
	fileInfo, err := os.Stat(fullPath)
	if err != nil {
		return fmt.Errorf("%w: %v", ErrStatingItem, err)
	}

	rel, err := filepath.Rel(s.musicPath, fullPath)
	if err != nil {
		return fmt.Errorf("getting relative path: %w", err)
	}
	dir, name := path.Split(rel)

	entry := &item{
		fullPath:  fullPath,
		relPath:   rel,
		directory: dir,
		filename:  name,
		stat:      fileInfo,
	}

	dirLike, err := info.IsDirOrSymlinkToDir()
	if err != nil {
		return fmt.Errorf("stating link to dir: %w", err)
	}

	switch {
	case dirLike:
		return s.handleAlbum(entry)
	case isCover(name):
		// picked up again by the post children callback
		s.curCover = name
		return nil
	}

	// what remains is a regular file; only known audio extensions are tracks
	ext := path.Ext(name)
	if ext == "" {
		return nil
	}
	if _, known := mime.FromExtension(ext[1:]); !known {
		return nil
	}
	return s.handleTrack(entry)
}
// callbackPost is registered as the godirwalk PostChildrenCallback in
// Start. it commits the track transaction if one is open, pops the folder's
// album off the stack and, when its cover or parent needs updating, saves
// the album.
//
// it returns a wrapped error when committing the transaction or saving the
// album fails.
func (s *Scanner) callbackPost(fullPath string, info *godirwalk.Dirent) error {
	// the cover we saw belongs to this folder only, forget it on the way out
	defer func() {
		s.curCover = ""
	}()

	var commitErr error
	if s.trTxOpen {
		commitErr = s.trTx.Commit().Error
		s.trTxOpen = false
	}

	// take the current album off the stack even when the commit failed, so
	// the stack stays in step with the directory walk
	album := s.curAlbums.Pop()
	if commitErr != nil {
		return fmt.Errorf("committing track transaction: %w", commitErr)
	}

	// nothing to write if the cover is the same and the parent is already set
	if album.Cover == s.curCover && album.ParentID != 0 {
		return nil
	}

	// add its parent, the cover that we found, etc.
	album.ParentID = s.curAlbums.PeekID()
	album.Cover = s.curCover
	if err := s.db.Save(album).Error; err != nil {
		return fmt.Errorf("writing albums table: %w", err)
	}

	// we only log changed albums
	log.Printf("processed folder `%s`\n",
		path.Join(album.LeftPath, album.RightPath))
	return nil
}
// ## begin handlers
// ## begin handlers
// ## begin handlers
// itemUnchanged reports whether an item can be skipped: the scan must not
// be a full scan, and the file's modification time must be before the time
// its database record was last updated.
func (s *Scanner) itemUnchanged(statModTime, updatedInDB time.Time) bool {
	return !s.isFull && statModTime.Before(updatedInDB)
}
// handleAlbum looks up (by left and right path) or creates the album record
// for the folder described by it. whichever way the function exits, the
// album is marked as seen and pushed onto the album stack.
func (s *Scanner) handleAlbum(it *item) error {
	// a transaction can still be open here when a folder contains /both/
	// tracks and sub folders; close it before moving on.
	// NOTE(review): the result of Commit is not checked here — confirm
	// whether a failed commit should be surfaced.
	if s.trTxOpen {
		s.trTx.Commit()
		s.trTxOpen = false
	}

	album := &db.Album{}
	defer func() {
		// by now the id comes from either the lookup or the save below
		s.seenAlbums[album.ID] = struct{}{}
		s.curAlbums.Push(album)
	}()

	lookup := db.Album{
		LeftPath:  it.directory,
		RightPath: it.filename,
	}
	err := s.db.Where(lookup).First(album).Error
	found := !gorm.IsRecordNotFoundError(err)
	if found && s.itemUnchanged(it.stat.ModTime(), album.UpdatedAt) {
		// the record exists and the folder hasn't changed since
		return nil
	}

	album.LeftPath = it.directory
	album.RightPath = it.filename
	album.RightPathUDec = decoded(it.filename)
	album.ModifiedAt = it.stat.ModTime()
	if saveErr := s.db.Save(album).Error; saveErr != nil {
		return fmt.Errorf("writing albums table: %w", saveErr)
	}
	return nil
}
// handleTrack records the audio file described by it as a track of the
// album on top of the album stack. all database access goes through the
// per-album transaction, which is opened here if it is not open yet. the
// first track that is processed for an album without a tag title also fills
// in the album's tag fields, album artist and album genres.
//
// it returns an error wrapping ErrReadingTags (together with the cause) when
// the file's tags cannot be read, or a wrapped database error when a lookup
// or write fails.
func (s *Scanner) handleTrack(it *item) error {
	if !s.trTxOpen {
		s.trTx = s.db.Begin()
		s.trTxOpen = true
	}
	// init empty track and mark its ID (from lookup or save) as seen on
	// every exit, so that cleanTracks leaves it alone
	var track db.Track
	defer func() {
		s.seenTracks[track.ID] = struct{}{}
	}()
	album := s.curAlbums.Peek()
	err := s.trTx.
		Select("id, updated_at").
		Where(db.Track{
			AlbumID:  album.ID,
			Filename: it.filename,
		}).
		First(&track).
		Error
	if !gorm.IsRecordNotFoundError(err) &&
		s.itemUnchanged(it.stat.ModTime(), track.UpdatedAt) {
		// we found the record but it hasn't changed
		return nil
	}
	trags, err := tags.New(it.fullPath)
	if err != nil {
		// keep the sentinel matchable with errors.Is without throwing away
		// the reason the tags could not be read
		return fmt.Errorf("%w: %v", ErrReadingTags, err)
	}
	genreIDs, err := s.populateGenres(&track, trags)
	if err != nil {
		return fmt.Errorf("populate genres: %w", err)
	}
	// create album and album artist records for first track in album
	if album.TagTitle == "" {
		albumArtist, err := s.populateAlbumArtist(trags)
		if err != nil {
			return fmt.Errorf("populate artist: %w", err)
		}
		albumName := trags.SomeAlbum()
		album.TagTitle = albumName
		album.TagTitleUDec = decoded(albumName)
		album.TagBrainzID = trags.AlbumBrainzID()
		album.TagYear = trags.Year()
		album.TagArtistID = albumArtist.ID
		if err := s.populateAlbumGenres(album, genreIDs); err != nil {
			return fmt.Errorf("populate album genres: %w", err)
		}
	}
	track.Filename = it.filename
	track.FilenameUDec = decoded(it.filename)
	track.Size = int(it.stat.Size())
	track.AlbumID = album.ID
	track.ArtistID = album.TagArtistID
	track.TagTitle = trags.Title()
	track.TagTitleUDec = decoded(trags.Title())
	track.TagTrackArtist = trags.Artist()
	track.TagTrackNumber = trags.TrackNumber()
	track.TagDiscNumber = trags.DiscNumber()
	track.TagBrainzID = trags.BrainzID()
	track.Length = trags.Length()   // these two should be calculated
	track.Bitrate = trags.Bitrate() // ...from the file instead of tags
	if err := s.trTx.Save(&track).Error; err != nil {
		return fmt.Errorf("writing track table: %w", err)
	}
	s.seenTracksNew++
	// the track has an id now, so its genre rows can be rewritten
	if err := s.populateTrackGenres(&track, genreIDs); err != nil {
		return fmt.Errorf("populating track genres : %w", err)
	}
	return nil
}
// populateAlbumArtist finds the artist row whose name equals the album
// artist from the tags, or creates it, assigning the name and its decoded
// form in both cases. the query runs in the current track transaction.
func (s *Scanner) populateAlbumArtist(trags *tags.Tags) (*db.Artist, error) {
	name := trags.SomeAlbumArtist()
	attrs := db.Artist{
		Name:     name,
		NameUDec: decoded(name),
	}

	artist := &db.Artist{}
	q := s.trTx.Where("name=?", name).Assign(attrs).FirstOrCreate(artist)
	if q.Error != nil {
		return nil, fmt.Errorf("find or create artist: %w", q.Error)
	}
	return artist, nil
}
// populateGenres splits the genre tag on s.genreSplit and finds or creates
// a genre row for each part inside the current track transaction. it
// returns the ids of those rows in tag order. the track argument is not
// used.
func (s *Scanner) populateGenres(track *db.Track, trags *tags.Tags) ([]int, error) {
	var ids []int
	for _, name := range strings.Split(trags.SomeGenre(), s.genreSplit) {
		var genre db.Genre
		if err := s.trTx.FirstOrCreate(&genre, db.Genre{Name: name}).Error; err != nil {
			return nil, err
		}
		ids = append(ids, genre.ID)
	}
	return ids, nil
}
// populateTrackGenres replaces the track_genres rows of track: the existing
// rows for the track id are deleted, then one row per genre id is bulk
// inserted. both steps run in the current track transaction.
func (s *Scanner) populateTrackGenres(track *db.Track, genreIDs []int) error {
	removed := s.trTx.Where("track_id=?", track.ID).Delete(db.TrackGenre{})
	if removed.Error != nil {
		return fmt.Errorf("delete old track genre records: %w", removed.Error)
	}

	columns := []string{"track_id", "genre_id"}
	if err := s.trTx.InsertBulkLeftMany("track_genres", columns, track.ID, genreIDs); err != nil {
		return fmt.Errorf("insert bulk track genres: %w", err)
	}
	return nil
}
// populateAlbumGenres replaces the album_genres rows of album: the existing
// rows for the album id are deleted, then one row per genre id is bulk
// inserted. both steps run in the current track transaction.
func (s *Scanner) populateAlbumGenres(album *db.Album, genreIDs []int) error {
	removed := s.trTx.Where("album_id=?", album.ID).Delete(db.AlbumGenre{})
	if removed.Error != nil {
		return fmt.Errorf("delete old album genre records: %w", removed.Error)
	}

	columns := []string{"album_id", "genre_id"}
	if err := s.trTx.InsertBulkLeftMany("album_genres", columns, album.ID, genreIDs); err != nil {
		return fmt.Errorf("insert bulk album genres: %w", err)
	}
	return nil
}