Files
gonic/server/scanner/scanner.go
2021-11-10 00:22:25 +00:00

498 lines
13 KiB
Go

package scanner
import (
"errors"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/jinzhu/gorm"
"github.com/karrick/godirwalk"
"github.com/rainycape/unidecode"
"go.senan.xyz/gonic/multierr"
"go.senan.xyz/gonic/server/db"
"go.senan.xyz/gonic/server/mime"
"go.senan.xyz/gonic/server/scanner/tags"
)
// Sentinel errors exposed by the scanner package.
var (
// ErrAlreadyScanning is returned by scan when the scanning flag is already set.
ErrAlreadyScanning = errors.New("already scanning")
// ErrStatingItem describes a failed stat of an item.
// NOTE(review): not referenced in the visible part of this file — confirm it is still used.
ErrStatingItem = errors.New("could not stat item")
// ErrReadingTags is wrapped into the error returned when the tagger fails to read a track.
ErrReadingTags = errors.New("could not read tags")
)
// Scanner walks the configured music paths and records the albums, tracks,
// artists and genres it finds in the database.
type Scanner struct {
db *db.DB
// musicPaths are the root directories walked by scan.
musicPaths []string
// sorted controls the walk order: when false the walk runs with
// godirwalk's Unsorted option enabled.
sorted bool
// genreSplit is the separator used to split a track's genre tag into
// individual genre names.
genreSplit string
// tagger reads the tags of each track file.
tagger tags.Reader
// scanning is accessed through sync/atomic: 1 while a scan is running,
// 0 otherwise.
scanning *int32
}
// New builds a Scanner for the given music paths. The scanning flag starts
// out cleared, so IsScanning reports false until a scan begins.
func New(musicPaths []string, sorted bool, db *db.DB, genreSplit string, tagger tags.Reader) *Scanner {
	var idle int32
	scanner := Scanner{
		scanning:   &idle,
		tagger:     tagger,
		genreSplit: genreSplit,
		sorted:     sorted,
		musicPaths: musicPaths,
		db:         db,
	}
	return &scanner
}
// IsScanning reports whether the scanning flag is currently set.
func (s *Scanner) IsScanning() bool {
	state := atomic.LoadInt32(s.scanning)
	return state == 1
}
// ScanOptions configures a single ScanAndClean run.
type ScanOptions struct {
// IsFull, when true, makes the scan re-read the tags of every track, even
// those whose file modification time is older than the stored record.
IsFull bool
}
// ScanAndClean scans all music paths and then removes database rows that the
// scan did not encounter. The clean step only runs when the scan returned no
// error.
func (s *Scanner) ScanAndClean(opts ScanOptions) error {
	seen := &collected{
		seenTracks: make(map[int]struct{}),
		seenAlbums: make(map[int]struct{}),
	}

	err := s.scan(seen, opts.IsFull)
	if err != nil {
		return err
	}

	return s.clean(seen)
}
// scan walks every configured music path and hands each folder to callback
// once all of that folder's children have been visited. Ids of the albums and
// tracks encountered are recorded in c.
//
// It returns ErrAlreadyScanning when another scan holds the scanning flag.
// Errors reported for individual paths are collected and the offending node is
// skipped; if any were collected they are returned together after the walk and
// after the "last_scan_time" setting has been written.
func (s *Scanner) scan(c *collected, isFull bool) error {
	// claim the flag in a single atomic step. a separate load followed by a
	// store would let two concurrent callers both pass the check and scan at
	// the same time.
	if !atomic.CompareAndSwapInt32(s.scanning, 0, 1) {
		return ErrAlreadyScanning
	}
	defer atomic.StoreInt32(s.scanning, 0)

	start := time.Now()
	itemErrs := multierr.Err{}

	log.Println("starting scan")
	defer func() {
		log.Printf("finished scan in %s, +%d/%d tracks (%d err)\n",
			durSince(start), c.seenTracksNew, len(c.seenTracks), itemErrs.Len())
	}()

	for _, musicPath := range s.musicPaths {
		err := godirwalk.Walk(musicPath, &godirwalk.Options{
			// all the work happens in PostChildrenCallback, this one is a no-op
			Callback: func(_ string, _ *godirwalk.Dirent) error {
				return nil
			},
			PostChildrenCallback: func(itemPath string, _ *godirwalk.Dirent) error {
				log.Printf("processing folder `%s`", itemPath)
				return s.callback(c, isFull, musicPath, itemPath)
			},
			Unsorted:            !s.sorted,
			FollowSymbolicLinks: true,
			// remember the failure and carry on with the rest of the tree
			ErrorCallback: func(path string, err error) godirwalk.ErrorAction {
				itemErrs.Add(fmt.Errorf("%q: %w", path, err))
				return godirwalk.SkipNode
			},
		})
		if err != nil {
			return fmt.Errorf("walking filesystem: %w", err)
		}
	}

	if err := s.db.SetSetting("last_scan_time", strconv.FormatInt(time.Now().Unix(), 10)); err != nil {
		return fmt.Errorf("set scan time: %w", err)
	}

	if itemErrs.Len() > 0 {
		return itemErrs
	}
	return nil
}
// clean removes stale rows in a fixed order: tracks, albums, artists, genres.
// It stops at the first step that fails and returns that step's error wrapped
// with the step name.
func (s *Scanner) clean(c *collected) error {
	steps := []struct {
		name string
		run  func() error
	}{
		{"clean tracks", func() error { return s.cleanTracks(c.seenTracks) }},
		{"clean albums", func() error { return s.cleanAlbums(c.seenAlbums) }},
		{"clean artists", s.cleanArtists},
		{"clean genres", s.cleanGenres},
	}
	for _, step := range steps {
		if err := step.run(); err != nil {
			return fmt.Errorf("%s: %w", step.name, err)
		}
	}
	return nil
}
// callback processes a single folder below rootAbsPath. It lists the folder's
// entries, picking out a cover image and the files with a known extension,
// makes sure album rows exist for the folder and its parent, and then
// populates a track row for each file in sorted name order.
//
// The root folder itself is skipped. Ids of the albums touched are recorded
// in c.seenAlbums.
func (s *Scanner) callback(c *collected, isFull bool, rootAbsPath string, itemAbsPath string) error {
	if rootAbsPath == itemAbsPath {
		return nil
	}

	relpath, err := filepath.Rel(rootAbsPath, itemAbsPath)
	if err != nil {
		return fmt.Errorf("relative path of %q: %w", itemAbsPath, err)
	}

	gs, err := godirwalk.NewScanner(itemAbsPath)
	if err != nil {
		return err
	}

	var tracks []string
	var cover string
	for gs.Scan() {
		name := gs.Name()
		if isCover(name) {
			cover = name
			continue
		}
		if _, ok := mime.FromExtension(ext(name)); ok {
			tracks = append(tracks, name)
		}
	}
	// a failed directory read would otherwise look like a short listing, and
	// the entries that were never seen would be treated as missing
	if err := gs.Err(); err != nil {
		return fmt.Errorf("reading directory %q: %w", itemAbsPath, err)
	}

	tx := s.db.Begin()
	// NOTE(review): the transaction is committed on every return path,
	// including the error ones — confirm that keeping partial work is intended.
	defer tx.Commit()

	pdir, pbasename := filepath.Split(filepath.Dir(relpath))
	parent := &db.Album{}
	if err := tx.Where(db.Album{RootDir: rootAbsPath, LeftPath: pdir, RightPath: pbasename}).FirstOrCreate(parent).Error; err != nil {
		return fmt.Errorf("first or create parent: %w", err)
	}
	c.seenAlbums[parent.ID] = struct{}{}

	dir, basename := filepath.Split(relpath)
	album := &db.Album{}
	if err := tx.Where(db.Album{RootDir: rootAbsPath, LeftPath: dir, RightPath: basename}).First(album).Error; err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
		return fmt.Errorf("find album: %w", err)
	}
	if err := populateAlbumBasics(tx, rootAbsPath, parent, album, dir, basename, cover); err != nil {
		return fmt.Errorf("populate album basics: %w", err)
	}
	c.seenAlbums[album.ID] = struct{}{}

	// the index decides which track supplies the album level metadata, so
	// the order has to be stable
	sort.Strings(tracks)
	for i, trackName := range tracks {
		abspath := filepath.Join(itemAbsPath, trackName)
		if err := s.populateTrackAndAlbumArtists(tx, c, i, album, trackName, abspath, isFull); err != nil {
			return fmt.Errorf("process %q: %w", trackName, err)
		}
	}

	return nil
}
// populateTrackAndAlbumArtists creates or refreshes the track row for the file
// at abspath, together with its album artist and genres. i is the position of
// the track within its folder; only the track at position 0 also updates the
// album's own tags and genres.
//
// Unless isFull is set, a track whose file modification time is before the
// stored row's UpdatedAt is marked as seen and otherwise left alone. A tagger
// failure is returned wrapping ErrReadingTags.
func (s *Scanner) populateTrackAndAlbumArtists(tx *db.DB, c *collected, i int, album *db.Album, basename string, abspath string, isFull bool) error {
	track := &db.Track{AlbumID: album.ID, Filename: filepath.Base(basename)}
	if err := tx.Where(track).First(track).Error; err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
		return fmt.Errorf("query track: %w", err)
	}

	// only an existing row has an id worth remembering here. a track that is
	// not in the database yet still has the zero id, and recording that would
	// add a bogus entry to the set and inflate the seen count.
	if track.ID != 0 {
		c.seenTracks[track.ID] = struct{}{}
	}

	stat, err := os.Stat(abspath)
	if err != nil {
		return fmt.Errorf("stating %q: %w", basename, err)
	}
	if !isFull && stat.ModTime().Before(track.UpdatedAt) {
		return nil
	}

	trags, err := s.tagger.Read(abspath)
	if err != nil {
		return fmt.Errorf("%v: %w", err, ErrReadingTags)
	}

	artistName := trags.SomeAlbumArtist()
	albumArtist, err := s.populateAlbumArtist(tx, artistName)
	if err != nil {
		return fmt.Errorf("populate artist: %w", err)
	}

	if err := populateTrack(tx, album, albumArtist, track, trags, basename, int(stat.Size())); err != nil {
		return fmt.Errorf("process %q: %w", basename, err)
	}

	// the track has been saved by now, so record its id again and count it
	// towards the processed total
	c.seenTracks[track.ID] = struct{}{}
	c.seenTracksNew++

	genreNames := strings.Split(trags.SomeGenre(), s.genreSplit)
	genreIDs, err := s.populateGenres(tx, track, genreNames)
	if err != nil {
		return fmt.Errorf("populate genres: %w", err)
	}
	if err := s.populateTrackGenres(tx, track, genreIDs); err != nil {
		return fmt.Errorf("populate track genres: %w", err)
	}

	// metadata for the album table comes only from the first track's tags
	if i > 0 {
		return nil
	}

	if err := populateAlbum(tx, album, albumArtist, trags, stat.ModTime()); err != nil {
		return fmt.Errorf("populate album: %w", err)
	}
	if err := populateAlbumGenres(tx, album, genreIDs); err != nil {
		return fmt.Errorf("populate album genres: %w", err)
	}

	return nil
}
// populateAlbum copies the album level tags from trags onto album, links it
// to albumArtist, stamps it with modTime and saves it.
func populateAlbum(tx *db.DB, album *db.Album, albumArtist *db.Artist, trags tags.Parser, modTime time.Time) error {
	title := trags.SomeAlbum()
	album.TagTitle, album.TagTitleUDec = title, decoded(title)
	album.TagBrainzID = trags.AlbumBrainzID()
	album.TagYear = trags.Year()
	album.TagArtistID = albumArtist.ID
	album.ModifiedAt = modTime

	err := tx.Save(&album).Error
	if err == nil {
		return nil
	}
	return fmt.Errorf("saving album: %w", err)
}
// populateAlbumBasics fills in the filesystem derived fields of album (root,
// path parts, cover file name, parent) and saves it.
func populateAlbumBasics(tx *db.DB, rootAbsPath string, parent, album *db.Album, dir, basename string, cover string) error {
	album.RootDir = rootAbsPath
	album.LeftPath, album.RightPath = dir, basename
	album.Cover = cover
	album.RightPathUDec = decoded(basename)
	album.ParentID = parent.ID

	err := tx.Save(&album).Error
	if err == nil {
		return nil
	}
	return fmt.Errorf("saving album: %w", err)
}
// populateTrack fills track from the file name, size, album, artist and the
// tags in trags, then saves it. Only the last path element of abspath is used.
func populateTrack(tx *db.DB, album *db.Album, albumArtist *db.Artist, track *db.Track, trags tags.Parser, abspath string, size int) error {
	name := filepath.Base(abspath)
	track.Filename, track.FilenameUDec = name, decoded(name)
	track.Size = size

	track.AlbumID = album.ID
	track.ArtistID = albumArtist.ID

	track.TagTitle = trags.Title()
	track.TagTitleUDec = decoded(trags.Title())
	track.TagTrackArtist = trags.Artist()
	track.TagTrackNumber = trags.TrackNumber()
	track.TagDiscNumber = trags.DiscNumber()
	track.TagBrainzID = trags.BrainzID()

	// these two should be calculated from the file instead of tags
	track.Length = trags.Length()
	track.Bitrate = trags.Bitrate()

	err := tx.Save(&track).Error
	if err == nil {
		return nil
	}
	return fmt.Errorf("saving track: %w", err)
}
// populateAlbumArtist finds the artist row named artistName, creating it when
// absent, and assigns the name and its decoded form to it.
func (s *Scanner) populateAlbumArtist(tx *db.DB, artistName string) (*db.Artist, error) {
	attrs := db.Artist{
		Name:     artistName,
		NameUDec: decoded(artistName),
	}

	found := &db.Artist{}
	err := tx.Where("name=?", artistName).Assign(attrs).FirstOrCreate(found).Error
	if err != nil {
		return nil, fmt.Errorf("find or create artist: %w", err)
	}
	return found, nil
}
// populateGenres finds or creates a genre row for every non-blank name and
// returns the row ids in input order. Names are trimmed of surrounding
// whitespace first. The result is an empty, non-nil slice when no usable name
// is given.
func (s *Scanner) populateGenres(tx *db.DB, track *db.Track, names []string) ([]int, error) {
	ids := make([]int, 0, len(names))
	for _, raw := range names {
		name := strings.TrimSpace(raw)
		if name == "" {
			continue
		}

		var genre db.Genre
		if err := tx.FirstOrCreate(&genre, db.Genre{Name: name}).Error; err != nil {
			return nil, fmt.Errorf("find or create genre: %w", err)
		}
		ids = append(ids, genre.ID)
	}
	return ids, nil
}
// populateTrackGenres replaces the genre links of track: the existing
// track_genres rows for it are deleted and one row per id in genreIDs is
// inserted.
func (s *Scanner) populateTrackGenres(tx *db.DB, track *db.Track, genreIDs []int) error {
	err := tx.Where("track_id=?", track.ID).Delete(db.TrackGenre{}).Error
	if err != nil {
		return fmt.Errorf("delete old track genre records: %w", err)
	}

	err = tx.InsertBulkLeftMany("track_genres", []string{"track_id", "genre_id"}, track.ID, genreIDs)
	if err != nil {
		return fmt.Errorf("insert bulk track genres: %w", err)
	}
	return nil
}
// populateAlbumGenres replaces the genre links of album: the existing
// album_genres rows for it are deleted and one row per id in genreIDs is
// inserted.
func populateAlbumGenres(tx *db.DB, album *db.Album, genreIDs []int) error {
	err := tx.Where("album_id=?", album.ID).Delete(db.AlbumGenre{}).Error
	if err != nil {
		return fmt.Errorf("delete old album genre records: %w", err)
	}

	err = tx.InsertBulkLeftMany("album_genres", []string{"album_id", "genre_id"}, album.ID, genreIDs)
	if err != nil {
		return fmt.Errorf("insert bulk album genres: %w", err)
	}
	return nil
}
// cleanTracks deletes every track row whose id is not a key of seenTracks.
// The deletes are issued in chunks through TransactionChunked.
func (s *Scanner) cleanTracks(seenTracks map[int]struct{}) error {
	began := time.Now()

	var existing []int
	if err := s.db.Model(&db.Track{}).Pluck("id", &existing).Error; err != nil {
		return fmt.Errorf("plucking ids: %w", err)
	}

	var stale []int64
	for _, id := range existing {
		if _, seen := seenTracks[id]; seen {
			continue
		}
		stale = append(stale, int64(id))
	}

	deleteChunk := func(tx *gorm.DB, chunk []int64) error {
		return tx.Where(chunk).Delete(&db.Track{}).Error
	}
	if err := s.db.TransactionChunked(stale, deleteChunk); err != nil {
		return err
	}

	log.Printf("finished clean tracks in %s, %d removed", durSince(began), len(stale))
	return nil
}
// cleanAlbums deletes every album row whose id is not a key of seenAlbums.
// The deletes are issued in chunks through TransactionChunked.
func (s *Scanner) cleanAlbums(seenAlbums map[int]struct{}) error {
	began := time.Now()

	var existing []int
	if err := s.db.Model(&db.Album{}).Pluck("id", &existing).Error; err != nil {
		return fmt.Errorf("plucking ids: %w", err)
	}

	var stale []int64
	for _, id := range existing {
		if _, seen := seenAlbums[id]; seen {
			continue
		}
		stale = append(stale, int64(id))
	}

	deleteChunk := func(tx *gorm.DB, chunk []int64) error {
		return tx.Where(chunk).Delete(&db.Album{}).Error
	}
	if err := s.db.TransactionChunked(stale, deleteChunk); err != nil {
		return err
	}

	log.Printf("finished clean albums in %s, %d removed", durSince(began), len(stale))
	return nil
}
// cleanArtists deletes the artists that no album row references through
// tag_artist_id.
func (s *Scanner) cleanArtists() error {
	began := time.Now()

	// ids of artists for which the left join finds no album
	orphaned := s.db.
		Select("artists.id").
		Model(&db.Artist{}).
		Joins("LEFT JOIN albums ON albums.tag_artist_id=artists.id").
		Where("albums.id IS NULL").
		SubQuery()

	res := s.db.Where("artists.id IN ?", orphaned).Delete(&db.Artist{})
	if res.Error != nil {
		return res.Error
	}

	log.Printf("finished clean artists in %s, %d removed", durSince(began), res.RowsAffected)
	return nil
}
// cleanGenres deletes the genres that are referenced by neither a
// track_genres row nor an album_genres row. It returns the error of the
// delete query, if any.
func (s *Scanner) cleanGenres() error {
	start := time.Now()

	// genres without any track link
	subTrack := s.db.
		Select("genres.id").
		Model(&db.Genre{}).
		Joins("LEFT JOIN track_genres ON track_genres.genre_id=genres.id").
		Where("track_genres.genre_id IS NULL").
		SubQuery()
	// genres without any album link
	subAlbum := s.db.
		Select("genres.id").
		Model(&db.Genre{}).
		Joins("LEFT JOIN album_genres ON album_genres.genre_id=genres.id").
		Where("album_genres.genre_id IS NULL").
		SubQuery()

	q := s.db.
		Where("genres.id IN ? AND genres.id IN ?", subTrack, subAlbum).
		Delete(&db.Genre{})
	// report a failed delete instead of logging it as a successful clean,
	// the same way cleanArtists does
	if err := q.Error; err != nil {
		return err
	}

	log.Printf("finished clean genres in %s, %d removed", durSince(start), q.RowsAffected)
	return nil
}
// ext returns the extension of name without the leading dot, or the empty
// string when name has no extension.
func ext(name string) string {
	if suffix := filepath.Ext(name); suffix != "" {
		return suffix[1:]
	}
	return ""
}
// isCover reports whether name, compared case-insensitively, is one of the
// recognised cover art file names: cover, folder, album, albumart or front
// with a png, jpg or jpeg extension.
func isCover(name string) bool {
	lowered := strings.ToLower(name)

	dot := strings.LastIndexByte(lowered, '.')
	if dot < 0 {
		return false
	}

	switch lowered[:dot] {
	case "cover", "folder", "album", "albumart", "front":
	default:
		return false
	}

	switch lowered[dot+1:] {
	case "png", "jpg", "jpeg":
		return true
	}
	return false
}
// decoded converts a string to its latin equivalent.
// it is used for the model's *UDec fields, which are used for searching. the
// result is empty when the conversion leaves the input unchanged, so those
// fields are only set when they differ from the original.
func decoded(in string) string {
	latin := unidecode.Unidecode(in)
	if latin == in {
		return ""
	}
	return latin
}
// durSince returns the time elapsed since t, truncated to a multiple of ten
// microseconds.
func durSince(t time.Time) time.Duration {
	const granularity = 10 * time.Microsecond
	elapsed := time.Since(t)
	return elapsed.Truncate(granularity)
}
// collected accumulates what a single scan has encountered. The clean step
// uses it afterwards to decide which rows to delete.
type collected struct {
// seenTracks holds the ids of the track rows encountered during the scan.
seenTracks map[int]struct{}
// seenAlbums holds the ids of the album rows encountered during the scan.
seenAlbums map[int]struct{}
// seenTracksNew counts the tracks whose tags were read and saved during
// the scan.
seenTracksNew int
}