scanner: refactor cleaning

This commit is contained in:
sentriz
2020-05-01 02:03:46 +01:00
parent 5686517737
commit 08e775c9dd
3 changed files with 116 additions and 83 deletions

View File

@@ -102,8 +102,22 @@ func (db *DB) GetUserFromName(name string) *User {
return user
}
func (db *DB) WithTx(cb func(tx *gorm.DB)) {
func (db *DB) WithTx(cb func(*gorm.DB)) {
tx := db.Begin()
defer tx.Commit()
cb(tx)
}
func (db *DB) WithTxChunked(data []int64, cb func(*gorm.DB, []int64)) {
// https://sqlite.org/limits.html
const size = 999
tx := db.Begin()
defer tx.Commit()
for i := 0; i < len(data); i += size {
end := i + size
if end > len(data) {
end = len(data)
}
cb(tx, data[i:end])
}
}

View File

@@ -21,6 +21,20 @@ import (
"go.senan.xyz/gonic/server/scanner/tags"
)
func durSince(t time.Time) time.Duration {
return time.Since(t).Truncate(10 * time.Microsecond)
}
// decoded converts a string to it's latin equivalent. it will
// be used by the model's *UDec fields, and is only set if it
// differs from the original. the fields are used for searching
func decoded(in string) string {
if u := unidecode.Unidecode(in); u != in {
return u
}
return ""
}
// isScanning acts as an atomic boolean semaphore. we don't
// want to have more than one scan going on at a time
var isScanning int32
@@ -62,12 +76,59 @@ func New(musicPath string, db *db.DB) *Scanner {
return &Scanner{
db: db,
musicPath: musicPath,
seenTracks: make(map[int]struct{}),
seenFolders: make(map[int]struct{}),
seenTracks: map[int]struct{}{},
seenFolders: map[int]struct{}{},
curFolders: &stack.Stack{},
}
}
// ## begin clean funcs
// ## begin clean funcs
// ## begin clean funcs
func (s *Scanner) cleanTracks() (int, error) {
var previous []int
var missing []int64
s.db.Model(&db.Track{}).Pluck("id", &previous)
for _, prev := range previous {
if _, ok := s.seenTracks[prev]; !ok {
missing = append(missing, int64(prev))
}
}
s.db.WithTxChunked(missing, func(tx *gorm.DB, chunk []int64) {
tx.Where(chunk).Delete(&db.Track{})
})
return len(missing), nil
}
func (s *Scanner) cleanFolders() (int, error) {
var previous []int
var missing []int64
s.db.Model(&db.Album{}).Pluck("id", &previous)
for _, prev := range previous {
if _, ok := s.seenFolders[prev]; !ok {
missing = append(missing, int64(prev))
}
}
s.db.WithTxChunked(missing, func(tx *gorm.DB, chunk []int64) {
tx.Where(chunk).Delete(&db.Album{})
})
return len(missing), nil
}
func (s *Scanner) cleanArtists() (int, error) {
q := s.db.Exec(`
DELETE FROM artists
WHERE NOT EXISTS ( SELECT 1 FROM albums
WHERE albums.tag_artist_id=artists.id )
`)
return int(q.RowsAffected), q.Error
}
// ## begin entries
// ## begin entries
// ## begin entries
func (s *Scanner) Start(isFull bool) error {
if IsScanning() {
return errors.New("already scanning")
@@ -76,8 +137,8 @@ func (s *Scanner) Start(isFull bool) error {
defer unSet()
// reset tracking variables when finished
defer func() {
s.seenTracks = make(map[int]struct{})
s.seenFolders = make(map[int]struct{})
s.seenTracks = map[int]struct{}{}
s.seenFolders = map[int]struct{}{}
s.curFolders = &stack.Stack{}
s.seenTracksNew = 0
s.seenTracksErr = 0
@@ -95,56 +156,29 @@ func (s *Scanner) Start(isFull bool) error {
return errors.Wrap(err, "walking filesystem")
}
log.Printf("finished scan in %s, +%d/%d tracks (%d err)\n",
time.Since(start),
durSince(start),
s.seenTracksNew,
len(s.seenTracks),
s.seenTracksErr,
)
// ** begin cleaning
start = time.Now()
var deleted uint
// delete tracks not on filesystem
s.db.WithTx(func(tx *gorm.DB) {
var tracks []*db.Track
tx.Select("id").Find(&tracks)
for _, track := range tracks {
if _, ok := s.seenTracks[track.ID]; !ok {
tx.Delete(track)
deleted++
}
}
})
// delete folders not on filesystem
s.db.WithTx(func(tx *gorm.DB) {
var folders []*db.Album
tx.Select("id").Find(&folders)
for _, folder := range folders {
if _, ok := s.seenFolders[folder.ID]; !ok {
tx.Delete(folder)
}
}
})
// delete albums without tracks
s.db.Exec(`
DELETE FROM albums
WHERE tag_artist_id NOT NULL
AND NOT EXISTS ( SELECT 1 FROM tracks
WHERE tracks.album_id=albums.id
)`)
// delete artists without albums
s.db.Exec(`
DELETE FROM artists
WHERE NOT EXISTS ( SELECT 1 from albums
WHERE albums.tag_artist_id=artists.id
)`)
cleanFuncs := []struct {
name string
f func() (int, error)
}{
{name: "tracks", f: s.cleanTracks},
{name: "folders", f: s.cleanFolders},
{name: "artists", f: s.cleanArtists},
}
for _, clean := range cleanFuncs {
start = time.Now()
deleted, _ := clean.f()
log.Printf("finished clean %s in %s, %d removed",
clean.name, durSince(start), deleted)
}
// finish up
strNow := strconv.FormatInt(time.Now().Unix(), 10)
s.db.SetSetting("last_scan_time", strNow)
//
log.Printf("finished clean in %s, -%d tracks\n",
time.Since(start),
deleted,
)
return nil
}
@@ -246,17 +280,6 @@ func (s *Scanner) callbackPost(fullPath string, info *godirwalk.Dirent) error {
return nil
}
// decoded converts a string to it's latin equivalent. it will
// be used by the model's *UDec fields, and is only set if it
// differs from the original. the fields are used for searching
func decoded(in string) string {
result := unidecode.Unidecode(in)
if result == in {
return ""
}
return result
}
// ## begin handlers
// ## begin handlers
// ## begin handlers
@@ -284,8 +307,8 @@ func (s *Scanner) handleFolder(it *item) error {
}).
First(folder).
Error
if !s.isFull && (!gorm.IsRecordNotFoundError(err) &&
it.stat.ModTime().Before(folder.UpdatedAt)) {
if !gorm.IsRecordNotFoundError(err) &&
it.stat.ModTime().Before(folder.UpdatedAt) {
// we found the record but it hasn't changed
return nil
}
@@ -305,6 +328,11 @@ func (s *Scanner) handleTrack(it *item) error {
}
// ** begin set track basics
track := &db.Track{}
defer func() {
// folder's id will come from early return
// or save at the end
s.seenTracks[track.ID] = struct{}{}
}()
err := s.trTx.
Select("id, updated_at").
Where(db.Track{
@@ -313,10 +341,9 @@ func (s *Scanner) handleTrack(it *item) error {
}).
First(track).
Error
if !s.isFull && !gorm.IsRecordNotFoundError(err) &&
if !gorm.IsRecordNotFoundError(err) &&
it.stat.ModTime().Before(track.UpdatedAt) {
// we found the record but it hasn't changed
s.seenTracks[track.ID] = struct{}{}
return nil
}
track.Filename = it.filename
@@ -382,7 +409,6 @@ func (s *Scanner) handleTrack(it *item) error {
track.TagGenreID = genre.ID
// ** begin save the track
s.trTx.Save(track)
s.seenTracks[track.ID] = struct{}{}
s.seenTracksNew++
// ** begin set album if this is the first track in the folder
folder := s.curFolders.Peek()

View File

@@ -5,9 +5,20 @@ import (
"strings"
"github.com/nicksellen/audiotags"
"github.com/pkg/errors"
)
func intSep(in, sep string) int {
if in == "" {
return 0
}
start := strings.SplitN(in, sep, 2)[0]
out, err := strconv.Atoi(start)
if err != nil {
return 0
}
return out
}
type Tags struct {
raw map[string]string
props *audiotags.AudioProperties
@@ -15,13 +26,7 @@ type Tags struct {
func New(path string) (*Tags, error) {
raw, props, err := audiotags.Read(path)
if err != nil {
return nil, errors.Wrap(err, "audiotags module")
}
return &Tags{
raw: raw,
props: props,
}, nil
return &Tags{raw, props}, err
}
func (t *Tags) firstTag(keys ...string) string {
@@ -45,15 +50,3 @@ func (t *Tags) TrackNumber() int { return intSep(t.firstTag("tracknumber"),
func (t *Tags) DiscNumber() int { return intSep(t.firstTag("discnumber"), "/") } // eg. 1/2
func (t *Tags) Length() int { return t.props.Length }
func (t *Tags) Bitrate() int { return t.props.Bitrate }
func intSep(in, sep string) int {
if in == "" {
return 0
}
start := strings.SplitN(in, sep, 2)[0]
out, err := strconv.Atoi(start)
if err != nil {
return 0
}
return out
}