From 08e775c9dd023e6499ad9310451081f0ccde0719 Mon Sep 17 00:00:00 2001 From: sentriz Date: Fri, 1 May 2020 02:03:46 +0100 Subject: [PATCH] scanner: refactor cleaning --- server/db/db.go | 16 +++- server/scanner/scanner.go | 150 +++++++++++++++++++++--------------- server/scanner/tags/tags.go | 33 ++++---- 3 files changed, 116 insertions(+), 83 deletions(-) diff --git a/server/db/db.go b/server/db/db.go index 79d84a9..6ef3663 100644 --- a/server/db/db.go +++ b/server/db/db.go @@ -102,8 +102,22 @@ func (db *DB) GetUserFromName(name string) *User { return user } -func (db *DB) WithTx(cb func(tx *gorm.DB)) { +func (db *DB) WithTx(cb func(*gorm.DB)) { tx := db.Begin() defer tx.Commit() cb(tx) } + +func (db *DB) WithTxChunked(data []int64, cb func(*gorm.DB, []int64)) { + // https://sqlite.org/limits.html + const size = 999 + tx := db.Begin() + defer tx.Commit() + for i := 0; i < len(data); i += size { + end := i + size + if end > len(data) { + end = len(data) + } + cb(tx, data[i:end]) + } +} diff --git a/server/scanner/scanner.go b/server/scanner/scanner.go index 7f4362d..0019737 100644 --- a/server/scanner/scanner.go +++ b/server/scanner/scanner.go @@ -21,6 +21,20 @@ import ( "go.senan.xyz/gonic/server/scanner/tags" ) +func durSince(t time.Time) time.Duration { + return time.Since(t).Truncate(10 * time.Microsecond) +} + +// decoded converts a string to its latin equivalent. it will +// be used by the model's *UDec fields, and is only set if it +// differs from the original. the fields are used for searching +func decoded(in string) string { + if u := unidecode.Unidecode(in); u != in { + return u + } + return "" +} + // isScanning acts as an atomic boolean semaphore.
we don't // want to have more than one scan going on at a time var isScanning int32 @@ -62,12 +76,59 @@ func New(musicPath string, db *db.DB) *Scanner { return &Scanner{ db: db, musicPath: musicPath, - seenTracks: make(map[int]struct{}), - seenFolders: make(map[int]struct{}), + seenTracks: map[int]struct{}{}, + seenFolders: map[int]struct{}{}, curFolders: &stack.Stack{}, } } +// ## begin clean funcs +// ## begin clean funcs +// ## begin clean funcs + +func (s *Scanner) cleanTracks() (int, error) { + var previous []int + var missing []int64 + s.db.Model(&db.Track{}).Pluck("id", &previous) + for _, prev := range previous { + if _, ok := s.seenTracks[prev]; !ok { + missing = append(missing, int64(prev)) + } + } + s.db.WithTxChunked(missing, func(tx *gorm.DB, chunk []int64) { + tx.Where(chunk).Delete(&db.Track{}) + }) + return len(missing), nil +} + +func (s *Scanner) cleanFolders() (int, error) { + var previous []int + var missing []int64 + s.db.Model(&db.Album{}).Pluck("id", &previous) + for _, prev := range previous { + if _, ok := s.seenFolders[prev]; !ok { + missing = append(missing, int64(prev)) + } + } + s.db.WithTxChunked(missing, func(tx *gorm.DB, chunk []int64) { + tx.Where(chunk).Delete(&db.Album{}) + }) + return len(missing), nil +} + +func (s *Scanner) cleanArtists() (int, error) { + q := s.db.Exec(` + DELETE FROM artists + WHERE NOT EXISTS ( SELECT 1 FROM albums + WHERE albums.tag_artist_id=artists.id ) + `) + return int(q.RowsAffected), q.Error +} + +// ## begin entries +// ## begin entries +// ## begin entries + func (s *Scanner) Start(isFull bool) error { if IsScanning() { return errors.New("already scanning") @@ -76,8 +137,8 @@ func (s *Scanner) Start(isFull bool) error { defer unSet() // reset tracking variables when finished defer func() { - s.seenTracks = make(map[int]struct{}) - s.seenFolders = make(map[int]struct{}) + s.seenTracks = map[int]struct{}{} + s.seenFolders = map[int]struct{}{} s.curFolders = &stack.Stack{} s.seenTracksNew = 0 
s.seenTracksErr = 0 @@ -95,56 +156,29 @@ func (s *Scanner) Start(isFull bool) error { return errors.Wrap(err, "walking filesystem") } log.Printf("finished scan in %s, +%d/%d tracks (%d err)\n", - time.Since(start), + durSince(start), s.seenTracksNew, len(s.seenTracks), s.seenTracksErr, ) // ** begin cleaning - start = time.Now() - var deleted uint - // delete tracks not on filesystem - s.db.WithTx(func(tx *gorm.DB) { - var tracks []*db.Track - tx.Select("id").Find(&tracks) - for _, track := range tracks { - if _, ok := s.seenTracks[track.ID]; !ok { - tx.Delete(track) - deleted++ - } - } - }) - // delete folders not on filesystem - s.db.WithTx(func(tx *gorm.DB) { - var folders []*db.Album - tx.Select("id").Find(&folders) - for _, folder := range folders { - if _, ok := s.seenFolders[folder.ID]; !ok { - tx.Delete(folder) - } - } - }) - // delete albums without tracks - s.db.Exec(` - DELETE FROM albums - WHERE tag_artist_id NOT NULL - AND NOT EXISTS ( SELECT 1 FROM tracks - WHERE tracks.album_id=albums.id - )`) - // delete artists without albums - s.db.Exec(` - DELETE FROM artists - WHERE NOT EXISTS ( SELECT 1 from albums - WHERE albums.tag_artist_id=artists.id - )`) + cleanFuncs := []struct { + name string + f func() (int, error) + }{ + {name: "tracks", f: s.cleanTracks}, + {name: "folders", f: s.cleanFolders}, + {name: "artists", f: s.cleanArtists}, + } + for _, clean := range cleanFuncs { + start = time.Now() + deleted, _ := clean.f() + log.Printf("finished clean %s in %s, %d removed", + clean.name, durSince(start), deleted) + } // finish up strNow := strconv.FormatInt(time.Now().Unix(), 10) s.db.SetSetting("last_scan_time", strNow) - // - log.Printf("finished clean in %s, -%d tracks\n", - time.Since(start), - deleted, - ) return nil } @@ -246,17 +280,6 @@ func (s *Scanner) callbackPost(fullPath string, info *godirwalk.Dirent) error { return nil } -// decoded converts a string to it's latin equivalent. 
it will -// be used by the model's *UDec fields, and is only set if it -// differs from the original. the fields are used for searching -func decoded(in string) string { - result := unidecode.Unidecode(in) - if result == in { - return "" - } - return result -} - // ## begin handlers // ## begin handlers // ## begin handlers @@ -284,8 +307,8 @@ func (s *Scanner) handleFolder(it *item) error { }). First(folder). Error - if !s.isFull && (!gorm.IsRecordNotFoundError(err) && - it.stat.ModTime().Before(folder.UpdatedAt)) { + if !gorm.IsRecordNotFoundError(err) && + it.stat.ModTime().Before(folder.UpdatedAt) { // we found the record but it hasn't changed return nil } @@ -305,6 +328,11 @@ func (s *Scanner) handleTrack(it *item) error { } // ** begin set track basics track := &db.Track{} + defer func() { + // track's id will come from early return + // or save at the end + s.seenTracks[track.ID] = struct{}{} + }() err := s.trTx. Select("id, updated_at"). Where(db.Track{ @@ -313,10 +341,9 @@ func (s *Scanner) handleTrack(it *item) error { }). First(track).
Error - if !s.isFull && !gorm.IsRecordNotFoundError(err) && + if !gorm.IsRecordNotFoundError(err) && it.stat.ModTime().Before(track.UpdatedAt) { // we found the record but it hasn't changed - s.seenTracks[track.ID] = struct{}{} return nil } track.Filename = it.filename @@ -382,7 +409,6 @@ func (s *Scanner) handleTrack(it *item) error { track.TagGenreID = genre.ID // ** begin save the track s.trTx.Save(track) - s.seenTracks[track.ID] = struct{}{} s.seenTracksNew++ // ** begin set album if this is the first track in the folder folder := s.curFolders.Peek() diff --git a/server/scanner/tags/tags.go b/server/scanner/tags/tags.go index 21b703e..60b02c3 100644 --- a/server/scanner/tags/tags.go +++ b/server/scanner/tags/tags.go @@ -5,9 +5,20 @@ import ( "strings" "github.com/nicksellen/audiotags" - "github.com/pkg/errors" ) +func intSep(in, sep string) int { + if in == "" { + return 0 + } + start := strings.SplitN(in, sep, 2)[0] + out, err := strconv.Atoi(start) + if err != nil { + return 0 + } + return out +} + type Tags struct { raw map[string]string props *audiotags.AudioProperties @@ -15,13 +26,7 @@ type Tags struct { func New(path string) (*Tags, error) { raw, props, err := audiotags.Read(path) - if err != nil { - return nil, errors.Wrap(err, "audiotags module") - } - return &Tags{ - raw: raw, - props: props, - }, nil + return &Tags{raw, props}, err } func (t *Tags) firstTag(keys ...string) string { @@ -45,15 +50,3 @@ func (t *Tags) TrackNumber() int { return intSep(t.firstTag("tracknumber"), func (t *Tags) DiscNumber() int { return intSep(t.firstTag("discnumber"), "/") } // eg. 1/2 func (t *Tags) Length() int { return t.props.Length } func (t *Tags) Bitrate() int { return t.props.Bitrate } - -func intSep(in, sep string) int { - if in == "" { - return 0 - } - start := strings.SplitN(in, sep, 2)[0] - out, err := strconv.Atoi(start) - if err != nil { - return 0 - } - return out -}