Add cleanup command that removes orphaned thumbnails and index entries

This commit is contained in:
Michael Mayer 2021-01-24 17:46:18 +01:00
parent 01f14d39c2
commit e04f0f937f
25 changed files with 340 additions and 28 deletions

View file

@ -58,6 +58,7 @@ func main() {
commands.MomentsCommand,
commands.OptimizeCommand,
commands.PurgeCommand,
commands.CleanUpCommand,
commands.CopyCommand,
commands.ConvertCommand,
commands.ResampleCommand,

View file

@ -0,0 +1,65 @@
package commands
import (
"context"
"time"
"github.com/photoprism/photoprism/internal/config"
"github.com/photoprism/photoprism/internal/photoprism"
"github.com/photoprism/photoprism/internal/service"
"github.com/urfave/cli"
)
// CleanUpCommand registers the "cleanup" cli command.
// It accepts the flags listed in cleanUpFlags and runs cleanUpAction.
var CleanUpCommand = cli.Command{
	Name:   "cleanup",
	Usage:  "Removes orphaned thumbnails and index entries",
	Flags:  cleanUpFlags,
	Action: cleanUpAction,
}
// cleanUpFlags lists the command-line flags of the cleanup command.
var cleanUpFlags = []cli.Flag{
	// "dry" is read by cleanUpAction and passed on as CleanUpOptions.Dry.
	cli.BoolFlag{
		Name:  "dry",
		Usage: "dry run, don't actually remove anything",
	},
}
// cleanUpAction removes orphaned thumbnails and index entries.
//
// It builds the configuration from the cli context, initializes it together
// with the database, and then runs the cleanup worker. The "dry" flag is
// forwarded to the worker as CleanUpOptions.Dry. On success, the number of
// thumbnails and photos reported by the worker is logged along with the
// elapsed time.
//
// It returns the error from conf.Init or from the cleanup worker, and nil otherwise.
func cleanUpAction(ctx *cli.Context) error {
	start := time.Now()

	conf := config.NewConfig(ctx)
	service.SetConfig(conf)

	_, cancel := context.WithCancel(context.Background())
	defer cancel()

	if err := conf.Init(); err != nil {
		return err
	}

	conf.InitDb()

	// Deferred so that the shutdown also runs when the worker returns an error;
	// previously the error path returned without calling it.
	defer conf.Shutdown()

	if conf.ReadOnly() {
		log.Infof("cleanup: read-only mode enabled")
	}

	cleanUp := service.CleanUp()

	opt := photoprism.CleanUpOptions{
		Dry: ctx.Bool("dry"),
	}

	thumbs, orphans, err := cleanUp.Start(opt)

	if err != nil {
		return err
	}

	elapsed := time.Since(start)

	log.Infof("cleanup: removed %d orphaned thumbnails and %d photos in %s", thumbs, orphans, elapsed)

	return nil
}

View file

@ -10,14 +10,14 @@ import (
"github.com/urfave/cli"
)
// ConfigCommand is used to register the display config cli command
// ConfigCommand registers the display config cli command.
var ConfigCommand = cli.Command{
Name: "config",
Usage: "Displays global configuration values",
Action: configAction,
}
// configAction prints current configuration
// configAction lists configuration options and their values.
func configAction(ctx *cli.Context) error {
conf := config.NewConfig(ctx)

View file

@ -9,7 +9,7 @@ import (
"github.com/urfave/cli"
)
// ConvertCommand is used to register the convert cli command
// ConvertCommand registers the convert cli command.
var ConvertCommand = cli.Command{
Name: "convert",
Usage: "Converts originals in other formats to JPEG",

View file

@ -13,7 +13,7 @@ import (
"github.com/urfave/cli"
)
// CopyCommand is used to register the copy cli command
// CopyCommand registers the copy cli command.
var CopyCommand = cli.Command{
Name: "copy",
Aliases: []string{"cp"},

View file

@ -13,7 +13,7 @@ import (
"github.com/urfave/cli"
)
// ImportCommand is used to register the import cli command
// ImportCommand registers the import cli command.
var ImportCommand = cli.Command{
Name: "import",
Aliases: []string{"mv"},

View file

@ -13,7 +13,7 @@ import (
"github.com/urfave/cli"
)
// IndexCommand is used to register the index cli command
// IndexCommand registers the index cli command.
var IndexCommand = cli.Command{
Name: "index",
Usage: "Indexes media files in originals folder",
@ -26,6 +26,10 @@ var indexFlags = []cli.Flag{
Name: "all, a",
Usage: "re-index all originals, including unchanged files",
},
cli.BoolFlag{
Name: "cleanup",
Usage: "removes orphaned thumbnails and index entries",
},
}
// indexAction indexes all photos in originals directory (photo library)
@ -54,7 +58,7 @@ func indexAction(ctx *cli.Context) error {
}
if conf.ReadOnly() {
log.Infof("read-only mode enabled")
log.Infof("index: read-only mode enabled")
}
ind := service.Index()
@ -78,7 +82,21 @@ func indexAction(ctx *cli.Context) error {
if files, photos, err := prg.Start(prgOpt); err != nil {
log.Error(err)
} else if len(files) > 0 || len(photos) > 0 {
log.Infof("removed %d files and %d photos", len(files), len(photos))
log.Infof("purge: removed %d files and %d photos", len(files), len(photos))
}
if ctx.Bool("cleanup") {
cleanUp := service.CleanUp()
opt := photoprism.CleanUpOptions{
Dry: false,
}
if thumbs, orphans, err := cleanUp.Start(opt); err != nil {
return err
} else {
log.Infof("cleanup: removed %d orphaned thumbnails and %d photos", thumbs, orphans)
}
}
elapsed := time.Since(start)

View file

@ -8,7 +8,7 @@ import (
"github.com/urfave/cli"
)
// MigrateCommand is used to register the migrate cli command
// MigrateCommand registers the migrate cli command.
var MigrateCommand = cli.Command{
Name: "migrate",
Usage: "Initializes the index database if needed",

View file

@ -9,7 +9,7 @@ import (
"github.com/urfave/cli"
)
// MomentsCommand is used to register the index cli command
// MomentsCommand registers the moments cli command.
var MomentsCommand = cli.Command{
Name: "moments",
Usage: "Creates albums based on popular locations, dates and labels",
@ -33,7 +33,7 @@ func momentsAction(ctx *cli.Context) error {
conf.InitDb()
if conf.ReadOnly() {
log.Infof("read-only mode enabled")
log.Infof("moments: read-only mode enabled")
}
moments := service.Moments()

View file

@ -11,7 +11,7 @@ import (
"github.com/urfave/cli"
)
// OptimizeCommand is used to register the index cli command.
// OptimizeCommand registers the optimize cli command.
var OptimizeCommand = cli.Command{
Name: "optimize",
Usage: "Starts metadata check and optimization",
@ -35,7 +35,7 @@ func optimizeAction(ctx *cli.Context) error {
conf.InitDb()
if conf.ReadOnly() {
log.Infof("read-only mode enabled")
log.Infof("optimize: read-only mode enabled")
}
worker := workers.NewMeta(conf)

View file

@ -14,7 +14,7 @@ import (
"github.com/urfave/cli"
)
// PurgeCommand is used to register the index cli command
// PurgeCommand registers the purge cli command.
var PurgeCommand = cli.Command{
Name: "purge",
Usage: "Removes missing files from search results",
@ -53,13 +53,13 @@ func purgeAction(ctx *cli.Context) error {
subPath := strings.TrimSpace(ctx.Args().First())
if subPath == "" {
log.Infof("removing missing files in %s", txt.Quote(filepath.Base(conf.OriginalsPath())))
log.Infof("purge: removing missing files in %s", txt.Quote(filepath.Base(conf.OriginalsPath())))
} else {
log.Infof("removing missing files in %s", txt.Quote(fs.RelName(filepath.Join(conf.OriginalsPath(), subPath), filepath.Dir(conf.OriginalsPath()))))
log.Infof("purge: removing missing files in %s", txt.Quote(fs.RelName(filepath.Join(conf.OriginalsPath(), subPath), filepath.Dir(conf.OriginalsPath()))))
}
if conf.ReadOnly() {
log.Infof("read-only mode enabled")
log.Infof("purge: read-only mode enabled")
}
prg := service.Purge()
@ -75,7 +75,7 @@ func purgeAction(ctx *cli.Context) error {
} else {
elapsed := time.Since(start)
log.Infof("removed %d files and %d photos in %s", len(files), len(photos), elapsed)
log.Infof("purge: removed %d files and %d photos in %s", len(files), len(photos), elapsed)
}
conf.Shutdown()

View file

@ -9,7 +9,7 @@ import (
"github.com/urfave/cli"
)
// ResampleCommand is used to register the thumbs cli command
// ResampleCommand registers the thumbs cli command.
var ResampleCommand = cli.Command{
Name: "resample",
Aliases: []string{"thumbs"},

View file

@ -23,7 +23,7 @@ import (
"github.com/urfave/cli"
)
// StartCommand is used to register the start cli command
// StartCommand registers the start cli command.
var StartCommand = cli.Command{
Name: "start",
Aliases: []string{"up"},
@ -105,7 +105,7 @@ func startAction(ctx *cli.Context) error {
}
if conf.ReadOnly() {
log.Infof("read-only mode enabled")
log.Infof("start: read-only mode enabled")
}
// start web server

View file

@ -9,7 +9,7 @@ import (
"github.com/urfave/cli"
)
// StopCommand is used to register the stop cli command
// StopCommand registers the stop cli command.
var StopCommand = cli.Command{
Name: "stop",
Aliases: []string{"down"},

View file

@ -7,7 +7,7 @@ import (
"github.com/urfave/cli"
)
// VersionCommand is used to register the version cli command
// VersionCommand registers the version cli command.
var VersionCommand = cli.Command{
Name: "version",
Usage: "Shows version information",

View file

@ -0,0 +1,142 @@
package photoprism
import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"runtime/debug"
	"strings"
	"sync/atomic"

	"github.com/photoprism/photoprism/internal/config"
	"github.com/photoprism/photoprism/internal/entity"
	"github.com/photoprism/photoprism/internal/event"
	"github.com/photoprism/photoprism/internal/mutex"
	"github.com/photoprism/photoprism/internal/query"
	"github.com/photoprism/photoprism/pkg/fastwalk"
	"github.com/photoprism/photoprism/pkg/fs"
	"github.com/photoprism/photoprism/pkg/txt"
)
// CleanUp represents a worker that deletes unneeded data and files.
type CleanUp struct {
	// conf is the configuration the worker reads the thumbnail path from.
	conf *config.Config
}
// NewCleanUp creates a cleanup worker that uses the given configuration.
func NewCleanUp(conf *config.Config) *CleanUp {
	return &CleanUp{conf: conf}
}
// Start removes orphaned thumbnails and index entries.
//
// The worker first claims mutex.MainWorker and releases it again on return.
// It then walks the thumbnail path and removes every thumbnail whose file name
// starts with a hash that query.FileHashes does not know. Afterwards, it deletes
// the photos returned by query.PhotosOrphaned and, if any were deleted, updates
// the photo counts and publishes a delete event for them.
//
// When opt.Dry is set, nothing is removed; the counters then report what would
// have been removed.
//
// It returns the number of thumbnails and photos removed (or that would be
// removed in a dry run). The error is non-nil if the worker could not be
// started, a query or the directory walk failed, the run was canceled, or a
// panic was recovered.
func (w *CleanUp) Start(opt CleanUpOptions) (thumbs int, orphans int, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("cleanup: %s (panic)\nstack: %s", r, debug.Stack())
			log.Error(err)
		}
	}()

	if err := mutex.MainWorker.Start(); err != nil {
		log.Warnf("cleanup: %s (start)", err.Error())
		return thumbs, orphans, err
	}

	defer mutex.MainWorker.Stop()

	if opt.Dry {
		log.Infof("cleanup: dry run, nothing will actually be removed")
	}

	// Find and remove orphaned thumbnails.
	hashes, err := query.FileHashes()

	if err != nil {
		return thumbs, orphans, err
	}

	thumbPath := w.conf.ThumbPath()

	// NOTE(review): fastwalk is expected to invoke the callback from several
	// goroutines at once, so the counter is updated atomically instead of
	// incrementing the named result directly. The hashes map is only read.
	var thumbCount int64

	walkErr := fastwalk.Walk(thumbPath, func(fileName string, info os.FileMode) error {
		base := filepath.Base(fileName)

		if info.IsDir() || strings.HasPrefix(base, ".") {
			return nil
		}

		// The part in front of the first underscore is treated as the file hash;
		// names with a shorter prefix are not considered thumbnails and are skipped.
		i := strings.Index(base, "_")

		if i < 39 {
			return nil
		}

		hash := base[:i]
		logName := txt.Quote(fs.RelName(fileName, thumbPath))

		if ok := hashes[hash]; ok {
			// Do nothing.
		} else if opt.Dry {
			atomic.AddInt64(&thumbCount, 1)
			log.Debugf("cleanup: orphaned thumbnail %s would be removed", logName)
		} else if err := os.Remove(fileName); err != nil {
			log.Warnf("cleanup: %s in %s", err, logName)
		} else {
			atomic.AddInt64(&thumbCount, 1)
			log.Debugf("cleanup: removed orphaned thumbnail %s", logName)
		}

		return nil
	})

	thumbs = int(atomic.LoadInt64(&thumbCount))

	if walkErr != nil {
		return thumbs, orphans, walkErr
	}

	// Find and remove orphaned photo index entries.
	photos, err := query.PhotosOrphaned()

	if err != nil {
		return thumbs, orphans, err
	}

	var deleted []string

	canceled := false

	for _, p := range photos {
		if mutex.MainWorker.Canceled() {
			// Stop deleting, but still finalize below what has been deleted so far.
			canceled = true
			break
		}

		if opt.Dry {
			orphans++
			log.Infof("cleanup: orphaned photo %s would be removed", txt.Quote(p.PhotoUID))
			continue
		}

		if err := Delete(p); err != nil {
			log.Errorf("cleanup: %s (remove orphan)", err.Error())
		} else {
			orphans++
			deleted = append(deleted, p.PhotoUID)
			log.Debugf("cleanup: removed orphaned photo %s", p.PhotoUID)
		}
	}

	// Update counts and views if needed, even when the run was canceled midway.
	if len(deleted) > 0 {
		if err := entity.UpdatePhotoCounts(); err != nil {
			log.Errorf("cleanup: %s", err)
		}

		event.EntitiesDeleted("photos", deleted)
	}

	if canceled {
		return thumbs, orphans, errors.New("cleanup canceled")
	}

	return thumbs, orphans, nil
}
// Cancel stops the current operation.
// It requests cancellation on the shared mutex.MainWorker, which Start checks
// before it deletes each orphaned photo.
func (w *CleanUp) Cancel() {
	mutex.MainWorker.Cancel()
}

View file

@ -0,0 +1,5 @@
package photoprism
// CleanUpOptions holds the options passed to the cleanup worker.
type CleanUpOptions struct {
	// Dry requests a dry run in which nothing is removed.
	Dry bool
}

View file

@ -31,7 +31,7 @@ func NewPurge(conf *config.Config, files *Files) *Purge {
}
// Start removes missing files from search results.
func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPhotos map[string]bool, err error) {
func (w *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPhotos map[string]bool, err error) {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("purge: %s (panic)\nstack: %s", r, debug.Stack())
@ -105,7 +105,7 @@ func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPh
if err := file.Purge(); err != nil {
log.Errorf("purge: %s", err)
} else {
prg.files.Remove(file.FileName, file.FileRoot)
w.files.Remove(file.FileName, file.FileRoot)
purgedFiles[fileName] = true
log.Infof("purge: flagged file %s as missing", txt.Quote(file.FileName))
}
@ -156,7 +156,7 @@ func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPh
if err := file.Purge(); err != nil {
log.Errorf("purge: %s", err)
} else {
prg.files.Remove(file.FileName, file.FileRoot)
w.files.Remove(file.FileName, file.FileRoot)
purgedFiles[fileName] = true
log.Infof("purge: removed duplicate %s", txt.Quote(file.FileName))
}
@ -214,7 +214,7 @@ func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPh
// Remove files from lookup table.
for _, file := range photo.AllFiles() {
prg.files.Remove(file.FileName, file.FileRoot)
w.files.Remove(file.FileName, file.FileRoot)
}
}
}
@ -250,6 +250,6 @@ func (prg *Purge) Start(opt PurgeOptions) (purgedFiles map[string]bool, purgedPh
}
// Cancel stops the current operation.
func (prg *Purge) Cancel() {
func (w *Purge) Cancel() {
mutex.MainWorker.Cancel()
}

View file

@ -150,3 +150,22 @@ func IndexedFiles() (result FileMap, err error) {
return result, err
}
// HashMap is a set of hashes: each contained hash maps to true.
type HashMap map[string]bool
// FileHashes looks up the hashes of all files that are neither flagged as
// missing nor deleted, and returns them as a HashMap. If the query fails,
// an empty map is returned together with the error.
func FileHashes() (result HashMap, err error) {
	var found []string

	stmt := UnscopedDb().Raw("SELECT file_hash FROM files WHERE file_missing = 0 AND deleted_at IS NULL").Pluck("file_hash", &found)

	if stmt.Error != nil {
		return make(HashMap), stmt.Error
	}

	result = make(HashMap)

	for i := range found {
		result[found[i]] = true
	}

	return result, nil
}

View file

@ -204,3 +204,17 @@ func TestIndexedFiles(t *testing.T) {
t.Logf("INDEXED FILES: %#v", result)
}
// TestFileHashes verifies that FileHashes succeeds and returns at least three hashes.
func TestFileHashes(t *testing.T) {
	hashes, err := FileHashes()

	switch {
	case err != nil:
		t.Fatal(err)
	case len(hashes) < 3:
		t.Fatalf("at least 3 file hashes expected")
	}

	t.Logf("FILE HASHES: %#v", hashes)
}

View file

@ -117,3 +117,15 @@ func PhotosCheck(limit, offset int, delay time.Duration) (entities entity.Photos
return entities, err
}
// PhotosOrphaned returns index entries that are considered orphaned and may be
// removed: photos that are marked as deleted, have a quality of -1, and are not
// referenced by any file that has not been deleted.
func PhotosOrphaned() (photos entity.Photos, err error) {
	const orphanQuery = `SELECT * FROM photos WHERE
deleted_at IS NOT NULL
AND photo_quality = -1
AND id NOT IN (SELECT photo_id FROM files WHERE files.deleted_at IS NULL)`

	err = UnscopedDb().Raw(orphanQuery).Find(&photos).Error

	return photos, err
}

View file

@ -79,3 +79,15 @@ func TestPhotosCheck(t *testing.T) {
}
assert.IsType(t, entity.Photos{}, result)
}
// TestPhotosOrphaned verifies that PhotosOrphaned succeeds and yields entity.Photos.
func TestPhotosOrphaned(t *testing.T) {
	orphans, queryErr := PhotosOrphaned()

	if queryErr != nil {
		t.Fatal(queryErr)
	}

	assert.IsType(t, entity.Photos{}, orphans)

	t.Logf("ORPHANS: %#v", orphans)
}

View file

@ -0,0 +1,19 @@
package service
import (
"sync"
"github.com/photoprism/photoprism/internal/photoprism"
)
// onceCleanUp ensures that initCleanUp runs only once.
var onceCleanUp sync.Once
// initCleanUp creates the cleanup worker from the current Config()
// and stores it in services.CleanUp.
func initCleanUp() {
	services.CleanUp = photoprism.NewCleanUp(Config())
}
// CleanUp returns the shared cleanup worker, creating it on first use.
func CleanUp() *photoprism.CleanUp {
	onceCleanUp.Do(initCleanUp)
	return services.CleanUp
}

View file

@ -25,6 +25,7 @@ var services struct {
Index *photoprism.Index
Moments *photoprism.Moments
Purge *photoprism.Purge
CleanUp *photoprism.CleanUp
Nsfw *nsfw.Detector
Query *query.Query
Resample *photoprism.Resample

View file

@ -68,6 +68,10 @@ func TestPurge(t *testing.T) {
assert.IsType(t, &photoprism.Purge{}, Purge())
}
// TestCleanUp checks that CleanUp returns a *photoprism.CleanUp.
func TestCleanUp(t *testing.T) {
	assert.IsType(t, &photoprism.CleanUp{}, CleanUp())
}
func TestNsfwDetector(t *testing.T) {
assert.IsType(t, &nsfw.Detector{}, NsfwDetector())
}