JPEG: Automatically re-encode broken image files #1673 #2463 #2557

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2023-02-21 13:31:30 +01:00
parent 057a8b7cd7
commit b855922283
7 changed files with 252 additions and 49 deletions

View file

@ -130,6 +130,12 @@ func (c *Config) CreateDirectories() error {
return createError(c.CachePath(), err)
}
if c.MediaCachePath() == "" {
return notFoundError("media")
} else if err := os.MkdirAll(c.MediaCachePath(), fs.ModeDir); err != nil {
return createError(c.MediaCachePath(), err)
}
if c.ThumbCachePath() == "" {
return notFoundError("thumbs")
} else if err := os.MkdirAll(c.ThumbCachePath(), fs.ModeDir); err != nil {
@ -443,9 +449,14 @@ func (c *Config) CmdLibPath() string {
return "/usr/local/lib:/usr/lib"
}
// ThumbCachePath returns the thumbnail storage directory.
// MediaCachePath returns the media cache path, which is the "media"
// subdirectory of the general cache path.
func (c *Config) MediaCachePath() string {
	cacheDir := c.CachePath()

	return filepath.Join(cacheDir, "media")
}
// ThumbCachePath returns the thumbnail storage path, which is the
// "thumbnails" subdirectory of the general cache path.
func (c *Config) ThumbCachePath() string {
	// Build the path with filepath.Join instead of string concatenation so
	// that it is assembled the same way as MediaCachePath.
	return filepath.Join(c.CachePath(), "thumbnails")
}
// StoragePath returns the path for generated files like cache and index.

View file

@ -199,7 +199,14 @@ func TestConfig_CachePath(t *testing.T) {
assert.True(t, strings.HasSuffix(c.CachePath(), "storage/testdata/cache"))
}
func TestConfig_ThumbnailsPath(t *testing.T) {
// TestConfig_MediaCachePath verifies that the media cache path is absolute
// and located in the "media" subdirectory of the test cache directory.
func TestConfig_MediaCachePath(t *testing.T) {
	conf := NewConfig(CliTestContext())
	mediaPath := conf.MediaCachePath()

	assert.True(t, strings.HasPrefix(mediaPath, "/"))
	assert.True(t, strings.HasSuffix(mediaPath, "storage/testdata/cache/media"))
}
func TestConfig_ThumbCachePath(t *testing.T) {
c := NewConfig(CliTestContext())
assert.True(t, strings.HasPrefix(c.ThumbCachePath(), "/"))

View file

@ -65,6 +65,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
{"backup-path", c.BackupPath()},
{"cache-path", c.CachePath()},
{"cmd-cache-path", c.CmdCachePath()},
{"media-cache-path", c.MediaCachePath()},
{"thumb-cache-path", c.ThumbCachePath()},
{"import-path", c.ImportPath()},
{"import-dest", c.ImportDest()},

View file

@ -113,8 +113,8 @@ func (w *CleanUp) Start(opt CleanUpOptions) (thumbs int, orphans int, sidecars i
}
}
// Remove thumbnail files.
thumbs, err = w.Thumbs(opt)
// Remove orphaned media and thumbnail cache files.
thumbs, err = w.Cache(opt)
// Only update counts if anything was deleted.
if len(deleted) > 0 {
@ -135,68 +135,70 @@ func (w *CleanUp) Start(opt CleanUpOptions) (thumbs int, orphans int, sidecars i
return thumbs, orphans, sidecars, err
}
// Thumbs removes orphan thumbnail files.
func (w *CleanUp) Thumbs(opt CleanUpOptions) (thumbs int, err error) {
// Cache removes orphaned media and thumbnail cache files.
func (w *CleanUp) Cache(opt CleanUpOptions) (deleted int, err error) {
cleanupStart := time.Now()
var fileHashes, thumbHashes query.HashMap
// Fetch existing media and thumb file hashes.
if fileHashes, err = query.FileHashMap(); err != nil {
return thumbs, err
return deleted, err
} else if thumbHashes, err = query.ThumbHashMap(); err != nil {
return thumbs, err
return deleted, err
}
// At least one SHA1 checksum found?
if len(fileHashes) == 0 {
log.Info("cleanup: empty index, aborting search for orphaned thumbnails")
return thumbs, err
log.Info("cleanup: empty index, aborting search for orphaned cache files")
return deleted, err
}
// Thumbnails storage path.
thumbPath := w.conf.ThumbCachePath()
// Cache directories.
dirs := []string{w.conf.MediaCachePath(), w.conf.ThumbCachePath()}
log.Info("cleanup: searching for orphaned thumbnails")
log.Info("cleanup: searching for orphaned cache files")
// Find and remove orphan thumbnail files.
err = fastwalk.Walk(thumbPath, func(fileName string, info os.FileMode) error {
base := filepath.Base(fileName)
for _, dir := range dirs {
err = fastwalk.Walk(dir, func(fileName string, info os.FileMode) error {
base := filepath.Base(fileName)
if info.IsDir() || strings.HasPrefix(base, ".") {
return nil
}
// Example: 01244519acf35c62a5fea7a5a7dcefdbec4fb2f5_3x3_resize.png
i := strings.IndexAny(base, "_.")
if i < 39 {
return nil
}
hash := base[:i]
logName := clean.Log(fs.RelName(fileName, filepath.Dir(dir)))
if ok := fileHashes[hash]; ok {
// Do nothing.
} else if ok = thumbHashes[hash]; ok {
// Do nothing.
} else if opt.Dry {
deleted++
log.Debugf("cleanup: %s would be removed", logName)
} else if err := os.Remove(fileName); err != nil {
log.Warnf("cleanup: %s in %s", err, logName)
} else {
deleted++
log.Debugf("cleanup: removed %s from cache", logName)
}
if info.IsDir() || strings.HasPrefix(base, ".") {
return nil
}
})
}
// Example: 01244519acf35c62a5fea7a5a7dcefdbec4fb2f5_3x3_resize.png
i := strings.Index(base, "_")
log.Infof("cleanup: removed %s from cache [%s]", english.Plural(deleted, "file", "files"), time.Since(cleanupStart))
if i < 39 {
return nil
}
hash := base[:i]
logName := clean.Log(fs.RelName(fileName, thumbPath))
if ok := fileHashes[hash]; ok {
// Do nothing.
} else if ok = thumbHashes[hash]; ok {
// Do nothing.
} else if opt.Dry {
thumbs++
log.Debugf("cleanup: thumbnail %s would be removed", logName)
} else if err := os.Remove(fileName); err != nil {
log.Warnf("cleanup: %s in %s", err, logName)
} else {
thumbs++
log.Debugf("cleanup: removed thumbnail %s from cache", logName)
}
return nil
})
log.Infof("cleanup: removed %s [%s]", english.Plural(thumbs, "thumbnail file", "thumbnail files"), time.Since(cleanupStart))
return thumbs, err
return deleted, err
}
// Cancel stops the current operation.

View file

@ -0,0 +1,118 @@
package photoprism
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path"
"path/filepath"
"time"
"github.com/photoprism/photoprism/internal/event"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
)
// FixJpeg tries to re-encode a broken JPEG with ImageMagick and returns the
// re-encoded image file stored in the media cache.
//
// The cache file is named after the hash of f and placed in a directory
// derived from the first three characters of that hash. If a JPEG with that
// name already exists in the cache, it is returned as is, unless force is
// true and the cached file reports InSidecar(), in which case it is removed
// and created again.
//
// An error is returned if f is nil, missing, empty, or not a JPEG, if
// ImageMagick is disabled or not allowed for JPEGs, if the cache directory
// cannot be created, or if the ImageMagick command fails.
func (c *Convert) FixJpeg(f *MediaFile, force bool) (*MediaFile, error) {
	if f == nil {
		return nil, fmt.Errorf("convert: file is nil - possible bug")
	}

	logName := clean.Log(f.RootRelName())

	if c.conf.DisableImageMagick() || !c.imagemagickBlacklist.Allow(fs.ExtJPEG) {
		return nil, fmt.Errorf("convert: ImageMagick must be enabled to re-encode %s", logName)
	}

	if !f.Exists() {
		return nil, fmt.Errorf("convert: %s not found", logName)
	} else if f.Empty() {
		return nil, fmt.Errorf("convert: %s is empty", logName)
	} else if !f.IsJpeg() {
		return nil, fmt.Errorf("convert: %s is not a jpeg", logName)
	}

	hash := f.Hash()

	// The first three characters of the hash are used as nested directory
	// names below, so a shorter value must be rejected to avoid a panic when
	// slicing. NOTE(review): presumably Hash() yields an empty string when the
	// file could not be hashed - confirm against its implementation.
	if len(hash) < 3 {
		return nil, fmt.Errorf("convert: invalid file hash for %s", logName)
	}

	dir := path.Join(c.conf.MediaCachePath(), hash[0:1], hash[1:2], hash[2:3])

	if err := os.MkdirAll(dir, fs.ModeDir); err != nil {
		return nil, fmt.Errorf("convert: failed to create cache directory (%w)", err)
	}

	cacheName := filepath.Join(dir, hash+fs.ExtJPEG)

	mediaFile, err := NewMediaFile(cacheName)

	// Return the existing cache file, or remove it first if "force" is true
	// and it is reported to be in the sidecar folder.
	if err == nil && mediaFile.IsJpeg() {
		if force && mediaFile.InSidecar() {
			if err := mediaFile.Remove(); err != nil {
				return mediaFile, fmt.Errorf("convert: failed removing %s (%s)", clean.Log(mediaFile.RootRelName()), err)
			} else {
				log.Infof("convert: replacing %s", clean.Log(mediaFile.RootRelName()))
			}
		} else {
			return mediaFile, nil
		}
	}

	fileName := f.RelName(c.conf.OriginalsPath())

	// Publish file conversion event.
	event.Publish("index.converting", event.Data{
		"fileType": f.FileType(),
		"fileName": fileName,
		"baseName": filepath.Base(fileName),
		"xmpName":  "",
	})

	// Reuse a cache file that exists at this point, before preparing the command.
	if fs.FileExists(cacheName) {
		return NewMediaFile(cacheName)
	}

	start := time.Now()

	// Re-encode the JPEG with ImageMagick using the configured quality and
	// size limit; the ">" geometry suffix only shrinks larger images.
	quality := fmt.Sprintf("%d", c.conf.JpegQuality())
	resize := fmt.Sprintf("%dx%d>", c.conf.JpegSize(), c.conf.JpegSize())
	args := []string{f.FileName(), "-flatten", "-resize", resize, "-quality", quality, cacheName}

	cmd := exec.Command(c.conf.ImageMagickBin(), args...)

	// Fetch command output.
	var out bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &out
	cmd.Stderr = &stderr

	// The command runs with only these two environment variables set.
	cmd.Env = []string{
		fmt.Sprintf("HOME=%s", c.conf.CmdCachePath()),
		fmt.Sprintf("LD_LIBRARY_PATH=%s", c.conf.CmdLibPath()),
	}

	log.Infof("convert: re-encoding %s to %s (%s)", logName, clean.Log(filepath.Base(cacheName)), filepath.Base(cmd.Path))

	// Log exact command for debugging in trace mode.
	log.Trace(cmd.String())

	// Run convert command.
	if err = cmd.Run(); err != nil {
		// Prefer the message written to stderr over the generic exit error.
		if stderr.String() != "" {
			err = errors.New(stderr.String())
		}

		log.Tracef("convert: %s (%s)", err, filepath.Base(cmd.Path))

		return nil, err
	}

	if fs.FileExistsNotEmpty(cacheName) {
		log.Infof("convert: %s created in %s (%s)", clean.Log(filepath.Base(cacheName)), time.Since(start), filepath.Base(cmd.Path))
	}

	return NewMediaFile(cacheName)
}

View file

@ -0,0 +1,53 @@
package photoprism
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/config"
"github.com/photoprism/photoprism/pkg/fs"
)
// TestConvert_FixJpeg verifies that FixJpeg re-encodes an example JPEG and
// stores the result under the expected name in the media cache.
// The test is skipped in short mode.
func TestConvert_FixJpeg(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	cnf := config.TestConfig()
	cnf.InitializeTestData()
	convert := NewConvert(cnf)

	t.Run("elephants.jpg", func(t *testing.T) {
		fileName := filepath.Join(cnf.ExamplesPath(), "elephants.jpg")
		outputName := filepath.Join(cnf.MediaCachePath(), "b/1/0/b10447b54c3330eb13566735322e971cc1dcbc41.jpg")

		// Start without a cached file so the conversion actually runs; a
		// missing file is not an error here.
		_ = os.Remove(outputName)

		// Remove the generated file again even if the test fails early.
		t.Cleanup(func() {
			_ = os.Remove(outputName)
		})

		assert.Truef(t, fs.FileExists(fileName), "input file does not exist: %s", fileName)

		mf, err := NewMediaFile(fileName)

		if err != nil {
			t.Fatal(err)
		}

		jpegFile, err := convert.FixJpeg(mf, false)

		if err != nil {
			t.Fatal(err)
		}

		// Expected value first, actual value second.
		assert.Equal(t, outputName, jpegFile.FileName())
		assert.Truef(t, fs.FileExists(jpegFile.FileName()), "output file does not exist: %s", jpegFile.FileName())

		t.Logf("old jpeg filename: %s", mf.FileName())
		t.Logf("old jpeg metadata: %#v", mf.MetaData())
		t.Logf("new jpeg filename: %s", jpegFile.FileName())
		t.Logf("new jpeg metadata: %#v", jpegFile.MetaData())
	})
}

View file

@ -98,9 +98,20 @@ func (m *MediaFile) CreateThumbnails(thumbPath string, force bool) (err error) {
if original == nil {
img, err := thumb.Open(m.FileName(), m.Orientation())
// Handle error and try to fix broken JPEGs if possible.
if err != nil {
log.Debugf("media: %s in %s", err.Error(), clean.Log(m.RootRelName()))
return err
if err.Error() != "invalid JPEG format: bad RST marker while decoding" {
log.Debugf("media: %s in %s", err.Error(), clean.Log(m.RootRelName()))
return err
}
if fixed, err := NewConvert(conf).FixJpeg(m, false); err != nil {
return err
} else if fixedImg, err := thumb.Open(fixed.FileName(), m.Orientation()); err != nil {
return err
} else {
img = fixedImg
}
}
original = img