package controller import ( "database/sql" "errors" "fmt" "strings" "sync" stime "time" "github.com/ente-io/museum/pkg/controller/lock" "github.com/ente-io/stacktrace" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/spf13/viper" "github.com/aws/aws-sdk-go/service/s3" "github.com/ente-io/museum/ente" "github.com/ente-io/museum/pkg/repo" "github.com/ente-io/museum/pkg/utils/s3config" enteString "github.com/ente-io/museum/pkg/utils/string" "github.com/ente-io/museum/pkg/utils/time" log "github.com/sirupsen/logrus" ) // ObjectCleanupController exposes functions to remove orphan and stale entries // from object storage. // // There are 3 main types of orphans that can end up in our object storage: // // 1. We create presigned URLs for clients to upload their objects to. It might // happen that the client is able to successfully upload to these URLs, but // not tell museum about the successful upload. // // 2. During replication, we might have half-done multipart uploads. // // 3. When an existing object is updated (e.g. the user edits the file on iOS), // then the file entry in our DB is updated to point to the new object, and // the old object is now meant to be discarded. // // ObjectCleanupController is meant to manage all these scenarios over time. type ObjectCleanupController struct { Repo *repo.ObjectCleanupRepository ObjectRepo *repo.ObjectRepository LockController *lock.LockController ObjectController *ObjectController S3Config *s3config.S3Config // Prometheus Metrics mOrphanObjectsDeleted *prometheus.CounterVec } // PreSignedRequestValidityDuration is the lifetime of a pre-signed URL const PreSignedRequestValidityDuration = 7 * 24 * stime.Hour // PreSignedPartUploadRequestDuration is the lifetime of a pre-signed multipart URL const PreSignedPartUploadRequestDuration = 7 * 24 * stime.Hour // clearOrphanObjectsCheckInterval is the interval after which we check if the // ClearOrphanObjects job needs to be re-run. // // See also, clearOrphanObjectsMinimumJobInterval. const clearOrphanObjectsCheckInterval = 1 * 24 * stime.Hour // ClearOrphanObjectsMinimumJobInterval is the minimum interval that must pass // before we run another instance of the ClearOrphanObjects job. // // This interval is enforced across museum instances. const clearOrphanObjectsMinimumJobInterval = 2 * 24 * stime.Hour // Return a new instance of ObjectCleanupController func NewObjectCleanupController( objectCleanupRepo *repo.ObjectCleanupRepository, objectRepo *repo.ObjectRepository, lockController *lock.LockController, objectController *ObjectController, s3Config *s3config.S3Config, ) *ObjectCleanupController { mOrphanObjectsDeleted := promauto.NewCounterVec(prometheus.CounterOpts{ Name: "museum_orphan_objects_deleted_total", Help: "Number of objects successfully deleted when clearing orphan objects", }, []string{"dc"}) return &ObjectCleanupController{ Repo: objectCleanupRepo, ObjectRepo: objectRepo, LockController: lockController, ObjectController: objectController, S3Config: s3Config, mOrphanObjectsDeleted: mOrphanObjectsDeleted, } } // StartRemovingUnreportedObjects starts goroutines to cleanup deletes those // objects that were possibly uploaded but not reported to the database func (c *ObjectCleanupController) StartRemovingUnreportedObjects() { // TODO: object_cleanup: This code is only currently tested for B2 if c.S3Config.GetHotDataCenter() != c.S3Config.GetHotBackblazeDC() { log.Info("Skipping RemovingUnreportedObjects since the Hot DC is not B2") return } workerCount := viper.GetInt("jobs.remove-unreported-objects.worker-count") if workerCount == 0 { workerCount = 1 } log.Infof("Starting %d workers to remove-unreported-objects", workerCount) for i := 0; i < workerCount; i++ { go c.removeUnreportedObjectsWorker(i) } } // Entry point for the worker goroutine to cleanup unreported objects. // // i is an arbitrary index for the current goroutine. func (c *ObjectCleanupController) removeUnreportedObjectsWorker(i int) { for { count := c.removeUnreportedObjects() if count == 0 { stime.Sleep(stime.Duration(5+i) * stime.Minute) } else { stime.Sleep(stime.Second) } } } func (c *ObjectCleanupController) removeUnreportedObjects() int { logger := log.WithFields(log.Fields{ "task": "remove-unreported-objects", }) logger.Info("Removing unreported objects") count := 0 tx, tempObjects, err := c.Repo.GetAndLockExpiredObjects() if err != nil { if !errors.Is(err, sql.ErrNoRows) { logger.Error(err) } return count } for _, tempObject := range tempObjects { err = c.removeUnreportedObject(tx, tempObject) if err != nil { continue } count += 1 } logger.Infof("Removed %d objects", count) // We always commit the transaction, even on errors for individual rows. To // avoid object getting stuck in a loop, we increase their expiry times. cerr := tx.Commit() if cerr != nil { cerr = stacktrace.Propagate(err, "Failed to commit transaction") logger.Error(cerr) } return count } func (c *ObjectCleanupController) removeUnreportedObject(tx *sql.Tx, t ente.TempObject) error { // TODO: object_cleanup // This should use the DC from TempObject (once we start persisting it) // dc := t.DataCenter dc := c.S3Config.GetHotDataCenter() logger := log.WithFields(log.Fields{ "task": "remove-unreported-objects", "object_key": t.ObjectKey, "data_center": dc, "upload_id": t.UploadID, }) skip := func(err error) error { logger.Errorf("Clearing tempObject failed: %v", err) newExpiry := time.MicrosecondsAfterDays(1) serr := c.Repo.SetExpiryForTempObject(tx, t, newExpiry) if serr != nil { logger.Errorf("Updating expiry for failed temp object failed: %v", serr) } return err } logger.Info("Clearing tempObject") exists, err := c.ObjectRepo.DoesObjectExist(tx, t.ObjectKey) if err != nil { return skip(stacktrace.Propagate(err, "")) } if exists { err := errors.New("aborting attempt to delete an object which has a DB entry") return skip(stacktrace.Propagate(err, "")) } if t.IsMultipart { err = c.abortMultipartUpload(t.ObjectKey, t.UploadID, dc) } else { err = c.DeleteObjectFromDataCenter(t.ObjectKey, dc) } if err != nil { return skip(err) } err = c.Repo.RemoveTempObject(tx, t) if err != nil { return skip(err) } return nil } // AddTempObjectKey creates a new temporary object entry. // // It persists a given object key as having been provided to a client for // uploading. If a client does not successfully mark this object's upload as // having completed within PreSignedRequestValidityDuration, this temp object // will be cleaned up. func (c *ObjectCleanupController) AddTempObjectKey(objectKey string, dc string) error { expiry := time.Microseconds() + (2 * PreSignedRequestValidityDuration.Microseconds()) return c.addCleanupEntryForObjectKey(objectKey, dc, expiry) } // Add the object to a queue of "temporary" objects that are deleted (if they // exist) if this entry is not removed from the queue by expirationTime. func (c *ObjectCleanupController) addCleanupEntryForObjectKey(objectKey string, dc string, expirationTime int64) error { err := c.Repo.AddTempObject(ente.TempObject{ ObjectKey: objectKey, IsMultipart: false, DataCenter: dc, }, expirationTime) return stacktrace.Propagate(err, "") } // AddTempObjectMultipartKey creates a new temporary object entry for a // multlipart upload. // // See AddTempObjectKey for more details. func (c *ObjectCleanupController) AddMultipartTempObjectKey(objectKey string, uploadID string, dc string) error { expiry := time.Microseconds() + (2 * PreSignedPartUploadRequestDuration.Microseconds()) err := c.Repo.AddTempObject(ente.TempObject{ ObjectKey: objectKey, IsMultipart: true, UploadID: uploadID, DataCenter: dc, }, expiry) return stacktrace.Propagate(err, "") } func (c *ObjectCleanupController) DeleteAllObjectsWithPrefix(prefix string, dc string) error { s3Client := c.S3Config.GetS3Client(dc) bucket := c.S3Config.GetBucket(dc) output, err := s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ Bucket: bucket, Prefix: &prefix, }) if err != nil { log.Error(err) return stacktrace.Propagate(err, "") } var keys []string for _, obj := range output.Contents { keys = append(keys, *obj.Key) } for _, key := range keys { err = c.DeleteObjectFromDataCenter(key, dc) if err != nil { log.Error(err) return stacktrace.Propagate(err, "") } } return nil } func (c *ObjectCleanupController) DeleteObjectFromDataCenter(objectKey string, dc string) error { log.Info("Deleting " + objectKey + " from " + dc) var s3Client = c.S3Config.GetS3Client(dc) bucket := c.S3Config.GetBucket(dc) _, err := s3Client.DeleteObject(&s3.DeleteObjectInput{ Bucket: bucket, Key: &objectKey, }) if err != nil { return stacktrace.Propagate(err, "") } err = s3Client.WaitUntilObjectNotExists(&s3.HeadObjectInput{ Bucket: bucket, Key: &objectKey, }) if err != nil { return stacktrace.Propagate(err, "") } return nil } func (c *ObjectCleanupController) abortMultipartUpload(objectKey string, uploadID string, dc string) error { s3Client := c.S3Config.GetS3Client(dc) bucket := c.S3Config.GetBucket(dc) _, err := s3Client.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ Bucket: bucket, Key: &objectKey, UploadId: &uploadID, }) if err != nil { if isUnknownUploadError(err) { log.Info("Could not find upload for " + objectKey) return nil } return stacktrace.Propagate(err, "") } r, err := s3Client.ListParts(&s3.ListPartsInput{ Bucket: bucket, Key: &objectKey, UploadId: &uploadID, }) if err != nil { if isUnknownUploadError(err) { // This is expected now, since we just aborted the upload return nil } return stacktrace.Propagate(err, "") } if len(r.Parts) > 0 { return stacktrace.NewError("abort Failed") } return nil } // The original code here checked for NoSuchUpload, presumably because that is // the error that B2 returns. // // Wasabi returns something similar: // // // NoSuchUpload // The specified upload does not exist. The upload ID may be invalid, // or the upload may have been aborted or completed. // ... // // However, Scaleway returns a different error, NoSuchKey // // // NoSuchKey // The specified key does not exist. // ... // // This method returns true if either of these occur. func isUnknownUploadError(err error) bool { // B2, Wasabi if strings.Contains(err.Error(), "NoSuchUpload") { return true } // Scaleway if strings.Contains(err.Error(), "NoSuchKey") { return true } return false } // StartClearingOrphanObjects is the entry point for the job that goes through // all the objects in the given datacenter, and deletes orphan objects for which // we do not have DB entries. // // Such orphan objects are expected to have been created because the code for // updating the DB entries when a file gets updated did not cleanup the // corresponding objects from object storage. Once we start keeping track of // such objects in a separate queue, this cron job won't be needed. func (c *ObjectCleanupController) StartClearingOrphanObjects() { // TODO: object_cleanup: This code is only currently tested for B2 if c.S3Config.GetHotDataCenter() != c.S3Config.GetHotBackblazeDC() { log.Info("Skipping ClearingOrphanObjects since the Hot DC is not B2") return } isJobEnabled := viper.GetBool("jobs.clear-orphan-objects.enabled") if !isJobEnabled { return } prefix := viper.GetString("jobs.clear-orphan-objects.prefix") log.Infof("Starting workers to clear-orphan-objects (prefix %s)", prefix) // TODO: object_cleanup: start workers for other DCs once the temp_objects // table supports specifying a DC go c.clearOrphanObjectsWorker(c.S3Config.GetHotBackblazeDC(), prefix) } // clearOrphanObjectsWorker is the entry point for the worker goroutine to // cleanup objects in a particular DC. func (c *ObjectCleanupController) clearOrphanObjectsWorker(dc string, prefix string) { for { c.ClearOrphanObjects(dc, prefix, false) stime.Sleep(clearOrphanObjectsCheckInterval) } } // IsValidClearOrphanObjectsDC verifies that the given DC is valid for use as // the target of an orphan object cleanup. func (c *ObjectCleanupController) IsValidClearOrphanObjectsDC(dc string) bool { if dc != c.S3Config.GetHotBackblazeDC() { return false } // TODO: object_cleanup: This code is only currently tested for B2 if c.S3Config.GetHotDataCenter() != c.S3Config.GetHotBackblazeDC() { return false } return true } func (c *ObjectCleanupController) ClearOrphanObjects(dc string, prefix string, forceTaskLock bool) { logger := log.WithFields(log.Fields{ "task": "clear-orphan-objects", "data_center": dc, }) if !c.IsValidClearOrphanObjectsDC(dc) { logger.Errorf("Unsupported DC %s", dc) return } lockName := clearOrphanObjectsLockName(dc) if forceTaskLock { logger.Infof("Forcefully removing task lock %s", lockName) err := c.LockController.TaskLockingRepo.ReleaseLock(lockName) if err != nil { logger.Error(stacktrace.Propagate(err, "")) return } } if !c.LockController.TryLock(lockName, clearOrphanObjectsNextLockUntil()) { logger.Infof("Skipping since a lock could not be obtained") return } // The lock is not released intentionally // // By keeping the stale entry for the unheld lock in the DB, we will be able // to retain the timestamp when this job last ran. This is a kludgy way to // guarantee that clearOrphanObjectsMinimumJobInterval is enforced across // all museum instances (without introducing a new DB table). // // defer c.LockController.ReleaseLock(lockName) s3Config := c.S3Config dest := &CleanupOrphanObjectsDestination{ DC: dc, Client: s3Config.GetS3Client(dc), Bucket: s3Config.GetBucket(dc), HasComplianceHold: s3Config.WasabiComplianceDC() == dc, } logger.Infof("Clearing orphan objects from bucket %s (hasComplianceHold %v)", *dest.Bucket, dest.HasComplianceHold) // Each directory listing of an S3 bucket returns a maximum of 1000 objects, // and an optional continuation token. Until there are more objects // (indicated by the presence of the continuation token), keep fetching // directory listings. // // For each directory listing, spawn 10 goroutines to go through chunks of // 100 each to clear orphan objects. // // Refresh the lock's acquisition time during each iteration since this job // can span hours, and we don't want a different instance to start another // run just because it was only considering the start time of the job. err := dest.Client.ListObjectVersionsPages(&s3.ListObjectVersionsInput{ Bucket: dest.Bucket, Prefix: &prefix, }, func(page *s3.ListObjectVersionsOutput, lastPage bool) bool { c.clearOrphanObjectsPage(page, dest, logger) lerr := c.LockController.ExtendLock(lockName, clearOrphanObjectsNextLockUntil()) if lerr != nil { logger.Error(lerr) return false } return true }) if err != nil { logger.Error(stacktrace.Propagate(err, "")) return } logger.Info("Cleared orphan objects") } func clearOrphanObjectsLockName(dc string) string { return fmt.Sprintf("clear-orphan-objects:%s", dc) } func clearOrphanObjectsNextLockUntil() int64 { return time.Microseconds() + clearOrphanObjectsMinimumJobInterval.Microseconds() } type CleanupOrphanObjectsDestination struct { DC string Client s3.S3 Bucket *string // If true, this bucket has a compliance hold on objects that needs to be // removed first before they can be deleted. HasComplianceHold bool } // ObjectVersionOrDeleteMarker is an abstraction to allow us to reuse the same // code to delete both object versions and delete markers type ObjectVersionOrDeleteMarker struct { ObjectVersion *s3.ObjectVersion DeleteMarker *s3.DeleteMarkerEntry } func (od ObjectVersionOrDeleteMarker) GetKey() *string { if od.ObjectVersion != nil { return od.ObjectVersion.Key } return od.DeleteMarker.Key } func (od ObjectVersionOrDeleteMarker) GetLastModified() *stime.Time { if od.ObjectVersion != nil { return od.ObjectVersion.LastModified } return od.DeleteMarker.LastModified } func (od ObjectVersionOrDeleteMarker) GetVersionId() *string { if od.ObjectVersion != nil { return od.ObjectVersion.VersionId } return od.DeleteMarker.VersionId } func (c *ObjectCleanupController) clearOrphanObjectsPage(page *s3.ListObjectVersionsOutput, dest *CleanupOrphanObjectsDestination, logger *log.Entry) error { // MaxKeys is 1000. Until we can, break it into batches and create a // separate goroutine to process each batch. batchSize := 10 versions := page.Versions nv := len(versions) deleteMarkers := page.DeleteMarkers nd := len(deleteMarkers) n := nv + nd logger.Infof("Processing page containing %d values (%d object versions, %d delete markers)", n, nv, nd) ods := make([]ObjectVersionOrDeleteMarker, n) for i := 0; i < nv; i++ { ods[i] = ObjectVersionOrDeleteMarker{ObjectVersion: versions[i]} } for i := 0; i < nd; i++ { ods[nv+i] = ObjectVersionOrDeleteMarker{DeleteMarker: deleteMarkers[i]} } var wg sync.WaitGroup for i := 0; i < n; i++ { end := i + batchSize if end > n { end = n } if i >= end { // Nothing left break } wg.Add(1) go func(i int, end int) { defer wg.Done() batch := ods[i:end] c.clearOrphanObjectsVersionOrDeleteMarkers(batch, dest, logger) }(i, end) i = end } wg.Wait() return nil } func (c *ObjectCleanupController) clearOrphanObjectsVersionOrDeleteMarkers(ods []ObjectVersionOrDeleteMarker, dest *CleanupOrphanObjectsDestination, logger *log.Entry) { for _, od := range ods { c.clearOrphanObjectsVersionOrDeleteMarker(od, dest, logger) } } func (c *ObjectCleanupController) clearOrphanObjectsVersionOrDeleteMarker(od ObjectVersionOrDeleteMarker, dest *CleanupOrphanObjectsDestination, logger *log.Entry) { if od.GetKey() == nil || od.GetLastModified() == nil { logger.Errorf("Ignoring object with missing fields: %v %v", od.GetKey(), od.GetLastModified()) return } objectKey := *od.GetKey() lastModified := *od.GetLastModified() logger = logger.WithFields(log.Fields{ "object_key": objectKey, "last_modified": lastModified, }) exists, err := c.ObjectRepo.DoesObjectOrTempObjectExist(objectKey) if err != nil { logger.Error(stacktrace.Propagate(err, "Failed to determine if object already exists in DB")) return } if exists { return } // 2 days ago cutoff := stime.Now().AddDate(0, 0, -2) // As a safety check, ignore very recent objects from cleanup if lastModified.After(cutoff) { logger.Warnf("Ignoring too-recent orphan object since it was modified after %v", cutoff) return } logger.Infof("Found orphan object %v", od) if dest.HasComplianceHold { // Remove compliance hold. err := c.ObjectController.DisableObjectConditionalHold(&dest.Client, *dest.Bucket, objectKey) if err != nil { logger.Error(stacktrace.Propagate(err, "Failed to disable conditional hold on object")) return } // Add the object to the cleanup queue with an expiry time that is after // the compliance hold would've passed. Add 2 days of buffer too. expiryDays := s3config.WasabiObjectConditionalHoldDays + 2 expiryTime := time.MicrosecondsAfterDays(expiryDays) c.addCleanupEntryForObjectKey(objectKey, dest.DC, expiryTime) logger.Infof("Disabled compliance hold and added an entry to cleanup orphan object after %v", expiryTime) } else { // Delete it right away. versionID := od.GetVersionId() logger.Infof("Deleting version '%s'", enteString.EmptyIfNil(versionID)) err := c.DeleteObjectVersion(objectKey, versionID, dest) if err != nil { logger.Error(stacktrace.Propagate(err, "Failed to delete object")) } c.mOrphanObjectsDeleted.WithLabelValues(dest.DC).Inc() } } // DeleteObjectVersion can be used to delete objects from versioned buckets. // // If we delete an object in a versioning enabled bucket, deletion does not // actually remove the object and instead creates a delete marker: // // - When we delete a file, it creates a delete marker // - The delete marker becomes the latest version // - The old version of the file still remains // // If we explicitly pass a version ID in the delete call, then the delete marker // won't get created. // // > To delete versioned objects permanently, use `DELETE Object versionId` // // https://docs.aws.amazon.com/AmazonS3/latest/userguide/DeletingObjectVersions.html func (c *ObjectCleanupController) DeleteObjectVersion(objectKey string, versionID *string, dest *CleanupOrphanObjectsDestination) error { _, err := dest.Client.DeleteObject(&s3.DeleteObjectInput{ Bucket: dest.Bucket, Key: &objectKey, VersionId: versionID, }) return stacktrace.Propagate(err, "") }