azure: implement multipart uploads using the low-level API

The high-level wrapper (azblob.UploadStreamToBlockBlob) seems to hang if there
are network issues, for example when Azurite is shut down mid-upload, so stage
each block with StageBlock and commit the block list with CommitBlockList
instead.

parent 14bcc6f2fc
commit 9b49f63a97

3 changed files with 166 additions and 5 deletions
@@ -227,7 +227,7 @@ func (t *BaseTransfer) Close() error {
 		if err == nil {
 			fileSize = info.Size()
 		}
-		t.Connection.Log(logger.LevelDebug, "upload file size %v stat error %v", fileSize, err)
+		t.Connection.Log(logger.LevelDebug, "uploaded file size %v stat error: %v", fileSize, err)
 		t.updateQuota(numFiles, fileSize)
 		logger.TransferLog(uploadLogSender, t.fsPath, elapsed, atomic.LoadInt64(&t.BytesReceived), t.Connection.User.Username,
 			t.Connection.ID, t.Connection.protocol)

vfs/azblobfs.go (165 changed lines)

@@ -3,7 +3,9 @@
 package vfs

 import (
+	"bytes"
 	"context"
+	"encoding/base64"
 	"errors"
 	"fmt"
 	"io"
@@ -14,6 +16,7 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"

 	"github.com/Azure/azure-storage-blob-go/azblob"
@@ -268,12 +271,16 @@ func (fs AzureBlobFs) Create(name string, flag int) (*os.File, *PipeWriter, func
 	go func() {
 		defer cancelFn()

-		uploadOptions := azblob.UploadStreamToBlockBlobOptions{
-			BufferSize:      int(fs.config.UploadPartSize),
-			BlobHTTPHeaders: headers,
-			MaxBuffers:      fs.config.UploadConcurrency,
-		}
-		_, err := azblob.UploadStreamToBlockBlob(ctx, r, blobBlockURL, uploadOptions)
+		/*uploadOptions := azblob.UploadStreamToBlockBlobOptions{
+			BufferSize:      int(fs.config.UploadPartSize),
+			BlobHTTPHeaders: headers,
+			MaxBuffers:      fs.config.UploadConcurrency,
+		}
+		// UploadStreamToBlockBlob seems to have issues if there is an error, for example
+		// if we shutdown Azurite while uploading it hangs, so we use our own wrapper for
+		// the low level functions
+		_, err := azblob.UploadStreamToBlockBlob(ctx, r, blobBlockURL, uploadOptions)*/
+		err := fs.handleMultipartUpload(ctx, r, blobBlockURL, headers)
 		r.CloseWithError(err) //nolint:errcheck
 		p.Done(err)
 		fsLog(fs, logger.LevelDebug, "upload completed, path: %#v, readed bytes: %v, err: %v", name, r.GetReadedBytes(), err)
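In short, the hunk above swaps the high-level azblob.UploadStreamToBlockBlob call for the commit's own loop over the low-level block APIs: each part is staged with StageBlock and the accumulated block list is committed once with CommitBlockList. What follows is a minimal sequential sketch of that flow, not the commit's code: uploadBlocks is a hypothetical name, and the buffer pool, bounded concurrency, and per-block timeouts of the real handleMultipartUpload (added later in this diff) are left out.

package vfs // sketch only, written as if it sat next to azblobfs.go

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/binary"
	"io"

	"github.com/Azure/azure-storage-blob-go/azblob"
)

// uploadBlocks stages one block per part read from reader, then commits
// the block list so the staged blocks become the blob's content.
func uploadBlocks(ctx context.Context, reader io.Reader, blockBlobURL azblob.BlockBlobURL,
	headers azblob.BlobHTTPHeaders, partSize int) error {
	var blocks []string
	buf := make([]byte, partSize)

	for part := uint64(1); ; part++ {
		n, err := io.ReadFull(reader, buf)
		if err == io.EOF { // no data left at all
			break
		}
		if err != nil && err != io.ErrUnexpectedEOF {
			return err
		}
		// block IDs must be base64 strings of equal length within a blob,
		// so encode a fixed-width little-endian counter
		binaryID := make([]byte, 8)
		binary.LittleEndian.PutUint64(binaryID, part)
		blockID := base64.StdEncoding.EncodeToString(binaryID)
		blocks = append(blocks, blockID)

		if _, err := blockBlobURL.StageBlock(ctx, blockID, bytes.NewReader(buf[:n]),
			azblob.LeaseAccessConditions{}, nil); err != nil {
			return err
		}
		if err == io.ErrUnexpectedEOF { // short read, that was the last part
			break
		}
	}
	// make the staged blocks visible as the blob's content
	_, err := blockBlobURL.CommitBlockList(ctx, blocks, headers, azblob.Metadata{},
		azblob.BlobAccessConditions{})
	return err
}

The real implementation uploads blocks concurrently, which is why it also needs the guard channel and the errOnce bookkeeping visible further down in this diff.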
@@ -445,7 +452,11 @@ func (fs AzureBlobFs) ReadDir(dirname string) ([]os.FileInfo, error) {
 			if blobPrefix.Name == "/" {
 				continue
 			}
+			// sometime we have duplicate prefixes, maybe an Azurite bug
 			name := strings.TrimPrefix(blobPrefix.Name, prefix)
+			if _, ok := prefixes[strings.TrimSuffix(name, "/")]; ok {
+				continue
+			}
 			result = append(result, NewFileInfo(name, true, 0, time.Now(), false))
 			prefixes[strings.TrimSuffix(name, "/")] = true
 		}
@@ -463,6 +474,7 @@ func (fs AzureBlobFs) ReadDir(dirname string) ([]os.FileInfo, error) {
 				if _, ok := prefixes[name]; ok {
 					continue
 				}
+				prefixes[name] = true
 			}
 		}
 		result = append(result, NewFileInfo(name, isDir, size, blobInfo.Properties.LastModified, false))
@@ -722,3 +734,152 @@ func (fs *AzureBlobFs) getPrefixForStat(name string) string {
 	}
 	return prefix
 }
+
+func (fs *AzureBlobFs) handleMultipartUpload(ctx context.Context, reader io.Reader, blockBlobURL azblob.BlockBlobURL,
+	httpHeaders azblob.BlobHTTPHeaders) error {
+	partSize := fs.config.UploadPartSize
+	guard := make(chan struct{}, fs.config.UploadConcurrency)
+	blockCtxTimeout := time.Duration(fs.config.UploadPartSize/(1024*1024)) * time.Minute
+
+	// sync.Pool seems to use a lot of memory so prefer our own, very simple, allocator
+	// we only need to recycle few byte slices
+	pool := newBufferAllocator(int(partSize))
+	finished := false
+	binaryBlockID := make([]byte, 8)
+	var blocks []string
+	var wg sync.WaitGroup
+	var errOnce sync.Once
+	var poolError error
+
+	poolCtx, poolCancel := context.WithCancel(ctx)
+	defer poolCancel()
+
+	for part := 0; !finished; part++ {
+		buf := pool.getBuffer()
+
+		n, err := fs.readFill(reader, buf)
+		if err == io.EOF {
+			// read finished, if n > 0 we need to process the last data chunck
+			if n == 0 {
+				pool.releaseBuffer(buf)
+				break
+			}
+			finished = true
+		} else if err != nil {
+			pool.releaseBuffer(buf)
+			return err
+		}
+
+		fs.incrementBlockID(binaryBlockID)
+		blockID := base64.StdEncoding.EncodeToString(binaryBlockID)
+		blocks = append(blocks, blockID)
+
+		guard <- struct{}{}
+		if poolError != nil {
+			fsLog(fs, logger.LevelDebug, "pool error, upload for part %v not started", part)
+			pool.releaseBuffer(buf)
+			break
+		}
+
+		wg.Add(1)
+		go func(blockID string, buf []byte, bufSize int) {
+			defer wg.Done()
+			bufferReader := bytes.NewReader(buf[:bufSize])
+			innerCtx, cancelFn := context.WithDeadline(poolCtx, time.Now().Add(blockCtxTimeout))
+			defer cancelFn()

+			_, err := blockBlobURL.StageBlock(innerCtx, blockID, bufferReader, azblob.LeaseAccessConditions{}, nil)
+			pool.releaseBuffer(buf)
+			if err != nil {
+				errOnce.Do(func() {
+					poolError = err
+					fsLog(fs, logger.LevelDebug, "multipart upload error: %v", poolError)
+					poolCancel()
+				})
+			}
+			<-guard
+		}(blockID, buf, n)
+	}
+
+	wg.Wait()
+	close(guard)
+	pool.free()
+
+	if poolError != nil {
+		return poolError
+	}
+
+	_, err := blockBlobURL.CommitBlockList(ctx, blocks, httpHeaders, azblob.Metadata{}, azblob.BlobAccessConditions{})
+	return err
+}
+
+// copied from rclone
+func (fs *AzureBlobFs) readFill(r io.Reader, buf []byte) (n int, err error) {
+	var nn int
+	for n < len(buf) && err == nil {
+		nn, err = r.Read(buf[n:])
+		n += nn
+	}
+	return n, err
+}
+
+// copied from rclone
+func (fs *AzureBlobFs) incrementBlockID(blockID []byte) {
+	for i, digit := range blockID {
+		newDigit := digit + 1
+		blockID[i] = newDigit
+		if newDigit >= digit {
+			// exit if no carry
+			break
+		}
+	}
+}
+
+type bufferAllocator struct {
+	sync.Mutex
+	available  [][]byte
+	bufferSize int
+}
+
+func newBufferAllocator(size int) *bufferAllocator {
+	return &bufferAllocator{
+		bufferSize: size,
+	}
+}
+
+func (b *bufferAllocator) getBuffer() []byte {
+	b.Lock()
+	defer b.Unlock()
+
+	if len(b.available) > 0 {
+		var result []byte
+
+		truncLength := len(b.available) - 1
+		result = b.available[truncLength]
+
+		b.available[truncLength] = nil
+		b.available = b.available[:truncLength]
+
+		return result
+	}
+
+	return make([]byte, b.bufferSize)
+}
+
+func (b *bufferAllocator) releaseBuffer(buf []byte) {
+	b.Lock()
+	defer b.Unlock()
+
+	if len(buf) != b.bufferSize {
+		return
+	}
+
+	b.available = append(b.available, buf)
+}
+
+func (b *bufferAllocator) free() {
+	b.Lock()
+	defer b.Unlock()
+
+	b.available = nil
+}
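Two behavioral notes on the helpers above: readFill reports a final short read as n > 0 together with io.EOF (io.ReadFull would return io.ErrUnexpectedEOF instead), which is exactly what the EOF branch of handleMultipartUpload relies on; and incrementBlockID treats its slice as a little-endian counter with carry, so every encoded block ID keeps the same 8-byte width, matching Azure's requirement that block IDs within a blob have equal length. A hypothetical snippet, written as if it lived in package vfs (demoHelpers is not part of the commit):

func demoHelpers() {
	fs := &AzureBlobFs{}

	// a short final read yields the byte count and io.EOF in one call
	buf := make([]byte, 8)
	n, err := fs.readFill(strings.NewReader("abc"), buf)
	fmt.Println(n, err) // prints: 3 EOF

	// the counter carries across bytes: 0xFFFF -> 0x000001 (little-endian)
	id := []byte{255, 255, 0, 0, 0, 0, 0, 0}
	fs.incrementBlockID(id)
	fmt.Println(id) // prints: [0 0 1 0 0 0 0 0]
}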
@@ -244,7 +244,7 @@ func ValidateS3FsConfig(config *S3FsConfig) error {
 	if config.UploadPartSize != 0 && (config.UploadPartSize < 5 || config.UploadPartSize > 5000) {
 		return errors.New("upload_part_size cannot be != 0, lower than 5 (MB) or greater than 5000 (MB)")
 	}
-	if config.UploadConcurrency < 0 {
+	if config.UploadConcurrency < 0 || config.UploadConcurrency > 64 {
 		return fmt.Errorf("invalid upload concurrency: %v", config.UploadConcurrency)
 	}
 	return nil
@@ -300,7 +300,7 @@ func ValidateAzBlobFsConfig(config *AzBlobFsConfig) error {
 	if config.UploadPartSize < 0 || config.UploadPartSize > 100 {
 		return fmt.Errorf("invalid upload part size: %v", config.UploadPartSize)
 	}
-	if config.UploadConcurrency < 0 {
+	if config.UploadConcurrency < 0 || config.UploadConcurrency > 64 {
 		return fmt.Errorf("invalid upload concurrency: %v", config.UploadConcurrency)
 	}
 	return nil
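The last two hunks tighten validation: upload_concurrency is now capped at 64 for both the S3 and the Azure Blob backends. For Azure this also bounds handleMultipartUpload's guard channel and buffer pool, so roughly at most UploadConcurrency x UploadPartSize bytes of part buffers are in flight. A hypothetical check, again written as if inside package vfs:

func demoValidation() { // not part of the commit
	cfg := AzBlobFsConfig{
		UploadPartSize:    10,  // MB, must stay within [0, 100]
		UploadConcurrency: 128, // now rejected, must stay within [0, 64]
	}
	fmt.Println(ValidateAzBlobFsConfig(&cfg))
	// prints: invalid upload concurrency: 128
}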