Browse Source

s3: export PartSize parameter

By default the AWS SDK uses a part size of 5 MB. For big files,
this is not ideal. For Hadoop, it is not uncommon to
use 512 MB.
Michael Bonfils 5 years ago
parent
commit
cdf1233065
5 changed files with 27 additions and 1 deletions
  1. 2 0
      httpd/httpd_test.go
  2. 4 0
      httpd/web.go
  3. 10 1
      templates/user.html
  4. 8 0
      vfs/s3fs.go
  5. 3 0
      vfs/vfs.go

+ 2 - 0
httpd/httpd_test.go

@@ -2010,6 +2010,7 @@ func TestWebUserS3Mock(t *testing.T) {
 	user.FsConfig.S3Config.Endpoint = "http://127.0.0.1:9000/path?a=b"
 	user.FsConfig.S3Config.StorageClass = "Standard"
 	user.FsConfig.S3Config.KeyPrefix = "somedir/subdir/"
+	user.FsConfig.S3Config.PartSize = 5
 	form := make(url.Values)
 	form.Set("username", user.Username)
 	form.Set("home_dir", user.HomeDir)
@@ -2034,6 +2035,7 @@ func TestWebUserS3Mock(t *testing.T) {
 	form.Set("s3_storage_class", user.FsConfig.S3Config.StorageClass)
 	form.Set("s3_endpoint", user.FsConfig.S3Config.Endpoint)
 	form.Set("s3_key_prefix", user.FsConfig.S3Config.KeyPrefix)
+	form.Set("s3_part_size", strconv.FormatInt(int64(user.FsConfig.S3Config.PartSize), 10))
 	form.Set("allowed_extensions", "/dir1::.jpg,.png")
 	form.Set("denied_extensions", "/dir2::.zip")
 	b, contentType, _ := getMultipartFormData(form, "", "")

+ 4 - 0
httpd/web.go

@@ -329,6 +329,10 @@ func getFsConfigFromUserPostFields(r *http.Request) (dataprovider.Filesystem, er
 		fs.S3Config.Endpoint = r.Form.Get("s3_endpoint")
 		fs.S3Config.StorageClass = r.Form.Get("s3_storage_class")
 		fs.S3Config.KeyPrefix = r.Form.Get("s3_key_prefix")
+		fs.S3Config.PartSize, err = strconv.ParseInt(r.Form.Get("s3_part_size"), 10, 64)
+		if err != nil {
+			return fs, err
+		}
 	} else if fs.Provider == 2 {
 		fs.GCSConfig.Bucket = r.Form.Get("gcs_bucket")
 		fs.GCSConfig.StorageClass = r.Form.Get("gcs_storage_class")

+ 10 - 1
templates/user.html

@@ -300,6 +300,15 @@
         </div>
     </div>
 
+    <div class="form-group row s3">
+        <label for="idS3PartSize" class="col-sm-2 col-form-label">Part Size (MB)</label>
+        <div class="col-sm-3">
+            <input type="number" class="form-control" id="idS3PartSize" name="s3_part_size" placeholder=""
+                value="{{.User.FsConfig.S3Config.PartSize}}" min="5">
+        </div>
+        <div class="col-sm-2"></div>
+    </div>
+
     <div class="form-group row s3">
         <label for="idS3KeyPrefix" class="col-sm-2 col-form-label">Key Prefix</label>
         <div class="col-sm-10">
@@ -418,4 +427,4 @@
         }
     }
 </script>
-{{end}}
+{{end}}

+ 8 - 0
vfs/s3fs.go

@@ -36,6 +36,7 @@ type S3FsConfig struct {
 	AccessSecret string `json:"access_secret,omitempty"`
 	Endpoint     string `json:"endpoint,omitempty"`
 	StorageClass string `json:"storage_class,omitempty"`
+	PartSize     int64  `json:"partsize,omitempty"`
 }
 
 // S3Fs is a Fs implementation for Amazon S3 compatible object storage.
@@ -81,6 +82,12 @@ func NewS3Fs(connectionID, localTempDir string, config S3FsConfig) (Fs, error) {
 		awsConfig.S3ForcePathStyle = aws.Bool(true)
 	}
 
+	if fs.config.PartSize == 0 {
+		fs.config.PartSize = s3manager.DefaultUploadPartSize
+	} else {
+		fs.config.PartSize *= 1024 * 1024
+	}
+
 	sessOpts := session.Options{
 		Config:            *awsConfig,
 		SharedConfigState: session.SharedConfigEnable,
@@ -201,6 +208,7 @@ func (fs S3Fs) Create(name string, flag int) (*os.File, *pipeat.PipeWriterAt, fu
 			StorageClass: utils.NilIfEmpty(fs.config.StorageClass),
 		}, func(u *s3manager.Uploader) {
 			u.Concurrency = 2
+			u.PartSize = fs.config.PartSize
 		})
 		r.CloseWithError(err)
 		fsLog(fs, logger.LevelDebug, "upload completed, path: %#v, response: %v, readed bytes: %v, err: %v",

+ 3 - 0
vfs/vfs.go

@@ -103,6 +103,9 @@ func ValidateS3FsConfig(config *S3FsConfig) error {
 			config.KeyPrefix += "/"
 		}
 	}
+	if config.PartSize != 0 && config.PartSize < 5 {
+		return errors.New("part_size ret cannot be lower than 5MB")
+	}
 	return nil
 }