🎨 插入较大的资源文件时内存占用较大 https://github.com/siyuan-note/siyuan/issues/5023

This commit is contained in:
Liang Ding 2022-05-27 12:56:23 +08:00
parent dc46b478bc
commit 49b9f7bc92
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
7 changed files with 137 additions and 103 deletions

View file

@ -17,13 +17,9 @@
package model
import (
"bytes"
"context"
"crypto/sha1"
"encoding/base64"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path"
@ -662,7 +658,7 @@ func localUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo
return nil
}
localHash, hashErr := GetEtag(path)
localHash, hashErr := util.GetEtag(path)
if nil != hashErr {
util.LogErrorf("get local file [%s] etag failed: %s", path, hashErr)
return nil
@ -696,7 +692,7 @@ func cloudUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo
continue
}
localHash, hashErr := GetEtag(localCheckPath)
localHash, hashErr := util.GetEtag(localCheckPath)
if nil != hashErr {
util.LogErrorf("get local file [%s] hash failed: %s", localCheckPath, hashErr)
err = hashErr
@ -760,62 +756,3 @@ func putFileToCloud(filePath, key, upToken string) (err error) {
}
return
}
// The following is an implementation of the Qiniu Cloud hash algorithm,
// see https://github.com/qiniu/qetag/blob/master/qetag.go

// GetEtag opens the file at filename and returns its etag as a URL-safe
// base64 string.
//
// The file size decides the layout of the hashed data:
//   - at most one block (BlockCount <= 1): the etag is the byte 0x16 followed
//     by the SHA-1 of the whole file;
//   - more than one block: every BLOCK_SIZE chunk is hashed separately, the
//     digests are concatenated, and the etag is the byte 0x96 followed by the
//     SHA-1 of that concatenation.
//
// An error is returned when the file cannot be opened or inspected, or when
// reading or hashing fails; in that case etag is empty.
func GetEtag(filename string) (etag string, err error) {
	f, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		return "", err
	}

	blockCnt := BlockCount(fi.Size())
	// One prefix byte plus one 20-byte SHA-1 digest.
	sha1Buf := make([]byte, 0, 21)

	if blockCnt <= 1 { // file size <= 4M
		sha1Buf = append(sha1Buf, 0x16)
		if sha1Buf, err = CalSha1(sha1Buf, f); err != nil {
			return "", err
		}
		return base64.URLEncoding.EncodeToString(sha1Buf), nil
	}

	// file size > 4M
	sha1Buf = append(sha1Buf, 0x96)
	sha1BlockBuf := make([]byte, 0, blockCnt*20)
	for i := 0; i < blockCnt; i++ {
		// Each pass consumes at most one block from the current file offset.
		body := io.LimitReader(f, BLOCK_SIZE)
		if sha1BlockBuf, err = CalSha1(sha1BlockBuf, body); err != nil {
			return "", err
		}
	}
	// Previously this error was discarded, which could yield an empty etag
	// together with a nil error; propagate it instead.
	if sha1Buf, err = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf)); err != nil {
		return "", err
	}
	return base64.URLEncoding.EncodeToString(sha1Buf), nil
}
// Block geometry shared by the etag computation and BlockCount.
const (
BLOCK_BITS = 22 // base-2 logarithm of the block size: 1 << 22 bytes is 4M
// BLOCK_SIZE is the block size in bytes, derived from BLOCK_BITS.
BLOCK_SIZE = 1 << BLOCK_BITS
)
// BlockCount returns the number of BLOCK_SIZE-byte blocks needed to hold
// fsize bytes; a trailing partial block counts as a whole one.
func BlockCount(fsize int64) int {
	// Adding BLOCK_SIZE-1 before shifting rounds the quotient up.
	const roundUp = BLOCK_SIZE - 1
	padded := fsize + roundUp
	return int(padded >> BLOCK_BITS)
}
// CalSha1 reads r until EOF, computes the SHA-1 of everything read and
// returns b with the 20-byte digest appended. If reading fails, it returns a
// nil slice together with the read error.
func CalSha1(b []byte, r io.Reader) ([]byte, error) {
	digest := sha1.New()
	if _, copyErr := io.Copy(digest, r); copyErr != nil {
		return nil, copyErr
	}
	return digest.Sum(b), nil
}

View file

@ -571,7 +571,7 @@ func genCloudIndex(localDirPath string, excludes map[string]bool) (err error) {
return nil
}
hash, hashErr := GetEtag(path)
hash, hashErr := util.GetEtag(path)
if nil != hashErr {
util.LogErrorf("get file [%s] hash failed: %s", path, hashErr)
return hashErr

View file

@ -17,9 +17,7 @@
package model
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"os"
"path"
@ -58,20 +56,22 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter
continue
}
var f *os.File
f, err = os.Open(p)
if nil != err {
fi, statErr := os.Stat(p)
if nil != statErr {
err = statErr
return
}
f, openErr := os.Open(p)
if nil != openErr {
err = openErr
return
}
hash, hashErr := util.GetEtagByHandle(f, fi.Size())
if nil != hashErr {
f.Close()
return
}
var data []byte
data, err = io.ReadAll(f)
f.Close()
if nil != err {
return
}
hash := fmt.Sprintf("%x", sha256.Sum256(data))
if existAsset := sql.QueryAssetByHash(hash); nil != existAsset {
// 已经存在同样数据的资源文件的话不重复保存
succMap[baseName] = existAsset.Path
@ -80,9 +80,15 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter
fName = fName[0 : len(fName)-len(ext)]
fName = fName + "-" + ast.NewNodeID() + ext
writePath := filepath.Join(assets, fName)
if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err {
if _, err = f.Seek(0, io.SeekStart); nil != err {
f.Close()
return
}
if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err {
f.Close()
return
}
f.Close()
succMap[baseName] = "assets/" + fName
}
}
@ -134,22 +140,21 @@ func Upload(c *gin.Context) {
ext = strings.ToLower(ext)
fName += ext
baseName := fName
f, err := file.Open()
if nil != err {
f, openErr := file.Open()
if nil != openErr {
errFiles = append(errFiles, fName)
ret.Msg = err.Error()
ret.Msg = openErr.Error()
break
}
data, err := io.ReadAll(f)
if nil != err {
hash, hashErr := util.GetEtagByHandle(f, file.Size)
if nil != hashErr {
errFiles = append(errFiles, fName)
ret.Msg = err.Error()
f.Close()
break
}
f.Close()
hash := fmt.Sprintf("%x", sha256.Sum256(data))
if existAsset := sql.QueryAssetByHash(hash); nil != existAsset {
// 已经存在同样数据的资源文件的话不重复保存
succMap[baseName] = existAsset.Path
@ -168,11 +173,19 @@ func Upload(c *gin.Context) {
}
}
writePath := filepath.Join(assetsDirPath, fName)
if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err {
if _, err = f.Seek(0, io.SeekStart); nil != err {
errFiles = append(errFiles, fName)
ret.Msg = err.Error()
f.Close()
break
}
if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err {
errFiles = append(errFiles, fName)
ret.Msg = err.Error()
f.Close()
break
}
f.Close()
succMap[baseName] = "assets/" + fName
}
}

View file

@ -44,7 +44,7 @@ var cookieStore = cookie.NewStore([]byte("ATN51UlxVq1Gcvdf"))
func Serve(fastMode bool) {
gin.SetMode(gin.ReleaseMode)
ginServer := gin.New()
ginServer.MaxMultipartMemory = 1024 * 1024 * 1024 * 4
ginServer.MaxMultipartMemory = 1024 * 1024 * 32 // 32MB
ginServer.Use(gin.Recovery())
ginServer.Use(cors.Default())
ginServer.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedExtensions([]string{".pdf", ".mp3", ".wav", ".ogg", ".mov", ".weba", ".mkv", ".mp4", ".webm"})))

View file

@ -20,7 +20,6 @@ import (
"crypto/sha256"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
@ -71,12 +70,11 @@ func docTitleImgAsset(root *ast.Node) *Asset {
}
var hash string
var err error
absPath := filepath.Join(util.DataDir, p)
if data, err := os.ReadFile(absPath); nil != err {
if hash, err = util.GetEtag(absPath); nil != err {
util.LogErrorf("read asset [%s] data failed: %s", absPath, err)
hash = fmt.Sprintf("%x", sha256.Sum256([]byte(gulu.Rand.String(7))))
} else {
hash = fmt.Sprintf("%x", sha256.Sum256(data))
}
name, _ := util.LastID(p)
asset := &Asset{

View file

@ -18,10 +18,8 @@ package sql
import (
"bytes"
"crypto/sha256"
"database/sql"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
@ -522,12 +520,12 @@ func buildSpanFromNode(n *ast.Node, tree *parse.Tree, rootID, boxID, p string) (
}
var hash string
var hashErr error
if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp {
if !gulu.File.IsDir(lp) {
if data, err := os.ReadFile(lp); nil != err {
util.LogErrorf("read asset [%s] data failed: %s", lp, err)
} else {
hash = fmt.Sprintf("%x", sha256.Sum256(data))
hash, hashErr = util.GetEtag(lp)
if nil != hashErr {
util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr)
}
}
}
@ -597,11 +595,11 @@ func buildSpanFromNode(n *ast.Node, tree *parse.Tree, rootID, boxID, p string) (
dest := string(src)
var hash string
var hashErr error
if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp {
if data, err := os.ReadFile(lp); nil != err {
util.LogErrorf("read asset [%s] data failed: %s", lp, err)
} else {
hash = fmt.Sprintf("%x", sha256.Sum256(data))
hash, hashErr = util.GetEtag(lp)
if nil != hashErr {
util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr)
}
}

88
kernel/util/etag.go Normal file
View file

@ -0,0 +1,88 @@
// SiYuan - Build Your Eternal Digital Garden
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package util
import (
"bytes"
"crypto/sha1"
"encoding/base64"
"io"
"os"
)
// The following is an implementation of the Qiniu Cloud hash algorithm,
// see https://github.com/qiniu/qetag/blob/master/qetag.go

// GetEtagByHandle computes the etag of the data read from f and returns it
// as a URL-safe base64 string.
//
// size is only used to decide how many blocks to hash (via BlockCount); the
// data itself is streamed from f starting at its current position:
//   - at most one block: the etag is the byte 0x16 followed by the SHA-1 of
//     everything read from f;
//   - more than one block: each chunk of up to BLOCK_SIZE bytes is hashed
//     separately, the digests are concatenated, and the etag is the byte 0x96
//     followed by the SHA-1 of that concatenation.
//
// NOTE(review): the result is presumably only meaningful when size equals the
// number of bytes remaining in f — confirm with callers.
//
// On any read or hash error an empty etag and the error are returned.
func GetEtagByHandle(f io.Reader, size int64) (etag string, err error) {
	blockCnt := BlockCount(size)
	// One prefix byte plus one 20-byte SHA-1 digest.
	sha1Buf := make([]byte, 0, 21)

	if blockCnt <= 1 { // file size <= 4M
		sha1Buf = append(sha1Buf, 0x16)
		if sha1Buf, err = CalSha1(sha1Buf, f); err != nil {
			return "", err
		}
		return base64.URLEncoding.EncodeToString(sha1Buf), nil
	}

	// file size > 4M
	sha1Buf = append(sha1Buf, 0x96)
	sha1BlockBuf := make([]byte, 0, blockCnt*20)
	for i := 0; i < blockCnt; i++ {
		// Each pass consumes at most one block from the reader.
		body := io.LimitReader(f, BLOCK_SIZE)
		if sha1BlockBuf, err = CalSha1(sha1BlockBuf, body); err != nil {
			return "", err
		}
	}
	// Previously this error was discarded, which could yield an empty etag
	// together with a nil error; propagate it instead.
	if sha1Buf, err = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf)); err != nil {
		return "", err
	}
	return base64.URLEncoding.EncodeToString(sha1Buf), nil
}
// GetEtag opens the file at filename, looks up its size and returns the etag
// computed by GetEtagByHandle over its contents. If the file cannot be opened
// or inspected, an empty etag and the underlying error are returned.
func GetEtag(filename string) (etag string, err error) {
	file, openErr := os.Open(filename)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	info, statErr := file.Stat()
	if statErr != nil {
		return "", statErr
	}

	return GetEtagByHandle(file, info.Size())
}
// Block geometry shared by GetEtagByHandle and BlockCount.
const (
BLOCK_BITS = 22 // base-2 logarithm of the block size: 1 << 22 bytes is 4M
// BLOCK_SIZE is the block size in bytes, derived from BLOCK_BITS.
BLOCK_SIZE = 1 << BLOCK_BITS
)
// BlockCount returns the number of BLOCK_SIZE-byte blocks needed to hold
// fsize bytes; a trailing partial block counts as a whole one.
func BlockCount(fsize int64) int {
	// Adding BLOCK_SIZE-1 before shifting rounds the quotient up.
	const roundUp = BLOCK_SIZE - 1
	padded := fsize + roundUp
	return int(padded >> BLOCK_BITS)
}
// CalSha1 reads r until EOF, computes the SHA-1 of everything read and
// returns b with the 20-byte digest appended. If reading fails, it returns a
// nil slice together with the read error.
func CalSha1(b []byte, r io.Reader) ([]byte, error) {
	hasher := sha1.New()
	if _, readErr := io.Copy(hasher, r); readErr != nil {
		return nil, readErr
	}
	return hasher.Sum(b), nil
}