Browse files

:art: High memory usage when inserting large asset files https://github.com/siyuan-note/siyuan/issues/5023

Liang Ding 3 years ago
parent
commit
49b9f7bc92
7 changed files with 135 additions and 101 deletions
  1. +2 -65
      kernel/model/osssync.go
  2. +1 -1
      kernel/model/sync.go
  3. +33 -20
      kernel/model/upload.go
  4. +1 -1
      kernel/server/serve.go
  5. +2 -4
      kernel/sql/aseet.go
  6. +8 -10
      kernel/sql/database.go
  7. +88 -0
      kernel/util/etag.go

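For context on the commit message: before this change the upload paths read each asset fully into memory (os.ReadFile / io.ReadAll) and hashed the resulting byte slice, so peak memory grew with the size of the inserted file; afterwards the bytes are streamed through the hash. A minimal, self-contained sketch of the two patterns (hashAll and hashStream are illustrative names, not functions from this repository):

package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

// hashAll reads the whole file into memory before hashing, so peak memory
// grows with the file size (the pattern this commit removes).
func hashAll(path string) (string, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", sha256.Sum256(data)), nil
}

// hashStream feeds the file through the hash in fixed-size chunks via io.Copy,
// so memory stays roughly constant regardless of file size (the pattern this
// commit adopts via the qetag helpers below).
func hashStream(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}

func main() {
	sum, err := hashStream(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(sum)
}
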
+ 2 - 65
kernel/model/osssync.go

@@ -17,13 +17,9 @@
 package model
 
 import (
-	"bytes"
 	"context"
-	"crypto/sha1"
-	"encoding/base64"
 	"errors"
 	"fmt"
-	"io"
 	"io/fs"
 	"os"
 	"path"
@@ -662,7 +658,7 @@ func localUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo
 			return nil
 		}
 
-		localHash, hashErr := GetEtag(path)
+		localHash, hashErr := util.GetEtag(path)
 		if nil != hashErr {
 			util.LogErrorf("get local file [%s] etag failed: %s", path, hashErr)
 			return nil
@@ -696,7 +692,7 @@ func cloudUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo
 			continue
 		}
 
-		localHash, hashErr := GetEtag(localCheckPath)
+		localHash, hashErr := util.GetEtag(localCheckPath)
 		if nil != hashErr {
 			util.LogErrorf("get local file [%s] hash failed: %s", localCheckPath, hashErr)
 			err = hashErr
@@ -760,62 +756,3 @@ func putFileToCloud(filePath, key, upToken string) (err error) {
 	}
 	return
 }
-
-// The following is the Qiniu Cloud hash algorithm (qetag) implementation https://github.com/qiniu/qetag/blob/master/qetag.go
-
-func GetEtag(filename string) (etag string, err error) {
-	f, err := os.Open(filename)
-	if err != nil {
-		return
-	}
-	defer f.Close()
-
-	fi, err := f.Stat()
-	if err != nil {
-		return
-	}
-
-	fsize := fi.Size()
-	blockCnt := BlockCount(fsize)
-	sha1Buf := make([]byte, 0, 21)
-
-	if blockCnt <= 1 { // file size <= 4M
-		sha1Buf = append(sha1Buf, 0x16)
-		sha1Buf, err = CalSha1(sha1Buf, f)
-		if err != nil {
-			return
-		}
-	} else { // file size > 4M
-		sha1Buf = append(sha1Buf, 0x96)
-		sha1BlockBuf := make([]byte, 0, blockCnt*20)
-		for i := 0; i < blockCnt; i++ {
-			body := io.LimitReader(f, BLOCK_SIZE)
-			sha1BlockBuf, err = CalSha1(sha1BlockBuf, body)
-			if err != nil {
-				return
-			}
-		}
-		sha1Buf, _ = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf))
-	}
-	etag = base64.URLEncoding.EncodeToString(sha1Buf)
-	return
-}
-
-const (
-	BLOCK_BITS = 22 // Indicate that the blocksize is 4M
-	BLOCK_SIZE = 1 << BLOCK_BITS
-)
-
-func BlockCount(fsize int64) int {
-	return int((fsize + (BLOCK_SIZE - 1)) >> BLOCK_BITS)
-}
-
-func CalSha1(b []byte, r io.Reader) ([]byte, error) {
-
-	h := sha1.New()
-	_, err := io.Copy(h, r)
-	if err != nil {
-		return nil, err
-	}
-	return h.Sum(b), nil
-}

+ 1 - 1
kernel/model/sync.go

@@ -571,7 +571,7 @@ func genCloudIndex(localDirPath string, excludes map[string]bool) (err error) {
 			return nil
 		}
 
-		hash, hashErr := GetEtag(path)
+		hash, hashErr := util.GetEtag(path)
 		if nil != hashErr {
 			util.LogErrorf("get file [%s] hash failed: %s", path, hashErr)
 			return hashErr

+ 33 - 20
kernel/model/upload.go

@@ -17,9 +17,7 @@
 package model
 
 import (
-	"crypto/sha256"
 	"errors"
-	"fmt"
 	"io"
 	"os"
 	"path"
@@ -58,20 +56,22 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter
 			continue
 		}
 
-		var f *os.File
-		f, err = os.Open(p)
-		if nil != err {
+		fi, statErr := os.Stat(p)
+		if nil != statErr {
+			err = statErr
 			return
 		}
-
-		var data []byte
-		data, err = io.ReadAll(f)
-		f.Close()
-		if nil != err {
+		f, openErr := os.Open(p)
+		if nil != openErr {
+			err = openErr
+			return
+		}
+		hash, hashErr := util.GetEtagByHandle(f, fi.Size())
+		if nil != hashErr {
+			f.Close()
 			return
 		}
 
-		hash := fmt.Sprintf("%x", sha256.Sum256(data))
 		if existAsset := sql.QueryAssetByHash(hash); nil != existAsset {
 			// If an asset file with the same content already exists, do not save it again
 			succMap[baseName] = existAsset.Path
@@ -80,9 +80,15 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter
 			fName = fName[0 : len(fName)-len(ext)]
 			fName = fName + "-" + ast.NewNodeID() + ext
 			writePath := filepath.Join(assets, fName)
-			if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err {
+			if _, err = f.Seek(0, io.SeekStart); nil != err {
+				f.Close()
+				return
+			}
+			if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err {
+				f.Close()
 				return
 			}
+			f.Close()
 			succMap[baseName] = "assets/" + fName
 		}
 	}
@@ -134,22 +140,21 @@ func Upload(c *gin.Context) {
 		ext = strings.ToLower(ext)
 		fName += ext
 		baseName := fName
-		f, err := file.Open()
-		if nil != err {
+		f, openErr := file.Open()
+		if nil != openErr {
 			errFiles = append(errFiles, fName)
-			ret.Msg = err.Error()
+			ret.Msg = openErr.Error()
 			break
 		}
 
-		data, err := io.ReadAll(f)
-		if nil != err {
+		hash, hashErr := util.GetEtagByHandle(f, file.Size)
+		if nil != hashErr {
 			errFiles = append(errFiles, fName)
 			ret.Msg = err.Error()
+			f.Close()
 			break
 		}
-		f.Close()
 
-		hash := fmt.Sprintf("%x", sha256.Sum256(data))
 		if existAsset := sql.QueryAssetByHash(hash); nil != existAsset {
 			// If an asset file with the same content already exists, do not save it again
 			succMap[baseName] = existAsset.Path
@@ -168,11 +173,19 @@ func Upload(c *gin.Context) {
 				}
 			}
 			writePath := filepath.Join(assetsDirPath, fName)
-			if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err {
+			if _, err = f.Seek(0, io.SeekStart); nil != err {
+				errFiles = append(errFiles, fName)
+				ret.Msg = err.Error()
+				f.Close()
+				break
+			}
+			if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err {
 				errFiles = append(errFiles, fName)
 				ret.Msg = err.Error()
+				f.Close()
 				break
 			}
+			f.Close()
 			succMap[baseName] = "assets/" + fName
 		}
 	}

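The rewritten InsertLocalAssets and Upload above follow one pattern: open the file once, compute the hash from the open handle, Seek back to the start, then hand the same handle to gulu.File.WriteFileSaferByReader. A condensed stdlib-only sketch of that pattern (copyAsset and the plain os.Create write are illustrative stand-ins, not the project's helpers):

package main

import (
	"crypto/sha1"
	"encoding/base64"
	"fmt"
	"io"
	"os"
	"path/filepath"
)

// copyAsset opens the source once, hashes it as a stream, rewinds the same
// handle, and then streams the bytes to the destination, so the file contents
// are never held in memory as a whole.
func copyAsset(src, dstDir string) (hash string, err error) {
	f, err := os.Open(src)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// First pass: stream the contents through the hash.
	h := sha1.New()
	if _, err = io.Copy(h, f); err != nil {
		return "", err
	}
	hash = base64.URLEncoding.EncodeToString(h.Sum(nil))

	// Rewind so the same handle can be read again for the copy.
	if _, err = f.Seek(0, io.SeekStart); err != nil {
		return "", err
	}

	// Second pass: stream the contents into the destination file.
	dst, err := os.Create(filepath.Join(dstDir, filepath.Base(src)))
	if err != nil {
		return "", err
	}
	defer dst.Close()
	_, err = io.Copy(dst, f)
	return hash, err
}

func main() {
	hash, err := copyAsset(os.Args[1], os.TempDir())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("hash:", hash)
}
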
+ 1 - 1
kernel/server/serve.go

@@ -44,7 +44,7 @@ var cookieStore = cookie.NewStore([]byte("ATN51UlxVq1Gcvdf"))
 func Serve(fastMode bool) {
 	gin.SetMode(gin.ReleaseMode)
 	ginServer := gin.New()
-	ginServer.MaxMultipartMemory = 1024 * 1024 * 1024 * 4
+	ginServer.MaxMultipartMemory = 1024 * 1024 * 32 // 32MB
 	ginServer.Use(gin.Recovery())
 	ginServer.Use(cors.Default())
 	ginServer.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedExtensions([]string{".pdf", ".mp3", ".wav", ".ogg", ".mov", ".weba", ".mkv", ".mp4", ".webm"})))

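Lowering MaxMultipartMemory does not cap the size of uploads: gin passes this value to http.Request.ParseMultipartForm, which keeps at most that many bytes of file parts in memory and spills the remainder to temporary files on disk (32 MB is also gin's own default). What matters for memory is that the handler then reads each part as a stream rather than with io.ReadAll, as the Upload changes above do. A minimal sketch, with an illustrative route and destination path rather than the project's:

package main

import (
	"io"
	"net/http"
	"os"
	"path/filepath"

	"github.com/gin-gonic/gin"
)

func main() {
	r := gin.Default()
	// Only this many bytes of each multipart body are buffered in RAM;
	// larger file parts are written to temporary files by net/http.
	r.MaxMultipartMemory = 32 << 20 // 32MB

	r.POST("/upload", func(c *gin.Context) {
		fh, err := c.FormFile("file")
		if err != nil {
			c.String(http.StatusBadRequest, err.Error())
			return
		}
		src, err := fh.Open()
		if err != nil {
			c.String(http.StatusInternalServerError, err.Error())
			return
		}
		defer src.Close()

		dst, err := os.Create(filepath.Join(os.TempDir(), filepath.Base(fh.Filename)))
		if err != nil {
			c.String(http.StatusInternalServerError, err.Error())
			return
		}
		defer dst.Close()

		// Stream the uploaded part to disk instead of reading it fully into memory.
		if _, err := io.Copy(dst, src); err != nil {
			c.String(http.StatusInternalServerError, err.Error())
			return
		}
		c.String(http.StatusOK, "saved %s", fh.Filename)
	})
	r.Run(":8080")
}
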
+ 2 - 4
kernel/sql/aseet.go

@@ -20,7 +20,6 @@ import (
 	"crypto/sha256"
 	"database/sql"
 	"fmt"
-	"os"
 	"path/filepath"
 	"strings"
 
@@ -71,12 +70,11 @@ func docTitleImgAsset(root *ast.Node) *Asset {
 		}
 
 		var hash string
+		var err error
 		absPath := filepath.Join(util.DataDir, p)
-		if data, err := os.ReadFile(absPath); nil != err {
+		if hash, err = util.GetEtag(absPath); nil != err {
 			util.LogErrorf("read asset [%s] data failed: %s", absPath, err)
 			hash = fmt.Sprintf("%x", sha256.Sum256([]byte(gulu.Rand.String(7))))
-		} else {
-			hash = fmt.Sprintf("%x", sha256.Sum256(data))
 		}
 		name, _ := util.LastID(p)
 		asset := &Asset{

+ 8 - 10
kernel/sql/database.go

@@ -18,10 +18,8 @@ package sql
 
 import (
 	"bytes"
-	"crypto/sha256"
 	"database/sql"
 	"errors"
-	"fmt"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -522,12 +520,12 @@ func buildSpanFromNode(n *ast.Node, tree *parse.Tree, rootID, boxID, p string) (
 		}
 
 		var hash string
+		var hashErr error
 		if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp {
 			if !gulu.File.IsDir(lp) {
-				if data, err := os.ReadFile(lp); nil != err {
-					util.LogErrorf("read asset [%s] data failed: %s", lp, err)
-				} else {
-					hash = fmt.Sprintf("%x", sha256.Sum256(data))
+				hash, hashErr = util.GetEtag(lp)
+				if nil != hashErr {
+					util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr)
 				}
 			}
 		}
@@ -597,11 +595,11 @@ func buildSpanFromNode(n *ast.Node, tree *parse.Tree, rootID, boxID, p string) (
 
 		dest := string(src)
 		var hash string
+		var hashErr error
 		if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp {
-			if data, err := os.ReadFile(lp); nil != err {
-				util.LogErrorf("read asset [%s] data failed: %s", lp, err)
-			} else {
-				hash = fmt.Sprintf("%x", sha256.Sum256(data))
+			hash, hashErr = util.GetEtag(lp)
+			if nil != hashErr {
+				util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr)
 			}
 		}
 

+ 88 - 0
kernel/util/etag.go

@@ -0,0 +1,88 @@
+// SiYuan - Build Your Eternal Digital Garden
+// Copyright (c) 2020-present, b3log.org
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+package util
+
+import (
+	"bytes"
+	"crypto/sha1"
+	"encoding/base64"
+	"io"
+	"os"
+)
+
+// The following is the Qiniu Cloud hash algorithm (qetag) implementation https://github.com/qiniu/qetag/blob/master/qetag.go
+
+func GetEtagByHandle(f io.Reader, size int64) (etag string, err error) {
+	blockCnt := BlockCount(size)
+	sha1Buf := make([]byte, 0, 21)
+
+	if blockCnt <= 1 { // file size <= 4M
+		sha1Buf = append(sha1Buf, 0x16)
+		sha1Buf, err = CalSha1(sha1Buf, f)
+		if err != nil {
+			return
+		}
+	} else { // file size > 4M
+		sha1Buf = append(sha1Buf, 0x96)
+		sha1BlockBuf := make([]byte, 0, blockCnt*20)
+		for i := 0; i < blockCnt; i++ {
+			body := io.LimitReader(f, BLOCK_SIZE)
+			sha1BlockBuf, err = CalSha1(sha1BlockBuf, body)
+			if err != nil {
+				return
+			}
+		}
+		sha1Buf, _ = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf))
+	}
+	etag = base64.URLEncoding.EncodeToString(sha1Buf)
+	return
+}
+
+func GetEtag(filename string) (etag string, err error) {
+	f, err := os.Open(filename)
+	if err != nil {
+		return
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		return
+	}
+
+	etag, err = GetEtagByHandle(f, fi.Size())
+	return
+}
+
+const (
+	BLOCK_BITS = 22 // Indicate that the blocksize is 4M
+	BLOCK_SIZE = 1 << BLOCK_BITS
+)
+
+func BlockCount(fsize int64) int {
+	return int((fsize + (BLOCK_SIZE - 1)) >> BLOCK_BITS)
+}
+
+func CalSha1(b []byte, r io.Reader) ([]byte, error) {
+
+	h := sha1.New()
+	_, err := io.Copy(h, r)
+	if err != nil {
+		return nil, err
+	}
+	return h.Sum(b), nil
+}
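
A short usage sketch of the new helpers, assuming the package is imported as github.com/siyuan-note/siyuan/kernel/util (the import path is inferred from the repository layout and is not shown in this diff):

package main

import (
	"fmt"
	"os"

	"github.com/siyuan-note/siyuan/kernel/util"
)

func main() {
	// Hash a file on disk by path; GetEtag opens it and streams the contents.
	etag, err := util.GetEtag(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("qetag:", etag)

	// Or hash from an already-open handle when the size is known,
	// as the upload paths now do.
	f, err := os.Open(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()
	fi, _ := f.Stat()
	etag2, err := util.GetEtagByHandle(f, fi.Size())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("qetag:", etag2)
}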