Bläddra i källkod

More efficient annex content location resolution

See https://github.com/G-Node/libgin/issues/9
The NewAFile function in libgin searches through the annex object store
for the file matching the key. Shelling out to git annex with the
'contentlocation' command runs in constant time, since it determines the
location directly from the key.
We could also use the hashmix functions from libgin for this.
Achilleas Koutsou 5 år sedan
förälder
incheckning
373e59deee
2 ändrade filer med 40 tillägg och 18 borttagningar
  1. 38 17
      internal/route/repo/repo_gin.go
  2. 2 1
      internal/route/repo/view.go

+ 38 - 17
internal/route/repo/repo_gin.go

@@ -5,36 +5,45 @@ import (
 	"bytes"
 	"bytes"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
 
 
 	"github.com/G-Node/git-module"
 	"github.com/G-Node/git-module"
 	"github.com/G-Node/gogs/internal/context"
 	"github.com/G-Node/gogs/internal/context"
 	"github.com/G-Node/gogs/internal/setting"
 	"github.com/G-Node/gogs/internal/setting"
 	"github.com/G-Node/gogs/internal/tool"
 	"github.com/G-Node/gogs/internal/tool"
 	"github.com/G-Node/libgin/libgin"
 	"github.com/G-Node/libgin/libgin"
-	"github.com/G-Node/libgin/libgin/annex"
 	"github.com/go-macaron/captcha"
 	"github.com/go-macaron/captcha"
 	log "gopkg.in/clog.v1"
 	log "gopkg.in/clog.v1"
 	"gopkg.in/yaml.v2"
 	"gopkg.in/yaml.v2"
 )
 )
 
 
 func serveAnnexedData(ctx *context.Context, name string, cpt *captcha.Captcha, buf []byte) error {
 func serveAnnexedData(ctx *context.Context, name string, cpt *captcha.Captcha, buf []byte) error {
-	annexFile, err := annex.NewAFile(ctx.Repo.Repository.RepoPath(), "annex", name, buf)
+	keyparts := strings.Split(strings.TrimSpace(string(buf)), "/")
+	key := keyparts[len(keyparts)-1]
+	contentPath, err := git.NewCommand("annex", "contentlocation", key).RunInDir(ctx.Repo.Repository.RepoPath())
 	if err != nil {
 	if err != nil {
+		log.Error(2, "Failed to find content location for file %q with key %q", name, key)
 		return err
 		return err
 	}
 	}
-	if cpt != nil && annexFile.Info.Size() > annex.MEGABYTE*setting.Repository.RawCaptchaMinFileSize && !cpt.VerifyReq(ctx.Req) &&
-		!ctx.IsLogged {
-		ctx.Data["EnableCaptcha"] = true
-		ctx.HTML(200, "repo/download")
-		return nil
-	}
-	annexfp, err := annexFile.Open()
+	// always trim space from output for git command
+	contentPath = strings.TrimSpace(contentPath)
+	return serveAnnexedKey(ctx, name, contentPath)
+}
+
+func serveAnnexedKey(ctx *context.Context, name string, contentPath string) error {
+	annexfp, err := os.Open(filepath.Join(ctx.Repo.Repository.RepoPath(), contentPath))
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
 	defer annexfp.Close()
 	defer annexfp.Close()
 	annexReader := bufio.NewReader(annexfp)
 	annexReader := bufio.NewReader(annexfp)
-	buf, _ = annexReader.Peek(1024)
+
+	buf, _ := annexReader.Peek(1024)
+
+	// TODO: Add size to header
 
 
 	if !tool.IsTextFile(buf) {
 	if !tool.IsTextFile(buf) {
 		if !tool.IsImageFile(buf) {
 		if !tool.IsImageFile(buf) {
@@ -45,6 +54,8 @@ func serveAnnexedData(ctx *context.Context, name string, cpt *captcha.Captcha, b
 		ctx.Resp.Header().Set("Content-Type", "text/plain; charset=utf-8")
 		ctx.Resp.Header().Set("Content-Type", "text/plain; charset=utf-8")
 	}
 	}
 
 
+	// TODO: Skip if request method is HEAD
+	log.Trace("Serving annex content for %q: %q", name, contentPath)
 	_, err = io.Copy(ctx.Resp, annexReader)
 	_, err = io.Copy(ctx.Resp, annexReader)
 	return err
 	return err
 }
 }
@@ -103,25 +114,35 @@ func resolveAnnexedContent(c *context.Context, buf []byte, dataRc io.Reader) ([]
 		// not an annex pointer file; return as is
 		// not an annex pointer file; return as is
 		return buf, dataRc, nil
 		return buf, dataRc, nil
 	}
 	}
-	log.Trace("Annexed file requested: Resolving content for [%s]", bytes.TrimSpace(buf))
-	af, err := annex.NewAFile(c.Repo.Repository.RepoPath(), "annex", "", buf)
+	log.Trace("Annexed file requested: Resolving content for %q", bytes.TrimSpace(buf))
+
+	keyparts := strings.Split(strings.TrimSpace(string(buf)), "/")
+	key := keyparts[len(keyparts)-1]
+	contentPath, err := git.NewCommand("annex", "contentlocation", key).RunInDir(c.Repo.Repository.RepoPath())
 	if err != nil {
 	if err != nil {
-		log.Trace("Could not get annex file: %v", err)
+		log.Error(2, "Failed to find content location for key %q", key)
 		c.Data["IsAnnexedFile"] = true
 		c.Data["IsAnnexedFile"] = true
 		return buf, dataRc, err
 		return buf, dataRc, err
 	}
 	}
-
-	afp, err := af.Open()
+	// always trim space from output for git command
+	contentPath = strings.TrimSpace(contentPath)
+	afp, err := os.Open(filepath.Join(c.Repo.Repository.RepoPath(), contentPath))
 	if err != nil {
 	if err != nil {
 		log.Trace("Could not open annex file: %v", err)
 		log.Trace("Could not open annex file: %v", err)
 		c.Data["IsAnnexedFile"] = true
 		c.Data["IsAnnexedFile"] = true
 		return buf, dataRc, err
 		return buf, dataRc, err
 	}
 	}
+	info, err := afp.Stat()
+	if err != nil {
+		log.Trace("Could not stat annex file: %v", err)
+		c.Data["IsAnnexedFile"] = true
+		return buf, dataRc, err
+	}
 	annexDataReader := bufio.NewReader(afp)
 	annexDataReader := bufio.NewReader(afp)
 	annexBuf := make([]byte, 1024)
 	annexBuf := make([]byte, 1024)
 	n, _ := annexDataReader.Read(annexBuf)
 	n, _ := annexDataReader.Read(annexBuf)
 	annexBuf = annexBuf[:n]
 	annexBuf = annexBuf[:n]
-	c.Data["FileSize"] = af.Info.Size()
-	log.Trace("Annexed file size: %d B", af.Info.Size())
+	c.Data["FileSize"] = info.Size()
+	log.Trace("Annexed file size: %d B", info.Size())
 	return annexBuf, annexDataReader, nil
 	return annexBuf, annexDataReader, nil
 }
 }

+ 2 - 1
internal/route/repo/view.go

@@ -155,7 +155,8 @@ func renderFile(c *context.Context, entry *git.TreeEntry, treeLink, rawLink stri
 	n, _ := dataRc.Read(buf)
 	n, _ := dataRc.Read(buf)
 	buf = buf[:n]
 	buf = buf[:n]
 
 
-	// GIN mod: Replace existing buf and reader with annexed content buf and reader
+	// GIN mod: Replace existing buf and reader with annexed content buf and
+	// reader (only if it's an annexed ptr file)
 	buf, dataRc, err = resolveAnnexedContent(c, buf, dataRc)
 	buf, dataRc, err = resolveAnnexedContent(c, buf, dataRc)
 	if err != nil {
 	if err != nil {
 		return
 		return