Browse Source

ENH: route to .git/{config,annex/objects} and allow HEAD requests

This allows accessing public git-annex repos via HTTP without having to
go through SSH or some other fanciness (e.g. a special remote).

Notes:

- I have no experience with Go, or with any of the frameworks used here,
  so my solution could be very suboptimal.

- URL should end in .git/ since stored repos are bare

- I have not tested whether this somehow erroneously enables access to
  annexed files in private repos (I would assume not)

- I have tested only 'get' and 'drop' functionality on a basic example
  running a local instance of gin/gogs.  I had to set

  annex.security.allowed-http-addresses=all

  config since it was running from localhost

Actual minor TODOs left for someone to finish up

- [ ] I think that there is a bit more to be done to support resuming downloads.
  That also seems to be revealed by running annex testremote on it:

	$> git annex testremote origin --test-readonly 1.mp3
	testremote origin Remote Tests
	  dropping from http remote not supported
	  unavailable remote
		removeKey:                       OK
		storeKey:                        FAIL
		  ./Command/TestRemote.hs:276:
		  (got: Left "copying to non-ssh repo not supported")
		checkPresent:                    OK
		retrieveKeyFile:                 download failed: ConnectionFailure Network.Socket.getAddrInfo (called with preferred socket type/protocol: AddrInfo {addrFlags = [AI_ADDRCONFIG], addrFamily = AF_UNSPEC, addrSocketType = Stream, addrProtocol = 0, addrAddress = <assumed to be undefined>, addrCanonName = <assumed to be undefined>}, host name: Just "!dne!", service name: Just "3000"): does not exist (Name or service not known)
	download failed: ConnectionFailure Network.Socket.getAddrInfo (called with preferred socket type/protocol: AddrInfo {addrFlags = [AI_ADDRCONFIG], addrFamily = AF_UNSPEC, addrSocketType = Stream, addrProtocol = 0, addrAddress = <assumed to be undefined>, addrCanonName = <assumed to be undefined>}, host name: Just "!dne!", service name: Just "3000"): does not exist (Name or service not known)
	OK
		retrieveKeyFileCheap:            OK
	  key size Just 6951961; NoChunks; encryption none
		present True:                    OK (0.03s)
		retrieveKeyFile:                 OK (0.11s)
		fsck downloaded object:          OK
		retrieveKeyFile resume from 33%: OK (0.07s)
		fsck downloaded object:          OK
		retrieveKeyFile resume from 0:   OK (0.09s)
		fsck downloaded object:          OK
		retrieveKeyFile resume from end: download failed: ResponseBodyTooShort 6951961 33
	download failed: Not Found
	FAIL (0.04s)
		  ./Command/TestRemote.hs:198:
		  failed
		fsck downloaded object:          OK

	2 out of 14 tests failed (0.37s)

- [ ] getTextFile is probably not kosher for binary files. Will improve in the
      next commit.

That 2nd failure is the one hinting at the inability to retrieve if the previous
transfer was interrupted (running datalad clean or just removing those
.git/annex/transfer entries should mitigate it).
Yaroslav Halchenko 5 năm trước cách đây
mục cha
commit
ef4efec6f2
2 tập tin đã thay đổi với 20 bổ sung 5 xóa
  1. 2 2
      internal/cmd/web.go
  2. 18 3
      internal/route/repo/http.go

+ 2 - 2
internal/cmd/web.go

@@ -666,10 +666,10 @@ func runWeb(c *cli.Context) error {
 		m.Group("/:reponame([\\d\\w-_\\.]+\\.git$)", func() {
 			m.Get("", ignSignIn, context.RepoAssignment(), context.RepoRef(), repo.Home)
 			m.Options("/*", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
-			m.Route("/*", "GET,POST", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
+			m.Route("/*", "GET,POST,HEAD", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
 		})
 		m.Options("/:reponame/*", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
-		m.Route("/:reponame/*", "GET,POST", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
+		m.Route("/:reponame/*", "GET,POST,HEAD", ignSignInAndCsrf, repo.HTTPContexter(), repo.HTTP)
 	})
 	// ***** END: Repository *****
 

+ 18 - 3
internal/route/repo/http.go

@@ -62,7 +62,7 @@ func HTTPContexter() macaron.Handler {
 
 		isPull := c.Query("service") == "git-upload-pack" ||
 			strings.HasSuffix(c.Req.URL.Path, "git-upload-pack") ||
-			c.Req.Method == "GET"
+			c.Req.Method == "GET" || c.Req.Method == "HEAD"
 
 		owner, err := db.GetUserByName(ownerName)
 		if err != nil {
@@ -361,6 +361,12 @@ var routes = []struct {
 	{regexp.MustCompile("(.*?)/objects/[0-9a-f]{2}/[0-9a-f]{38}$"), "GET", getLooseObject},
 	{regexp.MustCompile("(.*?)/objects/pack/pack-[0-9a-f]{40}\\.pack$"), "GET", getPackFile},
 	{regexp.MustCompile("(.*?)/objects/pack/pack-[0-9a-f]{40}\\.idx$"), "GET", getIdxFile},
+	// files neeeded for git-annex access to the repository over http
+	{regexp.MustCompile("(.*?)/config$"), "GET", getTextFile},
+	// TODO: we probably just need to provide some getBinaryFile
+	// Note: code below treats HEAD (used by git annex drop to sense presence)
+	// as "GET" for the purpose of allowing or not the route
+	{regexp.MustCompile("(.*?)/annex/objects/.*/.*-s[0-9]*--.*"), "GET", getTextFile},
 }
 
 func getGitRepoPath(dir string) (string, error) {
@@ -377,8 +383,14 @@ func getGitRepoPath(dir string) (string, error) {
 }
 
 func HTTP(c *HTTPContext) {
+    var reqPath string
 	for _, route := range routes {
-		reqPath := strings.ToLower(c.Req.URL.Path)
+		// Annex keys are case sensitive, so they must not be lower cased
+		if strings.Contains(c.Req.URL.Path, "/annex/objects/") {
+			reqPath = c.Req.URL.Path
+		} else {
+			reqPath = strings.ToLower(c.Req.URL.Path)
+		}
 		m := route.reg.FindStringSubmatch(reqPath)
 		if m == nil {
 			continue
@@ -392,7 +404,10 @@ func HTTP(c *HTTPContext) {
 			return
 		}
 
-		if route.method != c.Req.Method {
+		req_method := c.Req.Method
+		// Treat HEAD (used by e.g. git annex drop) as GET
+		if req_method == "HEAD" { req_method = "GET" }
+		if route.method != req_method {
 			c.NotFound()
 			return
 		}