pack.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. package gig
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "os"
  7. "strings"
  8. )
  9. // Resources:
  10. // https://github.com/git/git/blob/master/Documentation/technical/pack-format.txt
  11. // http://schacon.github.io/gitbook/7_the_packfile.html
  12. //PackHeader stores version and number of objects in the packfile
  13. // all data is in network-byte order (big-endian)
  14. type PackHeader struct {
  15. Sig [4]byte
  16. Version uint32
  17. Objects uint32
  18. }
  19. //FanOut table where the "N-th entry of this table records the
  20. // number of objects in the corresponding pack, the first
  21. // byte of whose object name is less than or equal to N.
  22. type FanOut [256]uint32
  23. //Bounds returns the how many objects whose first byte
  24. //has a value of b-1 (in s) and b (returned in e)
  25. //are contained in the fanout table
  26. func (fo FanOut) Bounds(b byte) (s, e int) {
  27. e = int(fo[b])
  28. if b > 0 {
  29. s = int(fo[b-1])
  30. }
  31. return
  32. }
  33. //PackIndex represents the git pack file
  34. //index. It is the main object to use for
  35. //opening objects contained in packfiles
  36. //vai OpenObject
  37. type PackIndex struct {
  38. *os.File
  39. Version uint32
  40. FO FanOut
  41. shaBase int64
  42. }
  43. //PackFile is git pack file with the actual
  44. //data in it. It should normally not be used
  45. //directly.
  46. type PackFile struct {
  47. *os.File
  48. Version uint32
  49. ObjCount uint32
  50. }
  51. //PackIndexOpen opens the git pack file with the given
  52. //path. The ".idx" if missing will be appended.
  53. func PackIndexOpen(path string) (*PackIndex, error) {
  54. if !strings.HasSuffix(path, ".idx") {
  55. path += ".idx"
  56. }
  57. fd, err := os.Open(path)
  58. if err != nil {
  59. return nil, fmt.Errorf("git: could not read pack index: %v", err)
  60. }
  61. idx := &PackIndex{File: fd, Version: 1}
  62. var peek [4]byte
  63. err = binary.Read(fd, binary.BigEndian, &peek)
  64. if err != nil {
  65. fd.Close()
  66. return nil, fmt.Errorf("git: could not read pack index: %v", err)
  67. }
  68. if bytes.Equal(peek[:], []byte("\377tOc")) {
  69. binary.Read(fd, binary.BigEndian, &idx.Version)
  70. }
  71. if idx.Version == 1 {
  72. _, err = idx.Seek(0, 0)
  73. if err != nil {
  74. fd.Close()
  75. return nil, fmt.Errorf("git: io error: %v", err)
  76. }
  77. } else if idx.Version > 2 {
  78. fd.Close()
  79. return nil, fmt.Errorf("git: unsupported pack index version: %d", idx.Version)
  80. }
  81. err = binary.Read(idx, binary.BigEndian, &idx.FO)
  82. if err != nil {
  83. idx.Close()
  84. return nil, fmt.Errorf("git: io error: %v", err)
  85. }
  86. idx.shaBase = int64((idx.Version-1)*8) + int64(binary.Size(idx.FO))
  87. return idx, nil
  88. }
  89. //ReadSHA1 reads the SHA1 stared at position pos (in the FanOut table).
  90. func (pi *PackIndex) ReadSHA1(chksum *SHA1, pos int) error {
  91. if version := pi.Version; version != 2 {
  92. return fmt.Errorf("git: v%d version support incomplete", version)
  93. }
  94. start := pi.shaBase
  95. _, err := pi.ReadAt(chksum[0:20], start+int64(pos)*int64(20))
  96. if err != nil {
  97. return err
  98. }
  99. return nil
  100. }
  101. //ReadOffset returns the offset in the pack file of the object
  102. //at position pos in the FanOut table.
  103. func (pi *PackIndex) ReadOffset(pos int) (int64, error) {
  104. if version := pi.Version; version != 2 {
  105. return -1, fmt.Errorf("git: v%d version incomplete", version)
  106. }
  107. //header[2*4] + FanOut[256*4] + n * (sha1[20]+crc[4])
  108. start := int64(2*4+256*4) + int64(pi.FO[255]*24) + int64(pos*4)
  109. var offset uint32
  110. _, err := pi.Seek(start, 0)
  111. if err != nil {
  112. return -1, fmt.Errorf("git: io error: %v", err)
  113. }
  114. err = binary.Read(pi, binary.BigEndian, &offset)
  115. if err != nil {
  116. return -1, err
  117. }
  118. //see if msb is set, if so this is an
  119. // offset into the 64b_offset table
  120. if val := uint32(1<<31) & offset; val != 0 {
  121. return -1, fmt.Errorf("git: > 31 bit offests not implemented. Meh")
  122. }
  123. return int64(offset), nil
  124. }
  125. func (pi *PackIndex) findSHA1(target SHA1) (int, error) {
  126. //s, e and midpoint are one-based indices,
  127. //where s is the index before interval and
  128. //e is the index of the last element in it
  129. //-> search interval is: (s | 1, 2, ... e]
  130. s, e := pi.FO.Bounds(target[0])
  131. //invariant: object is, if present, in the interval, (s, e]
  132. for s < e {
  133. midpoint := s + (e-s+1)/2
  134. var sha SHA1
  135. err := pi.ReadSHA1(&sha, midpoint-1)
  136. if err != nil {
  137. return 0, fmt.Errorf("git: io error: %v", err)
  138. }
  139. switch bytes.Compare(target[:], sha[:]) {
  140. case -1: // target < sha1, new interval (s, m-1]
  141. e = midpoint - 1
  142. case +1: //taget > sha1, new interval (m, e]
  143. s = midpoint
  144. default:
  145. return midpoint - 1, nil
  146. }
  147. }
  148. return 0, fmt.Errorf("git: sha1 not found in index")
  149. }
  150. //FindOffset tries to find object with the id target and if
  151. //if found returns the offset of the object in the pack file.
  152. //Returns an error that can be detected by os.IsNotExist if
  153. //the object could not be found.
  154. func (pi *PackIndex) FindOffset(target SHA1) (int64, error) {
  155. pos, err := pi.findSHA1(target)
  156. if err != nil {
  157. return 0, err
  158. }
  159. off, err := pi.ReadOffset(pos)
  160. if err != nil {
  161. return 0, err
  162. }
  163. return off, nil
  164. }
  165. //OpenPackFile opens the corresponding pack file.
  166. func (pi *PackIndex) OpenPackFile() (*PackFile, error) {
  167. f := pi.Name()
  168. pf, err := OpenPackFile(f[:len(f)-4] + ".pack")
  169. if err != nil {
  170. return nil, err
  171. }
  172. return pf, nil
  173. }
  174. //OpenObject will try to find the object with the given id
  175. //in it is index and then reach out to its corresponding
  176. //pack file to open the actual git Object.
  177. //If the object cannot be found it will return an error
  178. //the can be detected via os.IsNotExist()
  179. //Delta objects will returned as such and not be resolved.
  180. func (pi *PackIndex) OpenObject(id SHA1) (Object, error) {
  181. off, err := pi.FindOffset(id)
  182. if err != nil {
  183. return nil, err
  184. }
  185. pf, err := pi.OpenPackFile()
  186. if err != nil {
  187. return nil, err
  188. }
  189. obj, err := pf.readRawObject(off)
  190. if err != nil {
  191. return nil, err
  192. }
  193. if IsStandardObject(obj.otype) {
  194. return parseObject(obj)
  195. }
  196. if !IsDeltaObject(obj.otype) {
  197. return nil, fmt.Errorf("git: unsupported object")
  198. }
  199. //This is a delta object
  200. delta, err := parseDelta(obj)
  201. return delta, err
  202. }
  203. //OpenPackFile opens the git pack file at the given path
  204. //It will check the pack file header and version.
  205. //Currently only version 2 is supported.
  206. //NB: This is low-level API and should most likely
  207. //not be used directly.
  208. func OpenPackFile(path string) (*PackFile, error) {
  209. osfd, err := os.Open(path)
  210. if err != nil {
  211. return nil, err
  212. }
  213. var header PackHeader
  214. err = binary.Read(osfd, binary.BigEndian, &header)
  215. if err != nil {
  216. return nil, fmt.Errorf("git: could not read header: %v", err)
  217. }
  218. if string(header.Sig[:]) != "PACK" {
  219. return nil, fmt.Errorf("git: packfile signature error")
  220. }
  221. if header.Version != 2 {
  222. return nil, fmt.Errorf("git: unsupported packfile version")
  223. }
  224. fd := &PackFile{File: osfd,
  225. Version: header.Version,
  226. ObjCount: header.Objects}
  227. return fd, nil
  228. }
  229. func (pf *PackFile) readRawObject(offset int64) (gitObject, error) {
  230. r := newPackReader(pf, offset)
  231. b, err := r.ReadByte()
  232. if err != nil {
  233. return gitObject{}, fmt.Errorf("git: io error: %v", err)
  234. }
  235. //object header format:
  236. //[mxxx tttt] (byte)
  237. // tttt -> type [4 bit]
  238. otype := ObjectType((b & 0x70) >> 4)
  239. // xxx -> size [3 bit]
  240. size := int64(b & 0xF)
  241. // m -> 1, if size > 2^3 (n-byte encoding)
  242. if b&0x80 != 0 {
  243. s, err := readVarSize(r, 4)
  244. if err != nil {
  245. return gitObject{}, err
  246. }
  247. size += s
  248. }
  249. obj := gitObject{otype, size, r}
  250. if IsStandardObject(otype) {
  251. err = obj.wrapSourceWithDeflate()
  252. if err != nil {
  253. return gitObject{}, err
  254. }
  255. }
  256. return obj, nil
  257. }
  258. //OpenObject reads the git object header at offset and
  259. //then parses the data as the corresponding object type.
  260. func (pf *PackFile) OpenObject(offset int64) (Object, error) {
  261. obj, err := pf.readRawObject(offset)
  262. if err != nil {
  263. return nil, err
  264. }
  265. switch obj.otype {
  266. case ObjCommit:
  267. return parseCommit(obj)
  268. case ObjTree:
  269. return parseTree(obj)
  270. case ObjBlob:
  271. return parseBlob(obj)
  272. case ObjTag:
  273. return parseTag(obj)
  274. case ObjOFSDelta:
  275. fallthrough
  276. case ObjRefDelta:
  277. return parseDelta(obj)
  278. default:
  279. return nil, fmt.Errorf("git: unknown object type")
  280. }
  281. }
  282. type packReader struct {
  283. fd *PackFile
  284. start int64
  285. off int64
  286. }
  287. func newPackReader(fd *PackFile, offset int64) *packReader {
  288. return &packReader{fd: fd, start: offset, off: offset}
  289. }
  290. func (p *packReader) Read(d []byte) (n int, err error) {
  291. n, err = p.fd.ReadAt(d, p.off)
  292. p.off += int64(n)
  293. return
  294. }
  295. func (p *packReader) ReadByte() (c byte, err error) {
  296. var b [1]byte
  297. _, err = p.Read(b[:])
  298. c = b[0]
  299. return
  300. }
  301. func (p *packReader) Close() (err error) {
  302. return //noop
  303. }