12 "github.com/git-lfs/git-lfs/filepathfilter"
13 "github.com/git-lfs/git-lfs/git"
16 // TreeBlob is an entry from ls-tree or rev-list, including a blob sha and tree path.
17 type TreeBlob struct {
22 func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter) error {
23 // We don't use the nameMap approach here since that's imprecise when >1 file
24 // can be using the same content
25 treeShas, err := lsTreeBlobs(ref, filter)
30 pcw, err := catFileBatchTree(treeShas)
35 for p := range pcw.Results {
39 if err := pcw.Wait(); err != nil {
45 // catFileBatchTree uses git cat-file --batch to get the object contents
46 // of a git object, given its sha1. The contents will be decoded into
47 // a Git LFS pointer. treeblobs wraps the channel over which blob entries
48 // arrive. It returns a wrapper around a channel of *WrappedPointer values.
49 func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) {
50 scanner, err := NewPointerScanner()
57 pointers := make(chan *WrappedPointer, chanBufSize)
58 errchan := make(chan error, 10) // Multiple errors possible
61 for t := range treeblobs.Results {
62 hasNext := scanner.Scan(t.Sha1)
63 if p := scanner.Pointer(); p != nil {
68 if err := scanner.Err(); err != nil {
77 // Deal with nested error from incoming treeblobs
78 err := treeblobs.Wait()
83 if err = scanner.Close(); err != nil {
91 return NewPointerChannelWrapper(pointers, errchan), nil
94 // lsTreeBlobs uses ls-tree at ref to find candidate tree blobs that might be
95 // LFS files. The returned channel wrapper delivers those blobs, which should
96 // then be passed to catFileBatchTree for a final check & conversion to Pointer.
97 func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) {
98 cmd, err := git.LsTree(ref)
105 blobs := make(chan TreeBlob, chanBufSize)
106 errchan := make(chan error, 1)
109 scanner := newLsTreeScanner(cmd.Stdout)
111 if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) {
116 stderr, _ := ioutil.ReadAll(cmd.Stderr)
119 errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
125 return NewTreeBlobChannelWrapper(blobs, errchan), nil
128 type lsTreeScanner struct {
133 func newLsTreeScanner(r io.Reader) *lsTreeScanner {
134 s := bufio.NewScanner(r)
135 s.Split(scanNullLines)
136 return &lsTreeScanner{s: s}
139 func (s *lsTreeScanner) TreeBlob() *TreeBlob {
143 func (s *lsTreeScanner) Err() error {
147 func (s *lsTreeScanner) Scan() bool {
148 t, hasNext := s.next()
153 func (s *lsTreeScanner) next() (*TreeBlob, bool) {
154 hasNext := s.s.Scan()
156 parts := strings.SplitN(line, "\t", 2)
161 attrs := strings.SplitN(parts[0], " ", 4)
166 if attrs[1] != "blob" {
170 sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
175 if sz < blobSizeCutoff {
178 return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
183 func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
184 if atEOF && len(data) == 0 {
188 if i := bytes.IndexByte(data, '\000'); i >= 0 {
189 // We have a full null-terminated line.
190 return i + 1, data[0:i], nil
193 // If we're at EOF, we have a final, non-terminated line. Return it.
195 return len(data), data, nil
198 // Request more data.