13 "github.com/git-lfs/git-lfs/errors"
14 "github.com/rubyist/tracerx"
17 // ScanningMode is a constant type that allows for variation in the range of
18 // commits to scan when given to the `*git.RevListScanner` type. It is read
// from the Mode field of the *ScanRefsOptions handed to revListArgs.
22 // ScanRefsMode will scan between two refspecs.
23 ScanRefsMode ScanningMode = iota
24 // ScanAllMode will scan all history.
26 // ScanLeftToRemoteMode will scan the difference between any included
27 // SHA1s and a remote tracking ref (see ScanRefsOptions.Remote).
31 // RevListOrder is a constant type that allows for variation in the ordering of
32 // revisions given by the *RevListScanner below.
//
// Each value is translated into a git-rev-list(1) command-line flag by the
// Flag() method: DateRevListOrder to "--date-order", AuthorDateRevListOrder to
// "--author-date-order", and TopoRevListOrder to "--topo-order".
36 // DefaultRevListOrder is the zero-value for this type and yields the
37 // results as given by git-rev-list(1) without any `--<t>-order`
38 // argument given. By default: reverse chronological order.
39 DefaultRevListOrder RevListOrder = iota
40 // DateRevListOrder gives the revisions such that no parents are shown
41 // before children, and otherwise in commit timestamp order.
43 // AuthorDateRevListOrder gives the revisions such that no parents are
44 // shown before children, and otherwise in author date timestamp order.
45 AuthorDateRevListOrder
46 // TopoRevListOrder gives the revisions such that they appear in
51 // Flag returns the command-line flag to be passed to git-rev-list(1) in order
52 // to order the output according to the given RevListOrder. It returns both the
53 // flag ("--date-order", "--topo-order", etc) and a bool, whether or not to
54 // append the flag (for instance, DefaultRevListOrder requires no flag).
// Callers such as revListArgs append the returned flag only when the bool is
// true.
56 // Given a value other than those defined above, Flag() will panic().
57 func (o RevListOrder) Flag() (string, bool) {
59 case DefaultRevListOrder:
61 case DateRevListOrder:
62 return "--date-order", true
63 case AuthorDateRevListOrder:
64 return "--author-date-order", true
65 case TopoRevListOrder:
66 return "--topo-order", true
// An undeclared RevListOrder is treated as a programmer error; the panic
// message carries the numeric value of o.
68 panic(fmt.Sprintf("git/rev_list_scanner: unknown RevListOrder %d", o))
72 // ScanRefsOptions is an "options" type that is used to configure a scan
73 // operation on the `*git.RevListScanner` instance when given to the function
74 // `NewRevListScanner()`.
75 type ScanRefsOptions struct {
76 // Mode is the scan mode to apply, see above.
78 // Remote is the current remote to scan against, if using
79 // ScanLeftToRemoteMode.
81 // SkipDeletedBlobs specifies whether or not to traverse into commit
82 // ancestry (revealing potentially deleted (unreferenced) blobs, trees,
85 // Order specifies the order in which revisions are yielded from the
86 // output of `git-rev-list(1)`. For more information, see the above
87 // documentation on the RevListOrder type.
89 // CommitsOnly specifies whether or not the *RevListScanner should
90 // return only commits, or all objects in range by performing a
91 // traversal of the graph. By default, false: show all objects.
93 // WorkingDir specifies the working directory in which to run
94 // git-rev-list(1). If this is an empty string (len(WorkingDir) == 0),
95 // the command's directory is left unset, which is equivalent to running
// in os.Getwd().
97 // Reverse specifies whether or not to give the revisions in reverse
101 // SkippedRefs provides a list of refs to ignore. It is consulted by
// revListArgs when Mode is ScanLeftToRemoteMode.
103 // Mutex guards Names (see GetName and SetName).
105 // Names maps Git object IDs (encoded as hex using
106 // hex.EncodeToString()) to their names, i.e., a directory name
107 // (fully-qualified) for trees, or a pathspec for blob tree entries.
108 Names map[string]string
111 // GetName returns the name associated with a given blob/tree sha and "true" if
112 // it exists, or ("", false) if it doesn't. The sha is the hex-encoded object
// ID used as the key into o.Names.
114 // GetName is guarded by a use of o.Mutex, and is goroutine safe.
115 func (o *ScanRefsOptions) GetName(sha string) (string, bool) {
// The deferred Unlock releases o.Mutex when GetName returns.
117 defer o.Mutex.Unlock()
// The comma-ok lookup distinguishes a missing sha from one mapped to "".
119 name, ok := o.Names[sha]
123 // SetName sets the name associated with a given blob/tree sha. The sha is the
// hex-encoded object ID; name is the tree directory name or blob pathspec
// described on the Names field.
125 // SetName is guarded by a use of o.Mutex, and is therefore goroutine safe.
126 func (o *ScanRefsOptions) SetName(sha, name string) {
// The deferred Unlock releases o.Mutex when SetName returns.
128 defer o.Mutex.Unlock()
133 // RevListScanner is a Scanner type that parses through results of the `git
134 // rev-list` command.
135 type RevListScanner struct {
136 // s is a buffered scanner feeding from the output (stdout) of a
137 // git-rev-list(1) invocation.
139 // closeFn is an optional function returning an error yielded by closing any
140 // resources held by an open (running) instance of the *RevListScanner
144 // name is the name of the most recently read object, as returned by Name().
146 // oid is the hex-decoded oid of the most recently read object, as returned by OID().
148 // err is the most recently encountered error, as returned by Err().
153 // ambiguousRegex is a regular expression matching the output of stderr
154 // when ambiguous refnames are encountered. Its single capture group holds
// the refname reported as ambiguous.
155 ambiguousRegex = regexp.MustCompile(`warning: refname (.*) is ambiguous`)
157 // z40 is a regular expression matching the empty (all-zero) blob/commit/tree
158 // SHA: "0000000000000000000000000000000000000000", optionally preceded by
// a "^". The pattern is not anchored, so it matches any string that
// contains forty consecutive zeros.
159 z40 = regexp.MustCompile(`\^?0{40}`)
162 // NewRevListScanner instantiates a new RevListScanner instance scanning all
163 // revisions reachable by refs contained in "include" and not reachable by any
164 // refs included in "excluded", using the *ScanRefsOptions "opt" configuration.
166 // It returns a new *RevListScanner instance, or an error if one was
167 // encountered. Upon returning, the `git-rev-list(1)` instance is already
168 // running, and Scan() may be called immediately.
//
// The command is only Wait()-ed on, and its stderr only inspected, by the
// closeFn stored on the returned scanner.
169 func NewRevListScanner(include, excluded []string, opt *ScanRefsOptions) (*RevListScanner, error) {
// revListArgs produces both the argv for git and the reader holding the
// revisions to be supplied on standard input.
170 stdin, args, err := revListArgs(include, excluded, opt)
175 cmd := gitNoLFS(args...).Cmd
// Only override the command's working directory when one was requested.
176 if len(opt.WorkingDir) > 0 {
177 cmd.Dir = opt.WorkingDir
// Both pipes must be requested before the command is started.
181 stdout, err := cmd.StdoutPipe()
185 stderr, err := cmd.StderrPipe()
190 tracerx.Printf("run_command: git %s", strings.Join(args, " "))
191 if err := cmd.Start(); err != nil {
195 return &RevListScanner{
196 s: bufio.NewScanner(stdout),
197 closeFn: func() error {
// Read all of stderr before calling Wait(), since Wait() closes the
// pipe once the command exits. The read error is discarded; msg
// holds whatever was read.
198 msg, _ := ioutil.ReadAll(stderr)
200 // First check if there was a non-zero exit code given
201 // when Wait()-ing on the command execution.
202 if err := cmd.Wait(); err != nil {
203 return errors.Errorf("Error in git %s: %v %s",
204 strings.Join(args, " "), err, msg)
207 // If the command exited cleanly, but found an ambiguous
208 // refname, promote that to an error and return it.
210 // `git-rev-list(1)` does not treat ambiguous refnames
211 // as fatal (non-zero exit status), but we do.
// am[1] is the refname captured by ambiguousRegex.
212 if am := ambiguousRegex.FindSubmatch(msg); len(am) > 1 {
213 return errors.Errorf("ref %s is ambiguous", am[1])
220 // revListArgs returns the arguments for a given included and excluded set of
221 // SHA1s, and ScanRefsOptions instance.
223 // In order, it returns the contents of stdin as an io.Reader, the args passed
224 // to git as a []string, and any error encountered in generating those if one
226 func revListArgs(include, exclude []string, opt *ScanRefsOptions) (io.Reader, []string, error) {
// --stdin makes git-rev-list(1) read revisions from standard input in
// addition to the command line.
228 args := []string{"rev-list", "--stdin"}
// --objects asks for every object in range rather than commits only.
229 if !opt.CommitsOnly {
230 args = append(args, "--objects")
234 args = append(args, "--reverse")
// Flag() reports ok == false when no ordering flag is needed (for
// instance, DefaultRevListOrder).
237 if orderFlag, ok := opt.Order.Flag(); ok {
238 args = append(args, orderFlag)
// --no-walk limits output to the given revisions without traversing
// their ancestry; --do-walk requests the traversal explicitly.
243 if opt.SkipDeletedBlobs {
244 args = append(args, "--no-walk")
246 args = append(args, "--do-walk")
// Revisions are written one per line; excluded ones carry a "^" prefix.
249 stdin = strings.NewReader(strings.Join(
250 includeExcludeShas(include, exclude), "\n"))
252 args = append(args, "--all")
253 case ScanLeftToRemoteMode:
// With no refs to skip, exclude everything reachable from the remote's
// tracking refs via --not --remotes=<Remote>.
254 if len(opt.SkippedRefs) == 0 {
255 args = append(args, "--not", "--remotes="+opt.Remote)
256 stdin = strings.NewReader(strings.Join(
257 includeExcludeShas(include, exclude), "\n"))
// Otherwise the skipped refs are written to stdin after the
// include/exclude revisions.
259 stdin = strings.NewReader(strings.Join(
260 append(includeExcludeShas(include, exclude), opt.SkippedRefs...), "\n"),
// The error reports the numeric value of the unrecognized opt.Mode.
264 return nil, nil, errors.Errorf("unknown scan type: %d", opt.Mode)
// A trailing "--" is appended as the final argument; git uses it to
// separate revisions from paths.
266 return stdin, append(args, "--"), nil
// includeExcludeShas builds the list of revisions handed to git-rev-list(1):
// the entries of include as given, followed by the entries of exclude, each
// prefixed with "^". Both inputs are first filtered through nonZeroShas.
269 func includeExcludeShas(include, exclude []string) []string {
270 include = nonZeroShas(include)
271 exclude = nonZeroShas(exclude)
// The final length is known up front: one entry per remaining include and
// exclude.
273 args := make([]string, 0, len(include)+len(exclude))
275 for _, i := range include {
276 args = append(args, i)
// A leading "^" marks a revision whose reachable commits are excluded.
279 for _, x := range exclude {
280 args = append(args, fmt.Sprintf("^%s", x))
// nonZeroShas filters all, selecting the entries that are non-empty and do not
// match z40 (the all-zero object ID pattern).
286 func nonZeroShas(all []string) []string {
// At most len(all) entries can be selected.
287 nz := make([]string, 0, len(all))
289 for _, sha := range all {
290 if len(sha) > 0 && !z40.MatchString(sha) {
297 // Name is an optional field that gives the name of the object (if the object is
300 // It can be called before or after Scan(), but will return "" if called
302 func (s *RevListScanner) Name() string { return s.name }
304 // OID is the hex-decoded bytes of the object's ID.
306 // It can be called before or after Scan(), but will return "" if called
308 func (s *RevListScanner) OID() []byte { return s.oid }
310 // Err returns the last encountered error (or nil) after a call to Scan().
312 // It SHOULD be called, checked and handled after a call to Scan().
313 func (s *RevListScanner) Err() error { return s.err }
315 // Scan scans the next entry given by git-rev-list(1), and returns true/false
316 // indicating if there are more results to scan. The entry read is stored on
// the scanner and exposed through OID() and Name().
317 func (s *RevListScanner) Scan() bool {
// scan() performs the actual line read and parsing.
319 s.oid, s.name, err = s.scan()
// An entry counts as a result only when an object ID was read.
327 return len(s.oid) > 0
330 // Close closes the RevListScanner by freeing any resources held by the
331 // instance while running, and returns any error encountered while doing so.
332 func (s *RevListScanner) Close() error {
// closeFn is optional, so a scanner without one is handled separately.
333 if s.closeFn == nil {
339 // scan provides the internal implementation of scanning a line of text from the
340 // output of `git-rev-list(1)`.
//
// In order, it returns the hex-decoded object ID, the object's name, and any
// error encountered.
341 func (s *RevListScanner) scan() ([]byte, string, error) {
// Report the underlying bufio.Scanner's error, which is nil when the input
// simply ended.
343 return nil, "", s.s.Err()
346 line := strings.TrimSpace(s.s.Text())
// The first 40 characters of the line are taken as the hex-encoded object ID.
// NOTE(review): line[:40] panics on a shorter line; confirm a length guard
// precedes this statement.
351 sha1, err := hex.DecodeString(line[:40])
361 return sha1, name, nil