#include "blame.h"
#include "alloc.h"
#include "commit-slab.h"
+#include "bloom.h"
+#include "commit-graph.h"
define_commit_slab(blame_suspects, struct blame_origin *);
static struct blame_suspects blame_suspects;
static void free_fingerprint(struct fingerprint *f)
{
- hashmap_free(&f->map);
+ hashmap_clear(&f->map); /* tear down the embedded map before the entries array is freed below */
free(f->entries);
}
for (ent = sb->ent; ent && (next = ent->next); ent = next) {
if (ent->suspect == next->suspect &&
ent->s_lno + ent->num_lines == next->s_lno &&
+ ent->lno + ent->num_lines == next->lno &&
ent->ignored == next->ignored &&
ent->unblamable == next->unblamable) {
ent->num_lines += next->num_lines;
return -1;
}
+struct blame_bloom_data {
+ /*
+ * Changed-path Bloom filter keys. These can help prevent
+ * computing diffs against first parents, but we need to
+ * expand the list as code is moved or files are renamed.
+ *
+ * One key is added for the path being blamed when this data is
+ * set up, and another for every rename/copy source path that
+ * find_rename() follows.
+ */
+ struct bloom_filter_settings *settings; /* handed to fill_bloom_key() and bloom_filter_contains() */
+ struct bloom_key **keys; /* array of individually allocated keys, one per tracked path */
+ int nr; /* number of slots of keys[] currently in use */
+ int alloc; /* allocated capacity of keys[] */
+};
+
+/*
+ * Statistics reported through trace2 by cleanup_scoreboard(): the
+ * number of Bloom filter lookups performed, and how many of those
+ * lookups answered "definitely not changed".
+ */
+static int bloom_count_queries;
+static int bloom_count_no;
+
+/*
+ * Consult the changed-path Bloom filter of origin->commit for the
+ * paths tracked in bd.
+ *
+ * Returns 0 only when a filter was obtained and it reports none of
+ * the keys in bd as present.  Returns 1 ("maybe changed") in every
+ * other case: bd is NULL, the commit's generation number is
+ * GENERATION_NUMBER_INFINITY, no filter is available, or at least one
+ * key is reported as contained in the filter.
+ */
+static int maybe_changed_path(struct repository *r,
+			      struct blame_origin *origin,
+			      struct blame_bloom_data *bd)
+{
+	struct bloom_filter *commit_filter;
+	struct bloom_key **cur;
+
+	if (!bd ||
+	    commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
+		return 1;
+
+	commit_filter = get_bloom_filter(r, origin->commit);
+	if (!commit_filter)
+		return 1;
+
+	/* Only lookups that actually reach a filter are counted. */
+	bloom_count_queries++;
+
+	for (cur = bd->keys; cur < bd->keys + bd->nr; cur++) {
+		if (bloom_filter_contains(commit_filter, *cur, bd->settings))
+			return 1;
+	}
+
+	/* No tracked path matched the filter. */
+	bloom_count_no++;
+	return 0;
+}
+
+/*
+ * Append a Bloom key for path to the list tracked in bd, doubling the
+ * capacity of bd->keys when it is full.  Does nothing when bd is NULL.
+ *
+ * The key is allocated here and filled from path using bd->settings;
+ * cleanup_scoreboard() releases it again.
+ */
+static void add_bloom_key(struct blame_bloom_data *bd,
+			  const char *path)
+{
+	struct bloom_key *fresh;
+
+	if (!bd)
+		return;
+
+	if (bd->alloc <= bd->nr) {
+		bd->alloc = bd->alloc * 2;
+		REALLOC_ARRAY(bd->keys, bd->alloc);
+	}
+
+	fresh = xmalloc(sizeof(*fresh));
+	fill_bloom_key(path, strlen(path), fresh, bd->settings);
+
+	bd->keys[bd->nr] = fresh;
+	bd->nr += 1;
+}
+
/*
* We have an origin -- check if the same path exists in the
* parent and return an origin structure to represent it.
*/
static struct blame_origin *find_origin(struct repository *r,
struct commit *parent,
- struct blame_origin *origin)
+ struct blame_origin *origin,
+ struct blame_bloom_data *bd)
{
struct blame_origin *porigin;
struct diff_options diff_opts;
if (is_null_oid(&origin->commit->object.oid))
do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
- else
- diff_tree_oid(get_commit_tree_oid(parent),
- get_commit_tree_oid(origin->commit),
- "", &diff_opts);
+ else {
+ int compute_diff = 1;
+ if (origin->commit->parents &&
+ oideq(&parent->object.oid,
+ &origin->commit->parents->item->object.oid))
+ compute_diff = maybe_changed_path(r, origin, bd);
+
+ if (compute_diff)
+ diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
+ "", &diff_opts);
+ }
diffcore_std(&diff_opts);
if (!diff_queued_diff.nr) {
*/
static struct blame_origin *find_rename(struct repository *r,
struct commit *parent,
- struct blame_origin *origin)
+ struct blame_origin *origin,
+ struct blame_bloom_data *bd)
{
struct blame_origin *porigin = NULL;
struct diff_options diff_opts;
struct diff_filepair *p = diff_queued_diff.queue[i];
if ((p->status == 'R' || p->status == 'C') &&
!strcmp(p->two->path, origin->path)) {
+ add_bloom_key(bd, p->one->path);
porigin = get_origin(parent, p->one->path);
oidcpy(&porigin->blob_oid, &p->one->oid);
porigin->mode = p->one->mode;
#define MAXSG 16
+typedef struct blame_origin *(*blame_find_alg)(struct repository *,
+ struct commit *, /* parent commit to search */
+ struct blame_origin *, /* origin being traced back */
+ struct blame_bloom_data *); /* Bloom keys; helpers tolerate NULL; shared signature of find_origin() and find_rename() */
+
static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
{
struct rev_info *revs = sb->revs;
* common cases, then we look for renames in the second pass.
*/
for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
- struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
- find = pass ? find_rename : find_origin;
+ blame_find_alg find = pass ? find_rename : find_origin;
for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
i < num_sg && sg;
continue;
if (parse_commit(p))
continue;
- porigin = find(sb->repo, p, origin);
+ porigin = find(sb->repo, p, origin, sb->bloom_data);
if (!porigin)
continue;
if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
if (obj->flags & UNINTERESTING)
continue;
obj = deref_tag(revs->repo, obj, NULL, 0);
- if (obj->type != OBJ_COMMIT)
+ if (!obj || obj->type != OBJ_COMMIT)
die("Non commit %s?", revs->pending.objects[i].name);
if (found)
die("More than one commit to dig from %s and %s?",
/* Is that sole rev a committish? */
obj = revs->pending.objects[0].item;
obj = deref_tag(revs->repo, obj, NULL, 0);
- if (obj->type != OBJ_COMMIT)
+ if (!obj || obj->type != OBJ_COMMIT)
return NULL;
/* Do we have HEAD? */
if (!(obj->flags & UNINTERESTING))
continue;
obj = deref_tag(revs->repo, obj, NULL, 0);
- if (obj->type != OBJ_COMMIT)
+ if (!obj || obj->type != OBJ_COMMIT)
die("Non commit %s?", revs->pending.objects[i].name);
if (found)
die("More than one commit to dig up from, %s and %s?",
}
void setup_scoreboard(struct blame_scoreboard *sb,
- const char *path,
struct blame_origin **orig)
{
const char *final_commit_name = NULL;
setup_work_tree();
sb->final = fake_working_tree_commit(sb->repo,
&sb->revs->diffopt,
- path, sb->contents_from);
+ sb->path, sb->contents_from);
add_pending_object(sb->revs, &(sb->final->object), ":");
}
sb->final_buf_size = o->file.size;
}
else {
- o = get_origin(sb->final, path);
+ o = get_origin(sb->final, sb->path);
if (fill_blob_sha1_and_mode(sb->repo, o))
- die(_("no such path %s in %s"), path, final_commit_name);
+ die(_("no such path %s in %s"), sb->path, final_commit_name);
if (sb->revs->diffopt.flags.allow_textconv &&
- textconv_object(sb->repo, path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
+ textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
&sb->final_buf_size))
;
else
if (!sb->final_buf)
die(_("cannot read blob %s for path %s"),
oid_to_hex(&o->blob_oid),
- path);
+ sb->path);
}
sb->num_read_blob++;
prepare_lines(sb);
blame_origin_incref(o);
return new_head;
}
+
+/*
+ * Prepare sb->bloom_data so that later passes can consult changed-path
+ * Bloom filters.
+ *
+ * Leaves sb->bloom_data untouched when the repository has no
+ * commit-graph loaded or no Bloom filter settings can be obtained.
+ * Otherwise allocates the key list with room for four keys and seeds
+ * it with a key for sb->path.
+ */
+void setup_blame_bloom_data(struct blame_scoreboard *sb)
+{
+	struct bloom_filter_settings *settings;
+	struct blame_bloom_data *data;
+
+	/* Settings are only looked up when a commit-graph is present. */
+	settings = sb->repo->objects->commit_graph ?
+		get_bloom_filter_settings(sb->repo) : NULL;
+	if (!settings)
+		return;
+
+	data = xmalloc(sizeof(*data));
+	data->settings = settings;
+	data->nr = 0;
+	data->alloc = 4;
+	ALLOC_ARRAY(data->keys, data->alloc);
+
+	add_bloom_key(data, sb->path);
+
+	sb->bloom_data = data;
+}
+
+/*
+ * Release the Bloom key data attached to sb, if any, and report the
+ * Bloom filter statistics through trace2.
+ *
+ * Nothing is freed or traced when sb->bloom_data is NULL.  On return
+ * sb->bloom_data is NULL.
+ */
+void cleanup_scoreboard(struct blame_scoreboard *sb)
+{
+	struct blame_bloom_data *bd = sb->bloom_data;
+	int k;
+
+	if (!bd)
+		return;
+
+	/* Each key owns its hashes array, so free that before the key. */
+	for (k = 0; k < bd->nr; k++) {
+		struct bloom_key *key = bd->keys[k];
+
+		free(key->hashes);
+		free(key);
+	}
+	free(bd->keys);
+	FREE_AND_NULL(sb->bloom_data);
+
+	trace2_data_intmax("blame", sb->repo,
+			   "bloom/queries", bloom_count_queries);
+	trace2_data_intmax("blame", sb->repo,
+			   "bloom/response-no", bloom_count_no);
+}