2 * Copyright (c) 2009-2013, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
14 #include "repo_rpmdb.h"
15 #include "pool_fileconflicts.h"
21 Queue lookat; /* conflict candidates */
22 Queue lookat_dir; /* not yet conflicting directories */
26 unsigned int cflmapused;
30 unsigned int dirmapused;
35 unsigned int lastdiridx; /* last diridx we have seen */
36 unsigned int lastdirhash; /* strhash of last dir we have seen */
38 Id idx; /* index of package we're looking at */
39 Id hx; /* used in findfileconflicts2_cb, limit to files matching hx */
42 unsigned char *filesspace;
43 unsigned int filesspacen;
46 #define FILESSPACE_BLOCK 255
49 growhash(Hashtable map, Hashval *mapnp)
51 Hashval mapn = *mapnp;
52 Hashval newn = (mapn + 1) * 2 - 1;
57 m = solv_calloc(newn + 1, 2 * sizeof(Id));
58 for (i = 0; i <= mapn; i++)
70 h = HASHCHAIN_NEXT(h, hh, newn);
73 m[2 * h + 1] = map[2 * i + 1];
81 finddirs_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
83 struct cbdata *cbdata = cbdatav;
86 Id oidx, idx = cbdata->idx;
91 h = hx & cbdata->dirmapn;
95 qx = cbdata->dirmap[2 * h];
100 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
107 cbdata->dirmap[2 * h] = hx;
108 cbdata->dirmap[2 * h + 1] = idx;
109 cbdata->dirmapused++;
110 if (cbdata->dirmapused * 2 > cbdata->dirmapn)
111 cbdata->dirmap = growhash(cbdata->dirmap, &cbdata->dirmapn);
114 oidx = cbdata->dirmap[2 * h + 1];
117 /* found a conflict, this dir may be used in multiple packages */
120 MAPSET(&cbdata->idxmap, oidx);
121 cbdata->dirmap[2 * h + 1] = -1;
122 cbdata->dirconflicts++;
124 MAPSET(&cbdata->idxmap, idx);
128 isindirmap(struct cbdata *cbdata, Id hx)
133 h = hx & cbdata->dirmapn;
134 hh = HASHCHAIN_START;
137 qx = cbdata->dirmap[2 * h];
141 return cbdata->dirmap[2 * h + 1] == -1 ? 1 : 0;
142 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
147 findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
149 struct cbdata *cbdata = cbdatav;
150 int isdir = S_ISDIR(info->mode);
160 dp = fn + info->dirlen;
161 if (info->diridx != cbdata->lastdiridx)
163 cbdata->lastdiridx = info->diridx;
164 cbdata->lastdirhash = strnhash(fn, dp - fn);
166 dhx = cbdata->lastdirhash;
168 /* this mirrors the "if (!hx) hx = strlen(fn) + 1" in finddirs_cb */
169 if (!isindirmap(cbdata, dhx ? dhx : dp - fn + 1))
172 hx = strhash_cont(dp, dhx);
176 h = hx & cbdata->cflmapn;
177 hh = HASHCHAIN_START;
180 qx = cbdata->cflmap[2 * h];
185 h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
192 cbdata->cflmap[2 * h] = hx;
193 cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx);
194 cbdata->cflmapused++;
195 if (cbdata->cflmapused * 2 > cbdata->cflmapn)
196 cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
199 oidx = cbdata->cflmap[2 * h + 1];
205 /* both are directories. delay the conflict, keep oidx in slot */
206 queue_push2(&cbdata->lookat_dir, hx, idx);
210 /* now have file, had directories before. */
211 cbdata->cflmap[2 * h + 1] = oidx; /* make it a file */
212 /* dump all delayed directory hits for hx */
213 for (i = 0; i < cbdata->lookat_dir.count; i += 2)
214 if (cbdata->lookat_dir.elements[i] == hx)
215 queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
217 else if (oidx == idx)
218 return; /* no conflicts with ourself, please */
219 queue_push2(&cbdata->lookat, hx, oidx);
220 queue_push2(&cbdata->lookat, hx, idx);
224 addfilesspace(struct cbdata *cbdata, unsigned char *data, int len)
226 cbdata->filesspace = solv_extend(cbdata->filesspace, cbdata->filesspacen, len, 1, FILESSPACE_BLOCK);
227 memcpy(cbdata->filesspace + cbdata->filesspacen, data, len);
228 cbdata->filesspacen += len;
232 findfileconflicts2_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
234 struct cbdata *cbdata = cbdatav;
241 dp = fn + info->dirlen;
242 if (info->diridx != cbdata->lastdiridx)
244 cbdata->lastdiridx = info->diridx;
245 cbdata->lastdirhash = strnhash(fn, dp - fn);
247 hx = cbdata->lastdirhash;
248 hx = strhash_cont(dp, hx);
251 if ((Id)hx != cbdata->hx)
253 strncpy(md5padded, info->digest, 32);
255 md5padded[33] = info->color;
256 /* printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, fmode, md5); */
257 queue_push(&cbdata->files, cbdata->filesspacen);
258 addfilesspace(cbdata, (unsigned char *)md5padded, 34);
259 addfilesspace(cbdata, (unsigned char *)fn, strlen(fn) + 1);
263 lookat_idx_cmp(const void *ap, const void *bp, void *dp)
265 const Id *a = ap, *b = bp;
266 unsigned int ahx, bhx;
267 if (a[1] - b[1] != 0)
269 ahx = (unsigned int)a[0]; /* a[0] can be < 0 */
270 bhx = (unsigned int)b[0];
271 return ahx < bhx ? -1 : ahx > bhx ? 1 : 0;
275 lookat_hx_cmp(const void *ap, const void *bp, void *dp)
277 const Id *a = ap, *b = bp;
278 unsigned int ahx = (unsigned int)a[0]; /* a[0] can be < 0 */
279 unsigned int bhx = (unsigned int)b[0];
280 return ahx < bhx ? -1 : ahx > bhx ? 1 : a[1] - b[1];
284 conflicts_cmp(const void *ap, const void *bp, void *dp)
289 if (a[0] != b[0]) /* filename1 */
290 return strcmp(pool_id2str(pool, a[0]), pool_id2str(pool, b[0]));
291 if (a[3] != b[3]) /* filename2 */
292 return strcmp(pool_id2str(pool, a[3]), pool_id2str(pool, b[3]));
293 if (a[1] != b[1]) /* pkgid1 */
295 if (a[4] != b[4]) /* pkgid2 */
301 iterate_solvable_dirs(Pool *pool, Id p, void (*cb)(void *, const char *, struct filelistinfo *), void *cbdata)
303 Repodata *lastdata = 0;
307 dataiterator_init(&di, pool, 0, p, SOLVABLE_FILELIST, 0, SEARCH_COMPLETE_FILELIST);
308 while (dataiterator_step(&di))
310 if (di.data == lastdata && di.kv.id == lastdirid)
313 lastdirid = di.kv.id;
314 cb(cbdata, repodata_dir2str(di.data, di.kv.id, ""), 0);
316 dataiterator_free(&di);
321 iterate_solvable_files(Pool *pool, Id p, void (*cb)(void *, const char *, struct filelistinfo *), void *cbdata)
326 Repodata *lastdata = 0;
329 struct filelistinfo info;
330 const char *tmpdir = 0;
333 dataiterator_init(&di, pool, 0, p, SOLVABLE_FILELIST, 0, SEARCH_COMPLETE_FILELIST);
334 memset(&info, 0, sizeof(info));
335 while (dataiterator_step(&di))
337 if (di.data != lastdata || di.kv.id != lastdirid)
340 lastdirid = di.kv.id;
341 tmpdir = repodata_dir2str(di.data, di.kv.id, "");
342 dirl = strlen(tmpdir);
346 l = dirl + strlen(di.kv.str) + 1;
350 space = solv_realloc(space, spacen);
354 strcpy(space, tmpdir);
357 strcpy(space + dirl, di.kv.str);
358 cb(cbdata, space, &info);
360 dataiterator_free(&di);
366 pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, int flags, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata)
368 int i, j, cflmapn, idxmapset;
369 struct cbdata cbdata;
370 unsigned int now, start;
372 Repo *installed = pool->installed;
374 int obsoleteusescolors = pool_get_flag(pool, POOL_FLAG_OBSOLETEUSESCOLORS);
376 queue_empty(conflicts);
380 now = start = solv_timems(0);
381 POOL_DEBUG(SOLV_DEBUG_STATS, "searching for file conflicts\n");
382 POOL_DEBUG(SOLV_DEBUG_STATS, "packages: %d, cutoff %d\n", pkgs->count, cutoff);
384 memset(&cbdata, 0, sizeof(cbdata));
386 queue_init(&cbdata.lookat);
387 queue_init(&cbdata.lookat_dir);
388 map_init(&cbdata.idxmap, pkgs->count);
391 cutoff = pkgs->count;
393 /* avarage file list size: 200 files per package */
394 /* avarage dir count: 20 dirs per package */
396 /* first pass: scan dirs */
397 cflmapn = (cutoff + 3) * 64;
398 while ((cflmapn & (cflmapn - 1)) != 0)
399 cflmapn = cflmapn & (cflmapn - 1);
400 cbdata.dirmap = solv_calloc(cflmapn, 2 * sizeof(Id));
401 cbdata.dirmapn = cflmapn - 1; /* make it a mask */
404 for (i = 0; i < pkgs->count; i++)
406 p = pkgs->elements[i];
410 if ((flags & FINDFILECONFLICTS_USESOLVABLEFILELIST) != 0 && installed)
412 if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
414 iterate_solvable_dirs(pool, p, finddirs_cb, &cbdata);
415 if (MAPTST(&cbdata.idxmap, i))
420 handle = (*handle_cb)(pool, p, handle_cbdata);
423 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
424 if (MAPTST(&cbdata.idxmap, i))
428 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d, used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
429 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
430 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
431 POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
433 /* second pass: scan files */
434 now = solv_timems(0);
435 cflmapn = (cutoff + 3) * 128;
436 while ((cflmapn & (cflmapn - 1)) != 0)
437 cflmapn = cflmapn & (cflmapn - 1);
438 cbdata.cflmap = solv_calloc(cflmapn, 2 * sizeof(Id));
439 cbdata.cflmapn = cflmapn - 1; /* make it a mask */
441 for (i = 0; i < pkgs->count; i++)
443 if (!MAPTST(&cbdata.idxmap, i))
445 p = pkgs->elements[i];
449 /* can't use FINDFILECONFLICTS_USESOLVABLEFILELIST because we have to know if
450 * the file is a directory or not */
451 handle = (*handle_cb)(pool, p, handle_cbdata);
454 cbdata.lastdiridx = -1;
455 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_cb, &cbdata);
458 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap size: %d, used %d\n", cbdata.cflmapn + 1, cbdata.cflmapused);
459 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap memory usage: %d K\n", (cbdata.cflmapn + 1) * 2 * (int)sizeof(Id) / 1024);
460 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap creation took %d ms\n", solv_timems(now));
462 cbdata.dirmap = solv_free(cbdata.dirmap);
464 cbdata.dirmapused = 0;
465 cbdata.cflmap = solv_free(cbdata.cflmap);
467 cbdata.cflmapused = 0;
468 map_free(&cbdata.idxmap);
470 now = solv_timems(0);
471 POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
472 queue_free(&cbdata.lookat_dir);
475 solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &lookat_idx_cmp, pool);
476 for (i = j = 0; i < cbdata.lookat.count; i += 2)
478 Id hx = cbdata.lookat.elements[i];
479 Id idx = cbdata.lookat.elements[i + 1];
480 if (j && hx == cbdata.lookat.elements[j - 2] && idx == cbdata.lookat.elements[j - 1])
482 cbdata.lookat.elements[j++] = hx;
483 cbdata.lookat.elements[j++] = idx;
485 queue_truncate(&cbdata.lookat, j);
486 POOL_DEBUG(SOLV_DEBUG_STATS, "candidates: %d\n", cbdata.lookat.count / 2);
488 /* third pass: collect file info for all files that match a hx */
489 queue_init(&cbdata.files);
490 for (i = 0; i < cbdata.lookat.count; i += 2)
492 Id idx = cbdata.lookat.elements[i + 1];
493 int iterflags = RPM_ITERATE_FILELIST_WITHMD5 | RPM_ITERATE_FILELIST_NOGHOSTS;
494 if (obsoleteusescolors)
495 iterflags |= RPM_ITERATE_FILELIST_WITHCOL;
496 p = pkgs->elements[idx];
497 handle = (*handle_cb)(pool, p, handle_cbdata);
500 int fstart = cbdata.files.count;
501 queue_push(&cbdata.files, idx);
502 queue_push(&cbdata.files, 0);
504 cbdata.hx = cbdata.lookat.elements[i];
505 cbdata.lastdiridx = -1;
507 rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
508 cbdata.files.elements[fstart + 1] = cbdata.files.count;
509 cbdata.lookat.elements[i + 1] = fstart;
510 if (i + 2 >= cbdata.lookat.count || cbdata.lookat.elements[i + 3] != idx)
515 /* forth pass: for each hx we have, compare all matching files against all other matching files */
516 solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &lookat_hx_cmp, pool);
517 for (i = 0; i < cbdata.lookat.count - 2; i += 2)
519 Id hx = cbdata.lookat.elements[i];
520 Id pstart = cbdata.lookat.elements[i + 1];
521 Id pidx = cbdata.files.elements[pstart];
522 Id pend = cbdata.files.elements[pstart + 1];
523 if (cbdata.lookat.elements[i + 2] != hx)
524 continue; /* no package left with that hx */
525 for (j = i + 2; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx; j += 2)
527 Id qstart = cbdata.lookat.elements[j + 1];
528 Id qidx = cbdata.files.elements[qstart];
529 Id qend = cbdata.files.elements[qstart + 1];
531 if (pidx >= cutoff && qidx >= cutoff)
532 continue; /* no conflicts between packages with idx >= cutoff */
533 for (ii = pstart + 2; ii < pend; ii++)
534 for (jj = qstart + 2; jj < qend; jj++)
536 char *fsi = (char *)cbdata.filesspace + cbdata.files.elements[ii];
537 char *fsj = (char *)cbdata.filesspace + cbdata.files.elements[jj];
538 if (strcmp(fsi + 34, fsj + 34))
539 continue; /* different file names */
540 if (!strcmp(fsi, fsj))
541 continue; /* md5 sum matches */
542 if (obsoleteusescolors && fsi[33] && fsj[33] && (fsi[33] & fsj[33]) == 0)
543 continue; /* colors do not conflict */
544 queue_push(conflicts, pool_str2id(pool, fsi + 34, 1));
545 queue_push(conflicts, pkgs->elements[pidx]);
546 queue_push(conflicts, pool_str2id(pool, fsi, 1));
547 queue_push(conflicts, pool_str2id(pool, fsj + 34, 1));
548 queue_push(conflicts, pkgs->elements[qidx]);
549 queue_push(conflicts, pool_str2id(pool, fsj, 1));
553 POOL_DEBUG(SOLV_DEBUG_STATS, "filespace size: %d K\n", cbdata.filesspacen / 1024);
554 POOL_DEBUG(SOLV_DEBUG_STATS, "candidate check took %d ms\n", solv_timems(now));
555 cbdata.filesspace = solv_free(cbdata.filesspace);
556 cbdata.filesspacen = 0;
557 queue_free(&cbdata.lookat);
558 queue_free(&cbdata.files);
559 if (conflicts->count > 6)
560 solv_sort(conflicts->elements, conflicts->count / 6, 6 * sizeof(Id), conflicts_cmp, pool);
561 POOL_DEBUG(SOLV_DEBUG_STATS, "found %d file conflicts\n", conflicts->count / 6);
562 POOL_DEBUG(SOLV_DEBUG_STATS, "file conflict detection took %d ms\n", solv_timems(start));
563 return conflicts->count;