2 * Copyright (c) 2009-2013, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
14 #include "repo_rpmdb.h"
15 #include "pool_fileconflicts.h"
21 Queue lookat; /* conflict candidates */
22 Queue lookat_dir; /* not yet conflicting directories */
26 unsigned int cflmapused;
30 unsigned int dirmapused;
35 Id idx; /* index of package we're looking at */
36 Id hx; /* used in findfileconflicts2_cb, limit to files matching hx */
39 unsigned char *filesspace;
40 unsigned int filesspacen;
43 #define FILESSPACE_BLOCK 255
46 growhash(Hashtable map, Hashval *mapnp)
48 Hashval mapn = *mapnp;
49 Hashval newn = (mapn + 1) * 2 - 1;
54 m = solv_calloc(newn + 1, 2 * sizeof(Id));
55 for (i = 0; i <= mapn; i++)
67 h = HASHCHAIN_NEXT(h, hh, newn);
70 m[2 * h + 1] = map[2 * i + 1];
78 finddirs_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
80 struct cbdata *cbdata = cbdatav;
83 Id oidx, idx = cbdata->idx;
88 h = hx & cbdata->dirmapn;
92 qx = cbdata->dirmap[2 * h];
97 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
104 cbdata->dirmap[2 * h] = hx;
105 cbdata->dirmap[2 * h + 1] = idx;
106 cbdata->dirmapused++;
107 if (cbdata->dirmapused * 2 > cbdata->dirmapn)
108 cbdata->dirmap = growhash(cbdata->dirmap, &cbdata->dirmapn);
111 oidx = cbdata->dirmap[2 * h + 1];
114 /* found a conflict, this dir may be used in multiple packages */
117 MAPSET(&cbdata->idxmap, oidx);
118 cbdata->dirmap[2 * h + 1] = -1;
119 cbdata->dirconflicts++;
121 MAPSET(&cbdata->idxmap, idx);
125 isindirmap(struct cbdata *cbdata, Id hx)
130 h = hx & cbdata->dirmapn;
131 hh = HASHCHAIN_START;
134 qx = cbdata->dirmap[2 * h];
138 return cbdata->dirmap[2 * h + 1] == -1 ? 1 : 0;
139 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
144 findfileconflicts_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
146 struct cbdata *cbdata = cbdatav;
147 int isdir = S_ISDIR(fmode);
155 dp = strrchr(fn, '/');
158 dhx = strnhash(fn, dp + 1 - fn);
160 /* this mirrors the "if (!hx) hx = strlen(fn) + 1" in finddirs_cb */
161 if (!isindirmap(cbdata, dhx ? dhx : dp + 1 - fn + 1))
164 hx = strhash_cont(dp + 1, dhx);
168 h = hx & cbdata->cflmapn;
169 hh = HASHCHAIN_START;
172 qx = cbdata->cflmap[2 * h];
177 h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
184 cbdata->cflmap[2 * h] = hx;
185 cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx);
186 cbdata->cflmapused++;
187 if (cbdata->cflmapused * 2 > cbdata->cflmapn)
188 cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
191 oidx = cbdata->cflmap[2 * h + 1];
197 /* both are directories. delay the conflict, keep oidx in slot */
198 queue_push2(&cbdata->lookat_dir, hx, idx);
202 /* now have file, had directories before. */
203 cbdata->cflmap[2 * h + 1] = oidx; /* make it a file */
204 /* dump all delayed directory hits for hx */
205 for (i = 0; i < cbdata->lookat_dir.count; i += 2)
206 if (cbdata->lookat_dir.elements[i] == hx)
207 queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
209 else if (oidx == idx)
210 return; /* no conflicts with ourself, please */
211 queue_push2(&cbdata->lookat, hx, oidx);
212 queue_push2(&cbdata->lookat, hx, idx);
216 addfilesspace(struct cbdata *cbdata, unsigned char *data, int len)
218 cbdata->filesspace = solv_extend(cbdata->filesspace, cbdata->filesspacen, len, 1, FILESSPACE_BLOCK);
219 memcpy(cbdata->filesspace + cbdata->filesspacen, data, len);
220 cbdata->filesspacen += len;
224 findfileconflicts2_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
226 struct cbdata *cbdata = cbdatav;
227 Hashval hx = strhash(fn);
232 if ((Id)hx != cbdata->hx)
234 strncpy(md5padded, md5, 32);
236 md5padded[33] = fmode >> 24;
237 /* printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, fmode, md5); */
238 queue_push(&cbdata->files, cbdata->filesspacen);
239 addfilesspace(cbdata, (unsigned char *)md5padded, 34);
240 addfilesspace(cbdata, (unsigned char *)fn, strlen(fn) + 1);
244 lookat_cmp(const void *ap, const void *bp, void *dp)
248 unsigned int ahx = (unsigned int)a[0]; /* a[0] can be < 0 */
249 unsigned int bhx = (unsigned int)b[0];
250 return ahx < bhx ? -1 : ahx > bhx ? 1 : a[1] - b[1];
254 conflicts_cmp(const void *ap, const void *bp, void *dp)
259 if (a[0] != b[0]) /* filename1 */
260 return strcmp(pool_id2str(pool, a[0]), pool_id2str(pool, b[0]));
261 if (a[3] != b[3]) /* filename2 */
262 return strcmp(pool_id2str(pool, a[3]), pool_id2str(pool, b[3]));
263 if (a[1] != b[1]) /* pkgid1 */
265 if (a[4] != b[4]) /* pkgid2 */
271 pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata)
273 int i, j, cflmapn, idxmapset;
274 struct cbdata cbdata;
275 unsigned int now, start;
278 int obsoleteusescolors = pool_get_flag(pool, POOL_FLAG_OBSOLETEUSESCOLORS);
280 queue_empty(conflicts);
284 now = start = solv_timems(0);
285 POOL_DEBUG(SOLV_DEBUG_STATS, "searching for file conflicts\n");
286 POOL_DEBUG(SOLV_DEBUG_STATS, "packages: %d, cutoff %d\n", pkgs->count, cutoff);
288 memset(&cbdata, 0, sizeof(cbdata));
290 queue_init(&cbdata.lookat);
291 queue_init(&cbdata.lookat_dir);
292 queue_init(&cbdata.files);
293 map_init(&cbdata.idxmap, pkgs->count);
296 cutoff = pkgs->count;
298 /* avarage file list size: 200 files per package */
299 /* avarage dir count: 20 dirs per package */
301 /* first pass: scan dirs */
302 cflmapn = (cutoff + 3) * 64;
303 while ((cflmapn & (cflmapn - 1)) != 0)
304 cflmapn = cflmapn & (cflmapn - 1);
305 cbdata.dirmap = solv_calloc(cflmapn, 2 * sizeof(Id));
306 cbdata.dirmapn = cflmapn - 1; /* make it a mask */
309 for (i = 0; i < pkgs->count; i++)
311 p = pkgs->elements[i];
315 handle = (*handle_cb)(pool, p, handle_cbdata);
317 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
318 if (MAPTST(&cbdata.idxmap, i))
322 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d, used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
323 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
324 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
325 POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
327 /* second pass: scan files */
328 now = solv_timems(0);
329 cflmapn = (cutoff + 3) * 128;
330 while ((cflmapn & (cflmapn - 1)) != 0)
331 cflmapn = cflmapn & (cflmapn - 1);
332 cbdata.cflmap = solv_calloc(cflmapn, 2 * sizeof(Id));
333 cbdata.cflmapn = cflmapn - 1; /* make it a mask */
335 for (i = 0; i < pkgs->count; i++)
337 if (!MAPTST(&cbdata.idxmap, i))
339 p = pkgs->elements[i];
343 handle = (*handle_cb)(pool, p, handle_cbdata);
345 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_cb, &cbdata);
348 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap size: %d, used %d\n", cbdata.cflmapn + 1, cbdata.cflmapused);
349 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap memory usage: %d K\n", (cbdata.cflmapn + 1) * 2 * (int)sizeof(Id) / 1024);
350 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap creation took %d ms\n", solv_timems(now));
352 cbdata.dirmap = solv_free(cbdata.dirmap);
354 cbdata.dirmapused = 0;
355 cbdata.cflmap = solv_free(cbdata.cflmap);
357 cbdata.cflmapused = 0;
358 map_free(&cbdata.idxmap);
360 now = solv_timems(0);
361 POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
362 queue_free(&cbdata.lookat_dir);
365 solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &lookat_cmp, pool);
366 for (i = j = 0; i < cbdata.lookat.count; i += 2)
368 Id hx = cbdata.lookat.elements[i];
369 Id idx = cbdata.lookat.elements[i + 1];
370 if (j && hx == cbdata.lookat.elements[j - 2] && idx == cbdata.lookat.elements[j - 1])
372 cbdata.lookat.elements[j++] = hx;
373 cbdata.lookat.elements[j++] = idx;
375 queue_truncate(&cbdata.lookat, j);
376 POOL_DEBUG(SOLV_DEBUG_STATS, "candidates: %d\n", cbdata.lookat.count / 2);
378 /* third pass: scan candidates */
379 for (i = 0; i < cbdata.lookat.count - 2; i += 2)
383 Id hx = cbdata.lookat.elements[i];
384 Id pidx = cbdata.lookat.elements[i + 1];
386 iterflags = RPM_ITERATE_FILELIST_WITHMD5 | RPM_ITERATE_FILELIST_NOGHOSTS;
387 if (obsoleteusescolors)
388 iterflags |= RPM_ITERATE_FILELIST_WITHCOL;
389 p = pkgs->elements[pidx];
390 if (cbdata.lookat.elements[i + 2] != hx)
391 continue; /* no package left */
392 queue_empty(&cbdata.files);
393 cbdata.filesspace = solv_free(cbdata.filesspace);
394 cbdata.filesspacen = 0;
397 cbdata.hx = cbdata.lookat.elements[i];
398 handle = (*handle_cb)(pool, p, handle_cbdata);
401 rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
403 pend = cbdata.files.count;
404 for (j = i + 2; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx; j += 2)
406 Id qidx = cbdata.lookat.elements[j + 1];
407 Id q = pkgs->elements[qidx];
408 if (pidx >= cutoff && qidx >= cutoff)
409 continue; /* no conflicts between packages with idx >= cutoff */
411 handle = (*handle_cb)(pool, q, handle_cbdata);
414 rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
415 for (ii = 0; ii < pend; ii++)
416 for (jj = pend; jj < cbdata.files.count; jj++)
418 char *fsi = (char *)cbdata.filesspace + cbdata.files.elements[ii];
419 char *fsj = (char *)cbdata.filesspace + cbdata.files.elements[jj];
420 if (strcmp(fsi + 34, fsj + 34))
421 continue; /* different file names */
422 if (!strcmp(fsi, fsj))
423 continue; /* md5 sum matches */
424 if (obsoleteusescolors && fsi[33] && fsj[33] && (fsi[33] & fsj[33]) == 0)
425 continue; /* colors do not conflict */
426 queue_push(conflicts, pool_str2id(pool, fsi + 34, 1));
427 queue_push(conflicts, p);
428 queue_push(conflicts, pool_str2id(pool, fsi, 1));
429 queue_push(conflicts, pool_str2id(pool, fsj + 34, 1));
430 queue_push(conflicts, q);
431 queue_push(conflicts, pool_str2id(pool, fsj, 1));
433 queue_truncate(&cbdata.files, pend);
436 cbdata.filesspace = solv_free(cbdata.filesspace);
437 cbdata.filesspacen = 0;
438 queue_free(&cbdata.lookat);
439 queue_free(&cbdata.files);
440 POOL_DEBUG(SOLV_DEBUG_STATS, "candidate check took %d ms\n", solv_timems(now));
441 if (conflicts->count > 6)
442 solv_sort(conflicts->elements, conflicts->count / 6, 6 * sizeof(Id), conflicts_cmp, pool);
443 (*handle_cb)(pool, 0, handle_cbdata);
444 POOL_DEBUG(SOLV_DEBUG_STATS, "found %d file conflicts\n", conflicts->count / 6);
445 POOL_DEBUG(SOLV_DEBUG_STATS, "file conflict detection took %d ms\n", solv_timems(start));
446 return conflicts->count;