2 * Copyright (c) 2009-2012, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
14 #include "repo_rpmdb.h"
15 #include "pool_fileconflicts.h"
26 unsigned int cflmapused;
30 unsigned int dirmapused;
39 unsigned char *filesspace;
40 unsigned int filesspacen;
43 #define FILESSPACE_BLOCK 255
46 doublehash(Hashval *map, Hashmask *mapnp)
48 Hashmask mapn = *mapnp;
49 Hashmask i, hx, qx, h, hh;
50 Hashmask nn = (mapn + 1) * 2 - 1;
53 m = solv_calloc(nn + 1, 2 * sizeof(Id));
54 for (i = 0; i <= mapn; i++)
66 h = HASHCHAIN_NEXT(h, hh, nn);
69 m[2 * h + 1] = map[2 * i + 1];
77 finddirs_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
79 struct cbdata *cbdata = cbdatav;
80 Hashmask h, hh, hx, qx;
81 Hashval idx = cbdata->idx;
86 h = hx & cbdata->dirmapn;
90 qx = cbdata->dirmap[2 * h];
95 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
102 cbdata->dirmap[2 * h] = hx;
103 cbdata->dirmap[2 * h + 1] = idx;
104 cbdata->dirmapused++;
105 if (cbdata->dirmapused * 2 > cbdata->dirmapn)
106 cbdata->dirmap = doublehash(cbdata->dirmap, &cbdata->dirmapn);
109 if (cbdata->dirmap[2 * h + 1] == idx)
111 /* found a conflict, this dir is used in multiple packages */
112 if (cbdata->dirmap[2 * h + 1] != -1)
114 MAPSET(&cbdata->idxmap, cbdata->dirmap[2 * h + 1]);
115 cbdata->dirmap[2 * h + 1] = -1;
116 cbdata->dirconflicts++;
118 MAPSET(&cbdata->idxmap, idx);
122 isindirmap(struct cbdata *cbdata, Hashmask hx)
126 h = hx & cbdata->dirmapn;
127 hh = HASHCHAIN_START;
130 qx = cbdata->dirmap[2 * h];
134 return cbdata->dirmap[2 * h + 1] == -1 ? 1 : 0;
135 h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
140 findfileconflicts_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
142 struct cbdata *cbdata = cbdatav;
143 int isdir = S_ISDIR(fmode);
146 Hashmask qx, h, hx, hh, dhx;
150 dp = strrchr(fn, '/');
153 dhx = strnhash(fn, dp + 1 - fn);
155 dhx = 1 + dp + 1 - fn;
157 if (!isindirmap(cbdata, dhx))
161 hx = strhash_cont(dp + 1, dhx);
165 h = hx & cbdata->cflmapn;
166 hh = HASHCHAIN_START;
169 qx = cbdata->cflmap[2 * h];
174 h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
181 cbdata->cflmap[2 * h] = hx;
182 cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx);
183 cbdata->cflmapused++;
184 if (cbdata->cflmapused * 2 > cbdata->cflmapn)
185 cbdata->cflmap = doublehash(cbdata->cflmap, &cbdata->cflmapn);
188 qidx = cbdata->cflmap[2 * h + 1];
195 /* delay the conflict */
196 queue_push2(&cbdata->lookat_dir, hx, qidx);
197 queue_push2(&cbdata->lookat_dir, hx, idx);
200 cbdata->cflmap[2 * h + 1] = qidx;
201 for (i = 0; i < cbdata->lookat_dir.count; i += 2)
202 if (cbdata->lookat_dir.elements[i] == hx)
203 queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
206 return; /* no conflicts with ourself, please */
207 queue_push2(&cbdata->lookat, hx, qidx);
208 queue_push2(&cbdata->lookat, hx, idx);
212 addfilesspace(struct cbdata *cbdata, unsigned char *data, int len)
214 cbdata->filesspace = solv_extend(cbdata->filesspace, cbdata->filesspacen, len, 1, FILESSPACE_BLOCK);
215 memcpy(cbdata->filesspace + cbdata->filesspacen, data, len);
216 cbdata->filesspacen += len;
220 findfileconflicts2_cb(void *cbdatav, const char *fn, int fmode, const char *md5)
222 struct cbdata *cbdata = cbdatav;
223 unsigned int hx = strhash(fn);
228 if (hx != cbdata->hx)
230 strncpy(md5padded, md5, 32);
232 md5padded[33] = fmode >> 24;
233 // printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, fmode, md5);
234 queue_push(&cbdata->files, cbdata->filesspacen);
235 addfilesspace(cbdata, (unsigned char *)md5padded, 34);
236 addfilesspace(cbdata, (unsigned char *)fn, strlen(fn) + 1);
239 static int cand_sort(const void *ap, const void *bp, void *dp)
244 unsigned int ax = (unsigned int)a[0];
245 unsigned int bx = (unsigned int)b[0];
250 return (a[1] < 0 ? -a[1] : a[1]) - (b[1] < 0 ? -b[1] : b[1]);
253 static int conflicts_cmp(const void *ap, const void *bp, void *dp)
259 return strcmp(pool_id2str(pool, a[0]), pool_id2str(pool, b[0]));
268 pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata)
270 int i, j, cflmapn, idxmapset;
272 struct cbdata cbdata;
273 unsigned int now, start;
277 queue_empty(conflicts);
281 now = start = solv_timems(0);
282 POOL_DEBUG(SOLV_DEBUG_STATS, "searching for file conflicts\n");
283 POOL_DEBUG(SOLV_DEBUG_STATS, "packages: %d, cutoff %d\n", pkgs->count, cutoff);
285 memset(&cbdata, 0, sizeof(cbdata));
287 queue_init(&cbdata.lookat);
288 queue_init(&cbdata.lookat_dir);
289 queue_init(&cbdata.files);
290 map_init(&cbdata.idxmap, pkgs->count);
293 cutoff = pkgs->count;
295 /* avarage file list size: 200 files per package */
296 /* avarage dir count: 20 dirs per package */
298 /* first pass: scan dirs */
299 cflmapn = (cutoff + 3) * 64;
300 while ((cflmapn & (cflmapn - 1)) != 0)
301 cflmapn = cflmapn & (cflmapn - 1);
302 cbdata.dirmap = solv_calloc(cflmapn, 2 * sizeof(Id));
303 cbdata.dirmapn = cflmapn - 1; /* make it a mask */
306 for (i = 0; i < pkgs->count; i++)
308 p = pkgs->elements[i];
312 handle = (*handle_cb)(pool, p, handle_cbdata);
314 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
315 if (MAPTST(&cbdata.idxmap, i))
319 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
320 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
321 POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
322 POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
324 /* second pass: scan files */
325 now = solv_timems(0);
326 cflmapn = (cutoff + 3) * 128;
327 while ((cflmapn & (cflmapn - 1)) != 0)
328 cflmapn = cflmapn & (cflmapn - 1);
329 cbdata.cflmap = solv_calloc(cflmapn, 2 * sizeof(Id));
330 cbdata.cflmapn = cflmapn - 1; /* make it a mask */
332 for (i = 0; i < pkgs->count; i++)
334 if (!MAPTST(&cbdata.idxmap, i))
336 p = pkgs->elements[i];
340 handle = (*handle_cb)(pool, p, handle_cbdata);
342 rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_cb, &cbdata);
345 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap size: %d used %d\n", cbdata.cflmapn + 1, cbdata.cflmapused);
346 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap memory usage: %d K\n", (cbdata.cflmapn + 1) * 2 * (int)sizeof(Id) / 1024);
347 POOL_DEBUG(SOLV_DEBUG_STATS, "filemap creation took %d ms\n", solv_timems(now));
349 cbdata.dirmap = solv_free(cbdata.dirmap);
351 cbdata.dirmapused = 0;
352 cbdata.cflmap = solv_free(cbdata.cflmap);
354 cbdata.cflmapused = 0;
355 map_free(&cbdata.idxmap);
357 now = solv_timems(0);
358 POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
359 queue_free(&cbdata.lookat_dir);
360 solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &cand_sort, pool);
362 for (i = j = 0; i < cbdata.lookat.count; i += 2)
365 hx = cbdata.lookat.elements[i];
366 idx = cbdata.lookat.elements[i + 1];
367 if (j && hx == cbdata.lookat.elements[j - 2] && idx == cbdata.lookat.elements[j - 1])
369 cbdata.lookat.elements[j++] = hx;
370 cbdata.lookat.elements[j++] = idx;
372 POOL_DEBUG(SOLV_DEBUG_STATS, "candidates: %d\n", cbdata.lookat.count / 2);
374 /* third pass: scan candidates */
375 for (i = 0; i < cbdata.lookat.count - 2; i += 2)
378 int pidx = cbdata.lookat.elements[i + 1];
381 iterflags = RPM_ITERATE_FILELIST_WITHMD5 | RPM_ITERATE_FILELIST_NOGHOSTS;
382 if (pool->obsoleteusescolors)
383 iterflags |= RPM_ITERATE_FILELIST_WITHCOL;
384 p = pkgs->elements[pidx];
385 hx = cbdata.lookat.elements[i];
386 if (cbdata.lookat.elements[i + 2] != hx)
387 continue; /* no package left */
388 queue_empty(&cbdata.files);
389 cbdata.filesspace = solv_free(cbdata.filesspace);
390 cbdata.filesspacen = 0;
393 cbdata.hx = cbdata.lookat.elements[i];
394 handle = (*handle_cb)(pool, p, handle_cbdata);
397 rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
399 pend = cbdata.files.count;
400 for (j = i + 2; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx; j++)
402 int qidx = cbdata.lookat.elements[j + 1];
403 Id q = pkgs->elements[qidx];
404 if (pidx >= cutoff && qidx >= cutoff)
405 continue; /* no conflicts between packages with idx >= cutoff */
407 handle = (*handle_cb)(pool, q, handle_cbdata);
410 rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
411 for (ii = 0; ii < pend; ii++)
412 for (jj = pend; jj < cbdata.files.count; jj++)
414 char *fsi = (char *)cbdata.filesspace + cbdata.files.elements[ii];
415 char *fsj = (char *)cbdata.filesspace + cbdata.files.elements[jj];
416 if (strcmp(fsi + 34, fsj + 34))
417 continue; /* different file names */
418 if (!strcmp(fsi, fsj))
419 continue; /* md5 sum matches */
420 if (pool->obsoleteusescolors && fsi[33] && fsj[33] && (fsi[33] & fsj[33]) == 0)
421 continue; /* colors do not conflict */
422 queue_push(conflicts, pool_str2id(pool, (char *)cbdata.filesspace + cbdata.files.elements[ii] + 34, 1));
423 queue_push(conflicts, p);
424 queue_push(conflicts, pool_str2id(pool, (char *)cbdata.filesspace + cbdata.files.elements[ii], 1));
425 queue_push(conflicts, q);
426 queue_push(conflicts, pool_str2id(pool, (char *)cbdata.filesspace + cbdata.files.elements[jj], 1));
430 cbdata.filesspace = solv_free(cbdata.filesspace);
431 cbdata.filesspacen = 0;
432 queue_free(&cbdata.lookat);
433 queue_free(&cbdata.files);
434 POOL_DEBUG(SOLV_DEBUG_STATS, "candidate check took %d ms\n", solv_timems(now));
435 if (conflicts->count > 5)
436 solv_sort(conflicts->elements, conflicts->count / 5, 5 * sizeof(Id), conflicts_cmp, pool);
437 (*handle_cb)(pool, 0, handle_cbdata);
438 POOL_DEBUG(SOLV_DEBUG_STATS, "found %d file conflicts\n", conflicts->count / 5);
439 POOL_DEBUG(SOLV_DEBUG_STATS, "file conflict detection took %d ms\n", solv_timems(start));
440 return conflicts->count;