2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
12 #include "dbinc/db_page.h"
13 #include "dbinc/db_verify.h"
14 #include "dbinc/btree.h"
15 #include "dbinc/lock.h"
18 static int __bam_safe_getdata __P((DB *, DB_THREAD_INFO *,
19 PAGE *, u_int32_t, int, DBT *, int *));
20 static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
21 db_indx_t *, u_int32_t));
22 static int __bam_vrfy_treeorder __P((DB *, DB_THREAD_INFO *, PAGE *,
23 BINTERNAL *, BINTERNAL *, int (*)(DB *, const DBT *, const DBT *),
25 static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
26 db_indx_t *, u_int32_t));
30 * Verify the btree-specific part of a metadata page.
32 * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *,
33 * PUBLIC: db_pgno_t, u_int32_t));
36 __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
45 int isbad, t_ret, ret;
51 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
55 * If we came through __db_vrfy_pagezero, we have already checked the
56 * common fields. However, we used the on-disk metadata page, it may
57 * have been stale. We now have the page from mpool, so check that.
59 if ((ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) {
60 if (ret == DB_VERIFY_BAD)
66 /* bt_minkey: must be >= 2; must produce sensible ovflsize */
68 /* avoid division by zero */
69 ovflsize = meta->minkey > 0 ?
70 B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
72 if (meta->minkey < 2 ||
73 ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
76 EPRINT((env, DB_STR_A("1034",
77 "Page %lu: nonsensical bt_minkey value %lu on metadata page",
78 "%lu %lu"), (u_long)pgno, (u_long)meta->minkey));
80 pip->bt_minkey = meta->minkey;
82 /* re_len: no constraints on this (may be zero or huge--we make rope) */
83 pip->re_pad = meta->re_pad;
84 pip->re_len = meta->re_len;
87 * The root must not be current page or 0 and it must be within
88 * database. If this metadata page is the master meta data page
89 * of the file, then the root page had better be page 1.
92 if (meta->root == PGNO_INVALID ||
93 meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
94 (pgno == PGNO_BASE_MD && meta->root != 1)) {
96 EPRINT((env, DB_STR_A("1035",
97 "Page %lu: nonsensical root page %lu on metadata page",
98 "%lu %lu"), (u_long)pgno, (u_long)meta->root));
100 pip->root = meta->root;
103 if (F_ISSET(&meta->dbmeta, BTM_RENUMBER))
104 F_SET(pip, VRFY_IS_RRECNO);
106 if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) {
108 * If this is a master db meta page, it had better not have
111 if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
113 EPRINT((env, DB_STR_A("1036",
114 "Page %lu: Btree metadata page has both duplicates and multiple databases",
115 "%lu"), (u_long)pgno));
117 F_SET(pip, VRFY_HAS_SUBDBS);
120 if (F_ISSET(&meta->dbmeta, BTM_DUP))
121 F_SET(pip, VRFY_HAS_DUPS);
122 if (F_ISSET(&meta->dbmeta, BTM_DUPSORT))
123 F_SET(pip, VRFY_HAS_DUPSORT);
124 if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
125 F_SET(pip, VRFY_HAS_RECNUMS);
126 if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
127 EPRINT((env, DB_STR_A("1037",
128 "Page %lu: Btree metadata page illegally has both recnums and dups",
129 "%lu"), (u_long)pgno));
133 if (F_ISSET(&meta->dbmeta, BTM_RECNO)) {
134 F_SET(pip, VRFY_IS_RECNO);
135 dbp->type = DB_RECNO;
136 } else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
138 EPRINT((env, DB_STR_A("1038",
139 "Page %lu: metadata page has renumber flag set but is not recno",
140 "%lu"), (u_long)pgno));
143 #ifdef HAVE_COMPRESSION
144 if (F_ISSET(&meta->dbmeta, BTM_COMPRESS)) {
145 F_SET(pip, VRFY_HAS_COMPRESS);
146 if (!DB_IS_COMPRESSED(dbp)) {
147 ((BTREE *)dbp->bt_internal)->bt_compress =
149 ((BTREE *)dbp->bt_internal)->bt_decompress =
153 * Copy dup_compare to compress_dup_compare, and use the
154 * compression duplicate compare.
156 if (F_ISSET(pip, VRFY_HAS_DUPSORT)) {
157 if (dbp->dup_compare == NULL)
158 dbp->dup_compare = __bam_defcmp;
159 if (((BTREE *)dbp->bt_internal)->compress_dup_compare
161 ((BTREE *)dbp->bt_internal)->
162 compress_dup_compare = dbp->dup_compare;
163 dbp->dup_compare = __bam_compress_dupcmp;
168 if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_COMPRESS)) {
169 EPRINT((env, DB_STR_A("1039",
170 "Page %lu: Btree metadata page illegally has both recnums and compression",
171 "%lu"), (u_long)pgno));
174 if (F_ISSET(pip, VRFY_HAS_DUPS) && !F_ISSET(pip, VRFY_HAS_DUPSORT) &&
175 F_ISSET(pip, VRFY_HAS_COMPRESS)) {
176 EPRINT((env, DB_STR_A("1040",
177 "Page %lu: Btree metadata page illegally has both "
178 "unsorted duplicates and compression",
179 "%lu"), (u_long)pgno));
184 if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
185 EPRINT((env, DB_STR_A("1041",
186 "Page %lu: recno metadata page specifies duplicates",
187 "%lu"), (u_long)pgno));
191 if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN))
192 F_SET(pip, VRFY_IS_FIXEDLEN);
193 else if (pip->re_len > 0) {
195 * It's wrong to have an re_len if it's not a fixed-length
199 EPRINT((env, DB_STR_A("1042",
200 "Page %lu: re_len of %lu in non-fixed-length database",
201 "%lu %lu"), (u_long)pgno, (u_long)pip->re_len));
205 * We do not check that the rest of the page is 0, because it may
206 * not be and may still be correct.
209 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
211 if (LF_ISSET(DB_SALVAGE) &&
212 (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
214 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
219 * Verify a recno leaf page.
221 * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
222 * PUBLIC: u_int32_t));
225 __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
236 int ret, t_ret, isbad;
237 u_int32_t re_len_guess, len;
242 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
245 if (TYPE(h) != P_LRECNO) {
246 ret = __db_unknown_path(env, "__ram_vrfy_leaf");
251 * Verify (and, if relevant, save off) page fields common to
254 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
255 if (ret == DB_VERIFY_BAD)
262 * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD;
263 * further checks are dangerous.
265 if ((ret = __bam_vrfy_inp(dbp,
266 vdp, h, pgno, &pip->entries, flags)) != 0)
269 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
270 EPRINT((env, DB_STR_A("1043",
271 "Page %lu: Recno database has dups",
272 "%lu"), (u_long)pgno));
278 * Walk through inp and see if the lengths of all the records are the
279 * same--if so, this may be a fixed-length database, and we want to
280 * save off this value. We know inp to be safe if we've gotten this
284 for (i = 0; i < NUM_ENT(h); i++) {
285 bk = GET_BKEYDATA(dbp, h, i);
286 /* KEYEMPTY. Go on. */
287 if (B_DISSET(bk->type))
289 if (bk->type == B_OVERFLOW)
290 len = ((BOVERFLOW *)bk)->tlen;
291 else if (bk->type == B_KEYDATA)
295 EPRINT((env, DB_STR_A("1044",
296 "Page %lu: nonsensical type for item %lu",
297 "%lu %lu"), (u_long)pgno, (u_long)i));
300 if (re_len_guess == 0)
304 * Is this item's len the same as the last one's? If not,
305 * reset to 0 and break--we don't have a single re_len.
306 * Otherwise, go on to the next item.
308 if (re_len_guess != len) {
313 pip->re_len = re_len_guess;
315 /* Save off record count. */
316 pip->rec_cnt = NUM_ENT(h);
318 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
320 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
325 * Verify a btree leaf or internal page.
327 * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
328 * PUBLIC: u_int32_t));
331 __bam_vrfy(dbp, vdp, h, pgno, flags)
340 int ret, t_ret, isbad;
345 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
355 ret = __db_unknown_path(env, "__bam_vrfy");
360 * Verify (and, if relevant, save off) page fields common to
363 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
364 if (ret == DB_VERIFY_BAD)
371 * The record count is, on internal pages, stored in an overloaded
372 * next_pgno field. Save it off; we'll verify it when we check
373 * overall database structure. We could overload the field
374 * in VRFY_PAGEINFO, too, but this seems gross, and space
375 * is not at such a premium.
377 pip->rec_cnt = RE_NREC(h);
382 if (TYPE(h) == P_IRECNO) {
383 if ((ret = __ram_vrfy_inp(dbp,
384 vdp, h, pgno, &pip->entries, flags)) != 0)
386 } else if ((ret = __bam_vrfy_inp(dbp,
387 vdp, h, pgno, &pip->entries, flags)) != 0) {
388 if (ret == DB_VERIFY_BAD)
392 EPRINT((env, DB_STR_A("1045",
393 "Page %lu: item order check unsafe: skipping",
394 "%lu"), (u_long)pgno));
395 } else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
396 __bam_vrfy_itemorder(dbp,
397 vdp, vdp->thread_info, h, pgno, 0, 0, 0, flags)) != 0) {
399 * We know that the elements of inp are reasonable.
401 * Check that elements fall in the proper order.
403 if (ret == DB_VERIFY_BAD)
409 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
411 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
416 * Verify that all entries in a P_IRECNO inp[] array are reasonable,
417 * and count them. Note that P_LRECNO uses __bam_vrfy_inp;
418 * P_IRECNOs are a special, and simpler, case, since they have
419 * RINTERNALs rather than BKEYDATA/BINTERNALs.
422 __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
427 db_indx_t *nentriesp;
432 VRFY_CHILDINFO child;
434 int ret, t_ret, isbad;
435 u_int32_t himark, i, offset, nentries;
437 u_int8_t *pagelayout, *p;
441 memset(&child, 0, sizeof(VRFY_CHILDINFO));
445 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
448 if (TYPE(h) != P_IRECNO) {
449 ret = __db_unknown_path(env, "__ram_vrfy_inp");
453 himark = dbp->pgsize;
454 if ((ret = __os_malloc(env, dbp->pgsize, &pagelayout)) != 0)
456 memset(pagelayout, 0, dbp->pgsize);
458 for (i = 0; i < NUM_ENT(h); i++) {
459 if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
460 EPRINT((env, DB_STR_A("1046",
461 "Page %lu: entries listing %lu overlaps data",
462 "%lu %lu"), (u_long)pgno, (u_long)i));
468 * Check that the item offset is reasonable: it points
469 * somewhere after the inp array and before the end of the
472 if (offset <= (u_int32_t)((u_int8_t *)inp + i -
474 offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
476 EPRINT((env, DB_STR_A("1047",
477 "Page %lu: bad offset %lu at index %lu",
478 "%lu %lu %lu"), (u_long)pgno, (u_long)offset,
483 /* Update the high-water mark (what HOFFSET should be) */
489 /* Make sure this RINTERNAL is not multiply referenced. */
490 ri = GET_RINTERNAL(dbp, h, i);
491 if (pagelayout[offset] == 0) {
492 pagelayout[offset] = 1;
493 child.pgno = ri->pgno;
494 child.type = V_RECNO;
495 child.nrecs = ri->nrecs;
496 if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
499 EPRINT((env, DB_STR_A("1048",
500 "Page %lu: RINTERNAL structure at offset %lu referenced twice",
501 "%lu %lu"), (u_long)pgno, (u_long)offset));
506 for (p = pagelayout + himark;
507 p < pagelayout + dbp->pgsize;
510 EPRINT((env, DB_STR_A("1049",
511 "Page %lu: gap between items at offset %lu",
512 "%lu %lu"), (u_long)pgno,
513 (u_long)(p - pagelayout)));
517 if ((db_indx_t)himark != HOFFSET(h)) {
518 EPRINT((env, DB_STR_A("1050",
519 "Page %lu: bad HOFFSET %lu, appears to be %lu",
520 "%lu %lu %lu"), (u_long)pgno, (u_long)(HOFFSET(h)),
525 *nentriesp = nentries;
527 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
529 if (pagelayout != NULL)
530 __os_free(env, pagelayout);
531 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
534 typedef enum { VRFY_ITEM_NOTSET=0, VRFY_ITEM_BEGIN, VRFY_ITEM_END } VRFY_ITEM;
538 * Verify that all entries in inp[] array are reasonable;
542 __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
547 db_indx_t *nentriesp;
553 VRFY_CHILDINFO child;
554 VRFY_ITEM *pagelayout;
556 u_int32_t himark, offset; /*
557 * These would be db_indx_ts
560 u_int32_t i, endoff, nentries;
561 int isbad, initem, isdupitem, ret, t_ret;
564 isbad = isdupitem = 0;
566 memset(&child, 0, sizeof(VRFY_CHILDINFO));
567 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
578 * In the salvager, we might call this from a page which
579 * we merely suspect is a btree page. Otherwise, it
580 * shouldn't get called--if it is, that's a verifier bug.
582 if (LF_ISSET(DB_SALVAGE))
584 ret = __db_unknown_path(env, "__bam_vrfy_inp");
589 * Loop through inp[], the array of items, until we either
590 * run out of entries or collide with the data. Keep track
591 * of h_offset in himark.
593 * For each element in inp[i], make sure it references a region
594 * that starts after the end of the inp array (as defined by
595 * NUM_ENT(h)), ends before the beginning of the page, doesn't
596 * overlap any other regions, and doesn't have a gap between
597 * it and the region immediately after it.
599 himark = dbp->pgsize;
600 if ((ret = __os_calloc(
601 env, dbp->pgsize, sizeof(pagelayout[0]), &pagelayout)) != 0)
603 for (i = 0; i < NUM_ENT(h); i++) {
604 switch (ret = __db_vrfy_inpitem(dbp,
605 h, pgno, i, 1, flags, &himark, &offset)) {
611 case DB_VERIFY_FATAL:
615 DB_ASSERT(env, ret != 0);
620 * We now have a plausible beginning for the item, and we know
621 * its length is safe.
623 * Mark the beginning and end in pagelayout so we can make sure
624 * items have no overlaps or gaps.
626 bk = GET_BKEYDATA(dbp, h, i);
627 if (pagelayout[offset] == VRFY_ITEM_NOTSET)
628 pagelayout[offset] = VRFY_ITEM_BEGIN;
629 else if (pagelayout[offset] == VRFY_ITEM_BEGIN) {
631 * Having two inp entries that point at the same patch
632 * of page is legal if and only if the page is
633 * a btree leaf and they're onpage duplicate keys--
634 * that is, if (i % P_INDX) == 0.
636 if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) {
637 /* Flag for later. */
638 F_SET(pip, VRFY_HAS_DUPS);
640 /* Bump up nentries so we don't undercount. */
644 * We'll check to make sure the end is
650 EPRINT((env, DB_STR_A("1051",
651 "Page %lu: duplicated item %lu",
652 "%lu %lu"), (u_long)pgno, (u_long)i));
657 * Mark the end. Its location varies with the page type
660 * If the end already has a sign other than 0, do nothing--
661 * it's an overlap that we'll catch later.
663 switch (B_TYPE(bk->type)) {
665 if (TYPE(h) == P_IBTREE)
666 /* It's a BINTERNAL. */
667 endoff = offset + BINTERNAL_SIZE(bk->len) - 1;
669 endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
673 * Flag that we have dups; we'll check whether
674 * that's okay during the structure check.
676 F_SET(pip, VRFY_HAS_DUPS);
680 * Overflow entries on internal pages are stored
681 * as the _data_ of a BINTERNAL; overflow entries
682 * on leaf pages are stored as the entire entry.
685 ((TYPE(h) == P_IBTREE) ?
686 BINTERNAL_SIZE(BOVERFLOW_SIZE) :
691 * We'll complain later; for now, just mark
694 endoff = offset + BKEYDATA_SIZE(0) - 1;
699 * If this is an onpage duplicate key we've seen before,
700 * the end had better coincide too.
702 if (isdupitem && pagelayout[endoff] != VRFY_ITEM_END) {
703 EPRINT((env, DB_STR_A("1052",
704 "Page %lu: duplicated item %lu",
705 "%lu %lu"), (u_long)pgno, (u_long)i));
707 } else if (pagelayout[endoff] == VRFY_ITEM_NOTSET)
708 pagelayout[endoff] = VRFY_ITEM_END;
712 * There should be no deleted items in a quiescent tree,
715 if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
717 EPRINT((env, DB_STR_A("1053",
718 "Page %lu: item %lu marked deleted", "%lu %lu"),
719 (u_long)pgno, (u_long)i));
723 * Check the type and such of bk--make sure it's reasonable
726 switch (B_TYPE(bk->type)) {
729 * This is a normal, non-overflow BKEYDATA or BINTERNAL.
730 * The only thing to check is the len, and that's
735 if (TYPE(h) == P_IBTREE) {
737 EPRINT((env, DB_STR_A("1054",
738 "Page %lu: duplicate page referenced by internal btree page at item %lu",
739 "%lu %lu"), (u_long)pgno, (u_long)i));
741 } else if (TYPE(h) == P_LRECNO) {
743 EPRINT((env, DB_STR_A("1055",
744 "Page %lu: duplicate page referenced by recno page at item %lu",
745 "%lu %lu"), (u_long)pgno, (u_long)i));
750 bo = (TYPE(h) == P_IBTREE) ?
751 (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
754 if (B_TYPE(bk->type) == B_OVERFLOW)
755 /* Make sure tlen is reasonable. */
756 if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
758 EPRINT((env, DB_STR_A("1056",
759 "Page %lu: impossible tlen %lu, item %lu",
760 "%lu %lu %lu"), (u_long)pgno,
761 (u_long)bo->tlen, (u_long)i));
762 /* Don't save as a child. */
766 if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
767 bo->pgno == PGNO_INVALID) {
769 EPRINT((env, DB_STR_A("1057",
770 "Page %lu: offpage item %lu has bad pgno %lu",
771 "%lu %lu %lu"), (u_long)pgno, (u_long)i,
773 /* Don't save as a child. */
777 child.pgno = bo->pgno;
778 child.type = (B_TYPE(bk->type) == B_OVERFLOW ?
779 V_OVERFLOW : V_DUPLICATE);
780 child.tlen = bo->tlen;
781 if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
786 EPRINT((env, DB_STR_A("1058",
787 "Page %lu: item %lu of invalid type %lu",
788 "%lu %lu %lu"), (u_long)pgno, (u_long)i,
789 (u_long)B_TYPE(bk->type)));
795 * Now, loop through and make sure the items are contiguous and
799 for (i = himark; i < dbp->pgsize; i++)
801 switch (pagelayout[i]) {
802 case VRFY_ITEM_NOTSET:
803 /* May be just for alignment. */
804 if (i != DB_ALIGN(i, sizeof(u_int32_t)))
808 EPRINT((env, DB_STR_A("1059",
809 "Page %lu: gap between items at offset %lu",
810 "%lu %lu"), (u_long)pgno, (u_long)i));
811 /* Find the end of the gap */
812 for (; pagelayout[i + 1] == VRFY_ITEM_NOTSET &&
813 (size_t)(i + 1) < dbp->pgsize; i++)
816 case VRFY_ITEM_BEGIN:
817 /* We've found an item. Check its alignment. */
818 if (i != DB_ALIGN(i, sizeof(u_int32_t))) {
820 EPRINT((env, DB_STR_A("1060",
821 "Page %lu: offset %lu unaligned",
822 "%lu %lu"), (u_long)pgno,
830 * We've hit the end of an item even though
831 * we don't think we're in one; must
835 EPRINT((env, DB_STR_A("1061",
836 "Page %lu: overlapping items at offset %lu",
837 "%lu %lu"), (u_long)pgno, (u_long)i));
841 switch (pagelayout[i]) {
842 case VRFY_ITEM_NOTSET:
843 /* In the middle of an item somewhere. Okay. */
846 /* End of an item; switch to out-of-item mode.*/
849 case VRFY_ITEM_BEGIN:
851 * Hit a second item beginning without an
855 EPRINT((env, DB_STR_A("1062",
856 "Page %lu: overlapping items at offset %lu",
857 "%lu %lu"), (u_long)pgno, (u_long)i));
861 __os_free(env, pagelayout);
863 /* Verify HOFFSET. */
864 if ((db_indx_t)himark != HOFFSET(h)) {
865 EPRINT((env, DB_STR_A("1063",
866 "Page %lu: bad HOFFSET %lu, appears to be %lu",
867 "%lu %lu %lu"), (u_long)pgno, (u_long)HOFFSET(h),
872 err: if (nentriesp != NULL)
873 *nentriesp = nentries;
875 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
878 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
882 * __bam_vrfy_itemorder --
883 * Make sure the items on a page sort correctly.
885 * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are
886 * reasonable; be sure that __bam_vrfy_inp has been called first.
888 * If ovflok is set, it also assumes that overflow page chains
889 * hanging off the current page have been sanity-checked, and so we
890 * can use __bam_cmp to verify their ordering. If it is not set,
891 * and we run into an overflow page, carp and return DB_VERIFY_BAD;
892 * we shouldn't be called if any exist.
894 * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, DB_THREAD_INFO *,
895 * PUBLIC: PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t));
898 __bam_vrfy_itemorder(dbp, vdp, ip, h, pgno, nentries, ovflok, hasdups, flags)
914 DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
920 int adj, cmp, freedup_1, freedup_2, isbad, ret, t_ret;
921 int (*dupfunc) __P((DB *, const DBT *, const DBT *));
922 int (*func) __P((DB *, const DBT *, const DBT *));
923 void *buf1, *buf2, *tmpbuf;
926 * We need to work in the ORDERCHKONLY environment where we might
927 * not have a pip, but we also may need to work in contexts where
928 * NUM_ENT isn't safe.
931 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
933 nentries = pip->entries;
939 bo = NULL; /* Shut up compiler. */
941 memset(&dbta, 0, sizeof(DBT));
942 F_SET(&dbta, DB_DBT_REALLOC);
944 memset(&dbtb, 0, sizeof(DBT));
945 F_SET(&dbtb, DB_DBT_REALLOC);
949 DB_ASSERT(env, !LF_ISSET(DB_NOORDERCHK));
951 dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
952 if (TYPE(h) == P_LDUP)
956 if (dbp->bt_internal != NULL) {
957 bt = (BTREE *)dbp->bt_internal;
958 if (TYPE(h) == P_IBTREE && (bt->bt_compare != NULL ||
959 dupfunc != __bam_defcmp)) {
961 * The problem here is that we cannot
962 * tell the difference between an off
963 * page duplicate internal page and
964 * a main database internal page.
965 * Walk down the tree to figure it out.
969 while (TYPE(child) == P_IBTREE) {
970 bi = GET_BINTERNAL(dbp, child, 0);
973 (ret = __memp_fput(mpf,
974 vdp->thread_info, child,
975 DB_PRIORITY_UNCHANGED)) != 0)
977 if ((ret = __memp_fget(mpf,
978 &cpgno, vdp->thread_info,
979 NULL, 0, &child)) != 0)
982 if (TYPE(child) == P_LDUP)
984 else if (bt->bt_compare != NULL)
985 func = bt->bt_compare;
986 if ((ret = __memp_fput(mpf, vdp->thread_info,
987 child, DB_PRIORITY_UNCHANGED)) != 0)
989 } else if (bt->bt_compare != NULL)
990 func = bt->bt_compare;
995 * We alternate our use of dbta and dbtb so that we can walk
996 * through the page key-by-key without copying a dbt twice.
997 * p1 is always the dbt for index i - 1, and p2 for index i.
998 * Reset the data pointers in case we are retrying.
1006 * Loop through the entries. nentries ought to contain the
1007 * actual count, and so is a safe way to terminate the loop; whether
1008 * we inc. by one or two depends on whether we're a leaf page--
1009 * on a leaf page, we care only about keys. On internal pages
1010 * and LDUP pages, we want to check the order of all entries.
1012 * Note that on IBTREE pages or the index page of a partitioned
1013 * database, we start with item 1, since item 0 doesn't get looked
1016 inp = P_INP(dbp, h);
1017 adj = (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX;
1018 for (i = (TYPE(h) == P_IBTREE || dbp->p_internal != NULL) ? adj : 0;
1019 i < nentries; i += adj) {
1021 * Put key i-1, now in p2, into p1, by swapping DBTs and bufs.
1031 * Get key i into p2.
1035 bi = GET_BINTERNAL(dbp, h, i);
1036 if (B_TYPE(bi->type) == B_OVERFLOW) {
1037 bo = (BOVERFLOW *)(bi->data);
1040 p2->data = bi->data;
1045 * The leftmost key on an internal page must be
1046 * len 0, since it's just a placeholder and
1047 * automatically sorts less than all keys.
1050 * This criterion does not currently hold!
1051 * See todo list item #1686. Meanwhile, it's harmless
1052 * to just not check for it.
1055 if (i == 0 && bi->len != 0) {
1057 EPRINT((env, DB_STR_A("1064",
1058 "Page %lu: lowest key on internal page of nonzero length",
1059 "%lu"), (u_long)pgno));
1065 bk = GET_BKEYDATA(dbp, h, i);
1066 if (B_TYPE(bk->type) == B_OVERFLOW) {
1067 bo = (BOVERFLOW *)bk;
1070 p2->data = bk->data;
1076 * This means our caller screwed up and sent us
1077 * an inappropriate page.
1079 ret = __db_unknown_path(env, "__bam_vrfy_itemorder");
1085 * If ovflok != 1, we can't safely go chasing
1086 * overflow pages with the normal routines now;
1087 * they might be unsafe or nonexistent. Mark this
1088 * page as incomplete and return.
1090 * Note that we don't need to worry about freeing
1091 * buffers, since they can't have been allocated
1092 * if overflow items are unsafe.
1094 overflow: if (!ovflok) {
1096 F_SET(pip, VRFY_INCOMPLETE);
1101 * Overflow items are safe to chase. Do so.
1102 * Fetch the overflow item into p2->data,
1103 * NULLing it or reallocing it as appropriate.
1105 * (We set p2->data to buf2 before the call
1106 * so we're sure to realloc if we can and if p2
1107 * was just pointing at a non-overflow item.)
1110 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
1111 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
1113 if ((ret = __db_goff(dbc,
1114 p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
1116 EPRINT((env, DB_STR_A("1065",
1117 "Page %lu: error %lu in fetching overflow item %lu",
1118 "%lu %lu %lu"), (u_long)pgno, (u_long)ret,
1121 /* In case it got realloc'ed and thus changed. */
1125 /* Compare with the last key. */
1126 if (p1->data != NULL && p2->data != NULL) {
1127 cmp = inp[i] == inp[i - adj] ? 0 : func(dbp, p1, p2);
1129 /* comparison succeeded */
1132 * If we are looking at an internal page, we
1133 * don't know whether it is part of the main
1134 * database or in an off-page-duplicate tree.
1135 * If the main comparator fails, retry with
1136 * the duplicate comparator.
1138 if (TYPE(h) == P_IBTREE && func != dupfunc) {
1144 EPRINT((env, DB_STR_A("1066",
1145 "Page %lu: out-of-order key at entry %lu",
1146 "%lu %lu"), (u_long)pgno, (u_long)i));
1148 } else if (cmp == 0) {
1149 if (inp[i] != inp[i - adj]) {
1151 if (TYPE(h) == P_IBTREE &&
1157 EPRINT((env, DB_STR_A("1067",
1158 "Page %lu: non-dup dup key at entry %lu",
1159 "%lu %lu"), (u_long)pgno,
1163 * If they compared equally, this
1164 * had better be a (sub)database with dups.
1165 * Mark it so we can check during the
1169 F_SET(pip, VRFY_HAS_DUPS);
1170 else if (hasdups == 0) {
1172 if (TYPE(h) == P_IBTREE &&
1178 EPRINT((env, DB_STR_A("1068",
1179 "Page %lu: database with no duplicates has duplicated keys",
1180 "%lu"), (u_long)pgno));
1184 * If we're a btree leaf, check to see
1185 * if the data items of these on-page dups are
1186 * in sorted order. If not, flag this, so
1187 * that we can make sure during the
1188 * structure checks that the DUPSORT flag
1191 * At this point i points to a duplicate key.
1192 * Compare the datum before it (same key)
1193 * to the datum after it, i.e. i-1 to i+1.
1195 if (TYPE(h) == P_LBTREE) {
1197 * Unsafe; continue and we'll pick
1198 * up the bogus nentries later.
1200 if (i + 1 >= (db_indx_t)nentries)
1204 * We don't bother with clever memory
1205 * management with on-page dups,
1206 * as it's only really a big win
1207 * in the overflow case, and overflow
1208 * dups are probably (?) rare.
1210 if (((ret = __bam_safe_getdata(dbp,
1211 ip, h, i - 1, ovflok,
1212 &dup_1, &freedup_1)) != 0) ||
1213 ((ret = __bam_safe_getdata(dbp,
1214 ip, h, i + 1, ovflok,
1215 &dup_2, &freedup_2)) != 0))
1219 * If either of the data are NULL,
1220 * it's because they're overflows and
1221 * it's not safe to chase them now.
1222 * Mark an incomplete and return.
1224 if (dup_1.data == NULL ||
1225 dup_2.data == NULL) {
1226 DB_ASSERT(env, !ovflok);
1234 * If the dups are out of order,
1235 * flag this. It's not an error
1236 * until we do the structure check
1237 * and see whether DUPSORT is set.
1239 if (dupfunc(dbp, &dup_1, &dup_2) > 0 &&
1241 F_SET(pip, VRFY_DUPS_UNSORTED);
1244 __os_ufree(env, dup_1.data);
1246 __os_ufree(env, dup_2.data);
1252 err: if (pip != NULL && ((t_ret =
1253 __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0)
1257 __os_ufree(env, buf1);
1259 __os_ufree(env, buf2);
1261 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1265 * __bam_vrfy_structure --
1266 * Verify the tree structure of a btree database (including the master
1267 * database containing subdbs).
1269 * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
1270 * PUBLIC: void *, void *, u_int32_t));
1273 __bam_vrfy_structure(dbp, vdp, meta_pgno, lp, rp, flags)
1276 db_pgno_t meta_pgno;
1282 VRFY_PAGEINFO *mip, *rip;
1285 u_int32_t nrecs, level, relen, stflags;
1291 if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0)
1294 if ((ret = __db_vrfy_pgset_get(pgset,
1295 vdp->thread_info, vdp->txn, meta_pgno, (int *)&p)) != 0)
1298 EPRINT((env, DB_STR_A("1069",
1299 "Page %lu: btree metadata page observed twice",
1300 "%lu"), (u_long)meta_pgno));
1301 ret = DB_VERIFY_BAD;
1304 if ((ret = __db_vrfy_pgset_inc(
1305 pgset, vdp->thread_info, vdp->txn, meta_pgno)) != 0)
1311 EPRINT((env, DB_STR_A("1070",
1312 "Page %lu: btree metadata page has no root",
1313 "%lu"), (u_long)meta_pgno));
1314 ret = DB_VERIFY_BAD;
1318 if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0)
1321 switch (rip->type) {
1324 stflags = flags | DB_ST_TOPLEVEL;
1325 if (F_ISSET(mip, VRFY_HAS_DUPS))
1326 stflags |= DB_ST_DUPOK;
1327 if (F_ISSET(mip, VRFY_HAS_DUPSORT))
1328 stflags |= DB_ST_DUPSORT;
1329 if (F_ISSET(mip, VRFY_HAS_RECNUMS))
1330 stflags |= DB_ST_RECNUM;
1331 ret = __bam_vrfy_subtree(dbp,
1332 vdp, root, lp, rp, stflags, NULL, NULL, NULL);
1337 flags | DB_ST_RECNUM | DB_ST_IS_RECNO | DB_ST_TOPLEVEL;
1338 if (mip->re_len > 0)
1339 stflags |= DB_ST_RELEN;
1340 if ((ret = __bam_vrfy_subtree(dbp, vdp,
1341 root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0)
1344 * Even if mip->re_len > 0, re_len may come back zero if the
1345 * tree is empty. It should be okay to just skip the check in
1346 * this case, as if there are any non-deleted keys at all,
1347 * that should never happen.
1349 if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
1350 EPRINT((env, DB_STR_A("1071",
1351 "Page %lu: recno database has bad re_len %lu",
1352 "%lu %lu"), (u_long)meta_pgno, (u_long)relen));
1353 ret = DB_VERIFY_BAD;
1359 EPRINT((env, DB_STR_A("1072",
1360 "Page %lu: duplicate tree referenced from metadata page",
1361 "%lu"), (u_long)meta_pgno));
1362 ret = DB_VERIFY_BAD;
1365 EPRINT((env, DB_STR_A("1073",
1366 "Page %lu: btree root of incorrect type %lu on metadata page",
1367 "%lu %lu"), (u_long)meta_pgno, (u_long)rip->type));
1368 ret = DB_VERIFY_BAD;
1372 err: if (mip != NULL && ((t_ret =
1373 __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0)
1375 if (rip != NULL && ((t_ret =
1376 __db_vrfy_putpageinfo(env, vdp, rip)) != 0) && ret == 0)
1382 * __bam_vrfy_subtree--
1383 * Verify a subtree (or entire) btree with specified root.
1385 * Note that this is public because it must be called to verify
1386 * offpage dup trees, including from hash.
1388 * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *,
1389 * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
1392 __bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
1397 u_int32_t flags, *levelp, *nrecsp, *relenp;
1405 VRFY_CHILDINFO *child;
1408 db_pgno_t next_pgno, prev_pgno;
1409 db_recno_t child_nrecs, nrecs;
1410 u_int32_t child_level, child_relen, j, level, relen, stflags;
1412 int (*func) __P((DB *, const DBT *, const DBT *));
1413 int isbad, p, ret, t_ret, toplevel;
1415 if (levelp != NULL) /* Don't leave uninitialized on error. */
1423 next_pgno = prev_pgno = PGNO_INVALID;
1426 leaf_type = P_INVALID;
1429 /* Provide feedback on our progress to the application. */
1430 if (!LF_ISSET(DB_SALVAGE))
1431 __db_vrfy_struct_feedback(dbp, vdp);
1433 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1437 level = pip->bt_level;
1439 toplevel = LF_ISSET(DB_ST_TOPLEVEL) ? 1 : 0;
1440 LF_CLR(DB_ST_TOPLEVEL);
1443 * If this is the root, initialize the vdp's prev- and next-pgno
1446 * For each leaf page we hit, we'll want to make sure that
1447 * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
1448 * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno
1449 * and vdp->prev_pgno to our page number, and the next leaf page in
1450 * line should be able to do the same verification.
1454 * Cache the values stored in the vdp so that if we're an
1455 * auxiliary tree such as an off-page duplicate set, our
1456 * caller's leaf page chain doesn't get lost.
1458 prev_pgno = vdp->prev_pgno;
1459 next_pgno = vdp->next_pgno;
1460 leaf_type = vdp->leaf_type;
1461 vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
1462 vdp->leaf_type = P_INVALID;
1466 * We are recursively descending a btree, starting from the root
1467 * and working our way out to the leaves.
1469 * There are four cases we need to deal with:
1470 * 1. pgno is a recno leaf page. Any children are overflows.
1471 * 2. pgno is a duplicate leaf page. Any children
1472 * are overflow pages; traverse them, and then return
1474 * 3. pgno is an ordinary leaf page. Check whether dups are
1475 * allowed, and if so, traverse any off-page dups or
1476 * overflows. Then return nrecs and level.
1477 * 4. pgno is a recno internal page. Recursively check any
1478 * child pages, making sure their levels are one lower
1479 * and their nrecs sum to ours.
1480 * 5. pgno is a btree internal page. Same as #4, plus we
1481 * must verify that for each pair of BINTERNAL entries
1482 * N and N+1, the leftmost item on N's child sorts
1483 * greater than N, and the rightmost item on N's child
1484 * sorts less than N+1.
1486 * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE),
1487 * we need to verify the internal sort order is correct if,
1488 * due to overflow items, we were not able to do so earlier.
1490 switch (pip->type) {
1497 * We're some sort of leaf page; verify
1498 * that our linked list of leaves is consistent.
1500 if (vdp->leaf_type == P_INVALID) {
1502 * First leaf page. Set the type that all its
1503 * successors should be, and verify that our prev_pgno
1506 vdp->leaf_type = pip->type;
1507 if (pip->prev_pgno != PGNO_INVALID)
1511 * Successor leaf page. Check our type, the previous
1512 * page's next_pgno, and our prev_pgno.
1514 if (pip->type != vdp->leaf_type) {
1516 EPRINT((env, DB_STR_A("1074",
1517 "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
1518 "%lu %lu %lu"), (u_long)pip->pgno,
1520 (u_long)vdp->leaf_type));
1524 * Don't do the prev/next_pgno checks if we've lost
1525 * leaf pages due to another corruption.
1527 if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) {
1528 if (pip->pgno != vdp->next_pgno) {
1530 EPRINT((env, DB_STR_A("1075",
1531 "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
1533 (u_long)vdp->prev_pgno,
1534 (u_long)vdp->next_pgno,
1535 (u_long)pip->pgno));
1537 if (pip->prev_pgno != vdp->prev_pgno) {
1538 bad_prev: isbad = 1;
1539 EPRINT((env, DB_STR_A("1076",
1540 "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
1543 (u_long)pip->prev_pgno,
1544 (u_long)vdp->prev_pgno));
1548 vdp->prev_pgno = pip->pgno;
1549 vdp->next_pgno = pip->next_pgno;
1550 F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN);
1553 * Overflow pages are common to all three leaf types;
1554 * traverse the child list, looking for overflows.
1556 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
1558 for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
1559 ret = __db_vrfy_ccnext(cc, &child))
1560 if (child->type == V_OVERFLOW &&
1561 (ret = __db_vrfy_ovfl_structure(dbp, vdp,
1562 child->pgno, child->tlen,
1563 flags | DB_ST_OVFL_LEAF)) != 0) {
1564 if (ret == DB_VERIFY_BAD)
1570 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1575 if (pip->type == P_LRECNO) {
1576 if (!LF_ISSET(DB_ST_IS_RECNO) &&
1577 !(LF_ISSET(DB_ST_DUPOK) &&
1578 !LF_ISSET(DB_ST_DUPSORT))) {
1580 EPRINT((env, DB_STR_A("1077",
1581 "Page %lu: recno leaf page non-recno tree",
1582 "%lu"), (u_long)pgno));
1586 } else if (LF_ISSET(DB_ST_IS_RECNO)) {
1588 * It's a non-recno leaf. Had better not be a recno
1592 EPRINT((env, DB_STR_A("1078",
1593 "Page %lu: non-recno leaf page in recno tree",
1594 "%lu"), (u_long)pgno));
1598 /* Case 2--no more work. */
1599 if (pip->type == P_LDUP)
1604 /* Check if we have any dups. */
1605 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
1606 /* If dups aren't allowed in this btree, trouble. */
1607 if (!LF_ISSET(DB_ST_DUPOK)) {
1609 EPRINT((env, DB_STR_A("1079",
1610 "Page %lu: duplicates in non-dup btree",
1611 "%lu"), (u_long)pgno));
1614 * We correctly have dups. If any are off-page,
1615 * traverse those btrees recursively.
1618 __db_vrfy_childcursor(vdp, &cc)) != 0)
1620 for (ret = __db_vrfy_ccset(cc, pgno, &child);
1622 ret = __db_vrfy_ccnext(cc, &child)) {
1624 flags | DB_ST_RECNUM | DB_ST_DUPSET;
1625 /* Skip any overflow entries. */
1626 if (child->type == V_DUPLICATE) {
1627 if ((ret = __db_vrfy_duptype(
1628 dbp, vdp, child->pgno,
1634 if ((ret = __bam_vrfy_subtree(
1635 dbp, vdp, child->pgno,
1637 stflags | DB_ST_TOPLEVEL,
1638 NULL, NULL, NULL)) != 0) {
1648 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1653 * If VRFY_DUPS_UNSORTED is set,
1654 * DB_ST_DUPSORT had better not be.
1656 if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
1657 LF_ISSET(DB_ST_DUPSORT)) {
1659 EPRINT((env, DB_STR_A("1080",
1660 "Page %lu: unsorted duplicate set in sorted-dup database",
1661 "%lu"), (u_long)pgno));
1668 /* We handle these below. */
1672 * If a P_IBTREE or P_IRECNO contains a reference to an
1673 * invalid page, we'll wind up here; handle it gracefully.
1674 * Note that the code at the "done" label assumes that the
1675 * current page is a btree/recno one of some sort; this
1676 * is not the case here, so we goto err.
1678 * If the page is entirely zeroed, its pip->type will be a lie
1679 * (we assumed it was a hash page, as they're allowed to be
1680 * zeroed); handle this case specially.
1682 if (F_ISSET(pip, VRFY_IS_ALLZEROES))
1683 ZEROPG_ERR_PRINT(env, pgno, DB_STR_P(
1684 "btree or recno page"));
1686 EPRINT((env, DB_STR_A("1081",
1687 "Page %lu: btree or recno page is of inappropriate type %lu",
1688 "%lu %lu"), (u_long)pgno, (u_long)pip->type));
1691 * We probably lost a leaf page (or more if this was an
1692 * internal page) from our prev/next_pgno chain. Flag
1693 * that this is expected; we don't want or need to
1694 * spew error messages about erroneous prev/next_pgnos,
1695 * since that's probably not the real problem.
1697 F_SET(vdp, VRFY_LEAFCHAIN_BROKEN);
1699 ret = DB_VERIFY_BAD;
1704 * Cases 4 & 5: This is a btree or recno internal page. For each child,
1705 * recurse, keeping a running count of nrecs and making sure the level
1706 * is always reasonable.
1708 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
1710 for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
1711 ret = __db_vrfy_ccnext(cc, &child))
1712 if (child->type == V_RECNO) {
1713 if (pip->type != P_IRECNO) {
1714 ret = __db_unknown_path(
1715 env, "__bam_vrfy_subtree");
1718 if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno,
1719 NULL, NULL, flags, &child_level, &child_nrecs,
1720 &child_relen)) != 0) {
1721 if (ret == DB_VERIFY_BAD)
1727 if (LF_ISSET(DB_ST_RELEN)) {
1729 relen = child_relen;
1731 * child_relen may be zero if the child subtree
1734 else if (child_relen > 0 &&
1735 relen != child_relen) {
1737 EPRINT((env, DB_STR_A("1082",
1738 "Page %lu: recno page returned bad re_len %lu",
1739 "%lu %lu"), (u_long)child->pgno,
1740 (u_long)child_relen));
1745 if (LF_ISSET(DB_ST_RECNUM)) {
1746 if (child->nrecs != child_nrecs) {
1748 EPRINT((env, DB_STR_A("1083",
1749 "Page %lu: record count incorrect: actual %lu, in record %lu",
1751 (u_long)child->pgno,
1752 (u_long)child_nrecs,
1753 (u_long)child->nrecs));
1755 nrecs += child_nrecs;
1757 if (isbad == 0 && level != child_level + 1) {
1759 EPRINT((env, DB_STR_A("1084",
1760 "Page %lu: recno level incorrect: got %lu, expected %lu",
1762 (u_long)child->pgno, (u_long)child_level,
1763 (u_long)(level - 1)));
1765 } else if (child->type == V_OVERFLOW) {
1767 * It is possible for one internal page to reference
1768 * a single overflow page twice, if all the items
1769 * in the subtree referenced by slot 0 are deleted,
1770 * then a similar number of items are put back
1771 * before the key that formerly had been in slot 1.
1773 * (Btree doesn't look at the key in slot 0, so the
1774 * fact that the key formerly at slot 1 is the "wrong"
1775 * parent of the stuff in the slot 0 subtree isn't
1776 * really incorrect.)
1778 * __db_vrfy_ovfl_structure is designed to be
1779 * efficiently called multiple times for multiple
1780 * references; call it here as many times as is
1784 /* Otherwise, __db_vrfy_childput would be broken. */
1785 DB_ASSERT(env, child->refcnt >= 1);
1788 * An overflow referenced more than twice here
1791 if (child->refcnt > 2) {
1793 EPRINT((env, DB_STR_A("1085",
1794 "Page %lu: overflow page %lu referenced more than twice from internal page",
1795 "%lu %lu"), (u_long)pgno,
1796 (u_long)child->pgno));
1798 for (j = 0; j < child->refcnt; j++)
1799 if ((ret = __db_vrfy_ovfl_structure(dbp,
1800 vdp, child->pgno, child->tlen,
1802 if (ret == DB_VERIFY_BAD)
1809 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1813 /* We're done with case 4. */
1814 if (pip->type == P_IRECNO)
1818 * Case 5. Btree internal pages.
1819 * As described above, we need to iterate through all the
1820 * items on the page and make sure that our children sort appropriately
1821 * with respect to them.
1823 * For each entry, li will be the "left-hand" key for the entry
1824 * itself, which must sort lower than all entries on its child;
1825 * ri will be the key to its right, which must sort greater.
1828 (ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
1830 for (i = 0; i < pip->entries; i += O_INDX) {
1831 li = GET_BINTERNAL(dbp, h, i);
1832 ri = (i + O_INDX < pip->entries) ?
1833 GET_BINTERNAL(dbp, h, i + O_INDX) : r;
1836 * The leftmost key is forcibly sorted less than all entries,
1837 * so don't bother passing it.
1839 if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno,
1840 i == 0 ? NULL : li, ri, flags, &child_level,
1841 &child_nrecs, NULL)) != 0) {
1842 if (ret == DB_VERIFY_BAD)
1848 if (LF_ISSET(DB_ST_RECNUM)) {
1850 * Keep a running tally on the actual record count so
1851 * we can return it to our parent (if we have one) or
1852 * compare it to the NRECS field if we're a root page.
1854 nrecs += child_nrecs;
1857 * Make sure the actual record count of the child
1858 * is equal to the value in the BINTERNAL structure.
1860 if (li->nrecs != child_nrecs) {
1862 EPRINT((env, DB_STR_A("1086",
1863 "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
1864 "%lu %lu %lu %lu"), (u_long)pgno,
1865 (u_long)i, (u_long)li->nrecs,
1866 (u_long)child_nrecs));
1870 if (level != child_level + 1) {
1872 EPRINT((env, DB_STR_A("1087",
1873 "Page %lu: Btree level incorrect: got %lu, expected %lu",
1874 "%lu %lu %lu"), (u_long)li->pgno,
1875 (u_long)child_level, (u_long)(level - 1)));
1880 leaf: level = LEAFLEVEL;
1881 if (LF_ISSET(DB_ST_RECNUM))
1882 nrecs = pip->rec_cnt;
1885 * We should verify that the record count on a leaf page
1886 * is the sum of the number of keys and the number of
1887 * records in its off-page dups. This requires looking
1888 * at the page again, however, and it may all be changing
1889 * soon, so for now we don't bother.
1892 if (LF_ISSET(DB_ST_RELEN) && relenp)
1893 *relenp = pip->re_len;
1895 done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
1897 * During the page-by-page pass, item order verification was
1898 * not finished due to the presence of overflow items. If
1899 * isbad == 0, though, it's now safe to do so, as we've
1900 * traversed any child overflow pages. Do it.
1902 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1903 vdp->thread_info, NULL, 0, &h)) != 0)
1905 if ((ret = __bam_vrfy_itemorder(dbp,
1906 vdp, vdp->thread_info, h, pgno, 0, 1, 0, flags)) != 0)
1908 F_CLR(pip, VRFY_INCOMPLETE);
1912 * It's possible to get to this point with a page that has no
1913 * items, but without having detected any sort of failure yet.
1914 * Having zero items is legal if it's a leaf--it may be the
1915 * root page in an empty tree, or the tree may have been
1916 * modified with the DB_REVSPLITOFF flag set (there's no way
1917 * to tell from what's on disk). For an internal page,
1918 * though, having no items is a problem (all internal pages
1919 * must have children).
1921 if (isbad == 0 && ret == 0) {
1922 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1923 vdp->thread_info, NULL, 0, &h)) != 0)
1926 if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
1928 EPRINT((env, DB_STR_A("1088",
1929 "Page %lu: internal page is empty and should not be",
1930 "%lu"), (u_long)pgno));
1936 * Our parent has sent us BINTERNAL pointers to parent records
1937 * so that we can verify our place with respect to them. If it's
1938 * appropriate--we have a default sort function--verify this.
1940 if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) &&
1941 pip->type != P_IRECNO && pip->type != P_LRECNO) {
1942 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1943 vdp->thread_info, NULL, 0, &h)) != 0)
1947 * __bam_vrfy_treeorder needs to know what comparison function
1948 * to use. If DB_ST_DUPSET is set, we're in a duplicate tree
1949 * and we use the duplicate comparison function; otherwise,
1950 * use the btree one. If unset, use the default, of course.
1952 func = LF_ISSET(DB_ST_DUPSET) ? dbp->dup_compare :
1953 ((BTREE *)dbp->bt_internal)->bt_compare;
1955 func = __bam_defcmp;
1957 if ((ret = __bam_vrfy_treeorder(dbp,
1958 vdp->thread_info, h, l, r, func, flags)) != 0) {
1959 if (ret == DB_VERIFY_BAD)
1967 * This is guaranteed to succeed for leaf pages, but no harm done.
1969 * Internal pages below the top level do not store their own
1970 * record numbers, so we skip them.
1972 if (LF_ISSET(DB_ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
1974 EPRINT((env, DB_STR_A("1089",
1975 "Page %lu: bad record count: has %lu records, claims %lu",
1976 "%lu %lu %lu"), (u_long)pgno, (u_long)nrecs,
1977 (u_long)pip->rec_cnt));
1986 if ((ret = __db_vrfy_pgset_get(pgset,
1987 vdp->thread_info, vdp->txn, pgno, &p)) != 0)
1991 EPRINT((env, DB_STR_A("1090",
1992 "Page %lu: linked twice", "%lu"), (u_long)pgno));
1994 __db_vrfy_pgset_inc(pgset, vdp->thread_info, vdp->txn, pgno)) != 0)
1999 * The last page's next_pgno in the leaf chain should have been
2002 if (vdp->next_pgno != PGNO_INVALID) {
2004 EPRINT((env, DB_STR_A("1091",
2005 "Page %lu: unterminated leaf chain",
2006 "%lu"), (u_long)vdp->prev_pgno));
2009 err: if (toplevel) {
2010 /* Restore our caller's settings. */
2011 vdp->next_pgno = next_pgno;
2012 vdp->prev_pgno = prev_pgno;
2013 vdp->leaf_type = leaf_type;
2016 if (h != NULL && (t_ret = __memp_fput(mpf,
2017 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0)
2019 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2021 if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
2023 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
2027 * __bam_vrfy_treeorder --
2028 * Verify that the lowest key on a page sorts greater than the
2029 * BINTERNAL which points to it (lp), and the highest key
2030 * sorts less than the BINTERNAL above that (rp).
2032 * If lp is NULL, this means that it was the leftmost key on the
2033 * parent, which (regardless of sort function) sorts less than
2034 * all keys. No need to check it.
2036 * If rp is NULL, lp was the highest key on the parent, so there's
2037 * no higher key we must sort less than.
2040 __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
2045 int (*func) __P((DB *, const DBT *, const DBT *));
2056 memset(&dbt, 0, sizeof(DBT));
2057 F_SET(&dbt, DB_DBT_MALLOC);
2061 * Empty pages are sorted correctly by definition. We check
2062 * to see whether they ought to be empty elsewhere; leaf
2063 * pages legally may be.
2065 if (NUM_ENT(h) == 0)
2071 last = NUM_ENT(h) - O_INDX;
2074 last = NUM_ENT(h) - P_INDX;
2077 return (__db_unknown_path(env, "__bam_vrfy_treeorder"));
2080 /* Populate a dummy cursor. */
2081 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2082 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
2085 * The key on page h, the child page, is more likely to be
2086 * an overflow page, so we pass its offset, rather than lp/rp's,
2087 * into __bam_cmp. This will take advantage of __db_moff.
2091 * Skip first-item check if we're an internal page--the first
2092 * entry on an internal page is treated specially by __bam_cmp,
2093 * so what's on the page shouldn't matter. (Plus, since we're passing
2094 * our page and item 0 as to __bam_cmp, we'll sort before our
2095 * parent and falsely report a failure.)
2097 if (lp != NULL && TYPE(h) != P_IBTREE) {
2098 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2099 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
2101 if (lp->type == B_KEYDATA) {
2102 dbt.data = lp->data;
2104 } else if (lp->type == B_OVERFLOW) {
2105 bo = (BOVERFLOW *)lp->data;
2106 if ((ret = __db_goff(dbc, &dbt,
2107 bo->tlen, bo->pgno, NULL, NULL)) != 0)
2111 __db_unknown_path(env, "__bam_vrfy_treeorder"));
2113 /* On error, fall through, free if needed, and return. */
2114 if ((ret = __bam_cmp(dbc, &dbt, h, 0, func, &cmp)) == 0) {
2116 EPRINT((env, DB_STR_A("1092",
2117 "Page %lu: first item on page sorted greater than parent entry",
2118 "%lu"), (u_long)PGNO(h)));
2119 ret = DB_VERIFY_BAD;
2122 EPRINT((env, DB_STR_A("1093",
2123 "Page %lu: first item on page had comparison error",
2124 "%lu"), (u_long)PGNO(h)));
2126 if (dbt.data != lp->data)
2127 __os_ufree(env, dbt.data);
2133 if (rp->type == B_KEYDATA) {
2134 dbt.data = rp->data;
2136 } else if (rp->type == B_OVERFLOW) {
2137 bo = (BOVERFLOW *)rp->data;
2138 if ((ret = __db_goff(dbc, &dbt,
2139 bo->tlen, bo->pgno, NULL, NULL)) != 0)
2143 __db_unknown_path(env, "__bam_vrfy_treeorder"));
2145 /* On error, fall through, free if needed, and return. */
2146 if ((ret = __bam_cmp(dbc, &dbt, h, last, func, &cmp)) == 0) {
2148 EPRINT((env, DB_STR_A("1094",
2149 "Page %lu: last item on page sorted greater than parent entry",
2150 "%lu"), (u_long)PGNO(h)));
2151 ret = DB_VERIFY_BAD;
2154 EPRINT((env, DB_STR_A("1095",
2155 "Page %lu: last item on page had comparison error",
2156 "%lu"), (u_long)PGNO(h)));
2158 if (dbt.data != rp->data)
2159 __os_ufree(env, dbt.data);
2167 * Safely dump out anything that looks like a key on an alleged
2168 * btree leaf page, also mark overflow pages as seen. For internal btree
2169 * pages, just mark any overflow pages as seen.
2171 * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *,
2172 * PUBLIC: db_pgno_t, u_int32_t, PAGE *, void *,
2173 * PUBLIC: int (*)(void *, const void *), DBT *, u_int32_t));
2176 __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
2183 int (*callback) __P((void *, const void *));
2189 DBT dbt, repldbt, unknown_key, unknown_data;
2192 db_indx_t i, last, beg, end, *inp;
2194 u_int32_t himark, ovfl_bufsz;
2196 int adj, ret, t_ret, t2_ret;
2197 #ifdef HAVE_COMPRESSION
2198 DBT kcpy, *last_key;
2199 int unknown_dup_key;
2203 ovflbuf = pgmap = NULL;
2204 inp = P_INP(dbp, h);
2206 memset(&dbt, 0, sizeof(DBT));
2207 dbt.flags = DB_DBT_REALLOC;
2208 memset(&repldbt, 0, sizeof(DBT));
2210 #ifdef HAVE_COMPRESSION
2211 memset(&kcpy, 0, sizeof(DBT));
2212 unknown_dup_key = LF_ISSET(DB_SA_UNKNOWNKEY);
2213 last_key = unknown_dup_key ? NULL : key;
2215 LF_CLR(DB_SA_UNKNOWNKEY);
2217 DB_INIT_DBT(unknown_key, "UNKNOWN_KEY", sizeof("UNKNOWN_KEY") - 1);
2218 DB_INIT_DBT(unknown_data, "UNKNOWN_DATA", sizeof("UNKNOWN_DATA") - 1);
2221 * Allocate a buffer for overflow items. Start at one page;
2222 * __db_safe_goff will realloc as needed.
2224 if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
2226 ovfl_bufsz = dbp->pgsize;
2228 if (LF_ISSET(DB_AGGRESSIVE) && (ret =
2229 __os_calloc(env, dbp->pgsize, sizeof(pgmap[0]), &pgmap)) != 0)
2233 * Loop through the inp array, spitting out key/data pairs.
2235 * If we're salvaging normally, loop from 0 through NUM_ENT(h). If
2236 * we're being aggressive, loop until we hit the end of the page --
2237 * NUM_ENT() may be bogus.
2239 himark = dbp->pgsize;
2240 for (i = 0, last = UINT16_MAX;; i += O_INDX) {
2242 * If we're not aggressive, or if we're on an internal page,
2243 * break when we hit NUM_ENT(h).
2245 if ((!LF_ISSET(DB_AGGRESSIVE) ||
2246 pgtype == P_IBTREE) && i >= NUM_ENT(h))
2249 /* Verify the current item. */
2251 __db_vrfy_inpitem(dbp, h, pgno, i, 1, flags, &himark, NULL);
2255 * If this is a btree leaf and we've printed out a key
2256 * but not its associated data item, fix this imbalance
2257 * by printing an "UNKNOWN_DATA".
2259 if (pgtype == P_LBTREE && i % P_INDX == 1 &&
2260 last == i - 1 && (t2_ret = __db_vrfy_prdbt(
2262 0, " ", handle, callback, 0, 0, vdp)) != 0) {
2269 * Don't return DB_VERIFY_FATAL; it's private and means
2270 * only that we can't go on with this page, not with
2271 * the whole database. It's not even an error if we've
2272 * run into it after NUM_ENT(h).
2274 if (t_ret == DB_VERIFY_FATAL) {
2275 if (i < NUM_ENT(h) && ret == 0)
2276 ret = DB_VERIFY_BAD;
2283 * If this returned 0, it's safe to print or (carefully)
2286 * We only print deleted items if DB_AGGRESSIVE is set.
2288 bk = GET_BKEYDATA(dbp, h, i);
2289 if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
2293 * If this is a btree leaf and we're about to print out a data
2294 * item for which we didn't print out a key, fix this imbalance
2295 * by printing an "UNKNOWN_KEY".
2297 if (pgtype == P_LBTREE && i % P_INDX == 1 && last != i - 1) {
2298 #ifdef HAVE_COMPRESSION
2301 if ((t_ret = __db_vrfy_prdbt(&unknown_key,
2302 0, " ", handle, callback, 0, 0, vdp)) != 0) {
2311 * We're going to go try to print the next item. If key is
2312 * non-NULL, we're a dup page, so we've got to print the key
2313 * first, unless DB_SA_SKIPFIRSTKEY is set and we're on the
2316 if (key != NULL && (i != 0 || !LF_ISSET(DB_SA_SKIPFIRSTKEY))) {
2317 #ifdef HAVE_COMPRESSION
2318 last_key = unknown_dup_key ? NULL : key;
2320 if ((t_ret = __db_vrfy_prdbt(key,
2321 0, " ", handle, callback, 0, 0, vdp)) != 0) {
2329 switch (B_TYPE(bk->type)) {
2331 if (pgtype == P_IBTREE)
2334 end = beg + BOVERFLOW_SIZE - 1;
2336 * If we're not on a normal btree leaf page, there
2337 * shouldn't be off-page dup sets. Something's
2338 * confused; just drop it, and the code to pick up
2339 * unlinked offpage dup sets will print it out
2340 * with key "UNKNOWN" later.
2342 if (pgtype != P_LBTREE)
2345 bo = (BOVERFLOW *)bk;
2348 * If the page number is unreasonable, or if this is
2349 * supposed to be a key item, output "UNKNOWN_KEY" --
2350 * the best we can do is run into the data items in
2351 * the unlinked offpage dup pass.
2353 if (!IS_VALID_PGNO(bo->pgno) || (i % P_INDX == 0)) {
2354 /* Not much to do on failure. */
2355 #ifdef HAVE_COMPRESSION
2356 if (key == NULL && i % P_INDX == 0)
2359 if ((t_ret = __db_vrfy_prdbt(
2360 i % P_INDX == 0 ? &unknown_key : &unknown_data,
2361 0, " ", handle, callback, 0, 0,vdp)) != 0) {
2369 /* Don't stop on error. */
2370 if ((t_ret = __db_salvage_duptree(dbp,
2371 vdp, bo->pgno, &dbt, handle, callback,
2372 flags | DB_SA_SKIPFIRSTKEY
2373 #ifdef HAVE_COMPRESSION
2374 | (last_key == NULL ? DB_SA_UNKNOWNKEY : 0)
2376 )) != 0 && ret == 0)
2381 if (pgtype == P_IBTREE)
2384 end = (db_indx_t)DB_ALIGN(
2385 beg + bk->len, sizeof(u_int32_t)) - 1;
2387 dbt.data = bk->data;
2390 #ifdef HAVE_COMPRESSION
2391 if (DB_IS_COMPRESSED(dbp) && last_key != NULL &&
2392 (key != NULL || (i % P_INDX == 1))) {
2393 /* Decompress the key/data pair - the key
2394 is in last_key, and the data is in dbt */
2395 if ((t_ret = __bam_compress_salvage(dbp, vdp,
2396 handle, callback, last_key, &dbt)) != 0) {
2397 if (t_ret == DB_VERIFY_FATAL) {
2399 ret = DB_VERIFY_BAD;
2400 if (!LF_ISSET(DB_AGGRESSIVE))
2402 } else if (ret == 0) {
2408 if (key == NULL && i % P_INDX == 0) {
2409 if ((ret = __os_realloc(
2410 env, dbt.size, &kcpy.data)) != 0)
2412 memcpy(kcpy.data, dbt.data, dbt.size);
2413 kcpy.size = dbt.size;
2418 if ((t_ret = __db_vrfy_prdbt(&dbt,
2419 0, " ", handle, callback, 0, 0, vdp)) != 0) {
2424 #ifdef HAVE_COMPRESSION
2429 if (pgtype != P_IBTREE)
2430 end = beg + BOVERFLOW_SIZE - 1;
2431 bo = (BOVERFLOW *)bk;
2434 * Check for replicated overflow keys, so that we only
2435 * call __db_safe_goff once per overflow page. If we
2436 * get the same offset as the previous key just re-use
2439 * P_IBTREE pages will never have replicated overflow
2442 adj = pgtype == P_IBTREE ? O_INDX : P_INDX;
2443 if (pgtype == P_IBTREE) {
2445 * If we're looking at a P_IBTREE, we just want
2446 * to mark the overflow page as seen.
2448 * Note that this call to __db_safe_goff differs
2449 * from the non-P_IBTREE call.
2451 * Only call __db_safe_goff if the overflow page
2454 ovflpg = ((BOVERFLOW *)
2455 ((BINTERNAL *)bk)->data)->pgno;
2456 if (__db_salvage_isdone(vdp, ovflpg) == 0 &&
2457 (t_ret =__db_safe_goff(dbp, vdp, ovflpg,
2459 &ovfl_bufsz, flags)) != 0 && ret == 0)
2462 } else if (i > adj - 1 &&
2463 i % adj == 0 && inp[i] == inp[i - adj])
2466 /* Don't stop on error. */
2467 if ((t_ret = __db_safe_goff(dbp, vdp,
2468 bo->pgno, &dbt, &ovflbuf,
2469 &ovfl_bufsz, flags)) != 0 && ret == 0)
2473 * If this is a key, save it in case the next
2474 * key is a replicated overflow, so we don't
2475 * call __db_safe_goff again. Copy out dbt.data
2476 * in case that pointer gets realloc'd when
2477 * getting a data item.
2479 if (i % P_INDX == 0) {
2481 if ((t_ret = __os_realloc(env,
2483 &repldbt.data)) != 0) {
2488 memcpy(repldbt.data,
2489 dbt.data, dbt.size);
2490 repldbt.size = dbt.size;
2492 if (__os_realloc(env,
2494 &repldbt.data) != 0)
2496 memcpy(repldbt.data,
2499 repldbt.size = unknown_key.size;
2505 #ifdef HAVE_COMPRESSION
2506 if (DB_IS_COMPRESSED(dbp) && last_key && t_ret == 0 &&
2507 (key != NULL || (i % P_INDX == 1))) {
2508 /* Decompress the key/data pair - the key
2509 is in last_key, and the data is in dbt */
2510 if ((t_ret = __bam_compress_salvage(dbp, vdp,
2511 handle, callback, last_key, &dbt)) != 0) {
2512 if (t_ret == DB_VERIFY_FATAL) {
2514 ret = DB_VERIFY_BAD;
2515 if (!LF_ISSET(DB_AGGRESSIVE))
2517 } else if (ret == 0) {
2523 if (key == NULL && i % P_INDX == 0) {
2525 if ((ret = __os_realloc(env,
2526 dbt.size, &kcpy.data)) != 0)
2528 memcpy(kcpy.data, dbt.data,
2530 kcpy.size = dbt.size;
2537 if ((t_ret = __db_vrfy_prdbt(
2538 t_ret == 0 ? &dbt : &unknown_key,
2539 0, " ", handle, callback, 0, 0, vdp))
2542 #ifdef HAVE_COMPRESSION
2548 * We should never get here; __db_vrfy_inpitem should
2549 * not be returning 0 if bk->type is unrecognizable.
2551 t_ret = __db_unknown_path(env, "__bam_salvage");
2558 * If we're being aggressive, mark the beginning and end of
2559 * the item; we'll come back and print whatever "junk" is in
2560 * the gaps in case we had any bogus inp elements and thereby
2563 if (LF_ISSET(DB_AGGRESSIVE) && pgtype != P_IBTREE) {
2564 pgmap[beg] = VRFY_ITEM_BEGIN;
2565 pgmap[end] = VRFY_ITEM_END;
2569 err: if (pgmap != NULL)
2570 __os_free(env, pgmap);
2571 if (ovflbuf != NULL)
2572 __os_free(env, ovflbuf);
2573 if (repldbt.data != NULL)
2574 __os_free(env, repldbt.data);
2575 #ifdef HAVE_COMPRESSION
2576 if (kcpy.data != NULL)
2577 __os_free(env, kcpy.data);
2580 /* Mark this page as done. */
2581 if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
2588 * __bam_salvage_walkdupint --
2589 * Walk a known-good btree or recno internal page which is part of
2590 * a dup tree, calling __db_salvage_duptree on each child page.
2592 * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *,
2593 * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
2596 __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
2602 int (*callback) __P((void *, const void *));
2614 for (i = 0; i < NUM_ENT(h); i++) {
2617 bi = GET_BINTERNAL(dbp, h, i);
2618 if ((t_ret = __db_salvage_duptree(dbp,
2619 vdp, bi->pgno, key, handle, callback, flags)) != 0)
2623 ri = GET_RINTERNAL(dbp, h, i);
2624 if ((t_ret = __db_salvage_duptree(dbp,
2625 vdp, ri->pgno, key, handle, callback, flags)) != 0)
2629 return (__db_unknown_path(
2630 env, "__bam_salvage_walkdupint"));
2632 /* Pass DB_SA_SKIPFIRSTKEY, if set, on to the 0th child only. */
2633 flags &= ~LF_ISSET(DB_SA_SKIPFIRSTKEY);
2640 * __bam_meta2pgset --
2641 * Given a known-good meta page, return in pgsetp a 0-terminated list of
2642 * db_pgno_t's corresponding to the pages in the btree.
2644 * We do this by a somewhat sleazy method, to avoid having to traverse the
2645 * btree structure neatly: we walk down the left side to the very
2646 * first leaf page, then we mark all the pages in the chain of
2647 * NEXT_PGNOs (being wary of cycles and invalid ones), then we
2648 * consolidate our scratch array into a nice list, and return. This
2649 * avoids the memory management hassles of recursion and the
2650 * trouble of walking internal pages--they just don't matter, except
2651 * for the left branch.
2653 * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *,
2654 * PUBLIC: u_int32_t, DB *));
2657 __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
2668 db_pgno_t current, p;
2671 DB_ASSERT(dbp->env, pgset != NULL);
2677 for (current = btmeta->root;;) {
2678 if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) {
2679 err_ret = DB_VERIFY_BAD;
2682 if ((ret = __memp_fget(mpf, ¤t,
2683 vdp->thread_info, NULL, 0, &h)) != 0) {
2691 if ((ret = __bam_vrfy(dbp,
2692 vdp, h, current, flags | DB_NOORDERCHK)) != 0) {
2696 if (TYPE(h) == P_IBTREE) {
2697 bi = GET_BINTERNAL(dbp, h, 0);
2699 } else { /* P_IRECNO */
2700 ri = GET_RINTERNAL(dbp, h, 0);
2708 err_ret = DB_VERIFY_BAD;
2712 if ((ret = __memp_fput(mpf,
2713 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
2719 * At this point, current is the pgno of leaf page h, the 0th in the
2720 * tree we're concerned with.
2723 while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
2724 if (h == NULL && (ret = __memp_fget(mpf,
2725 ¤t, vdp->thread_info, NULL, 0, &h)) != 0) {
2730 if ((ret = __db_vrfy_pgset_get(pgset,
2731 vdp->thread_info, vdp->txn, current, (int *)&p)) != 0)
2736 * We've found a cycle. Return success anyway--
2737 * our caller may as well use however much of
2738 * the pgset we've come up with.
2742 if ((ret = __db_vrfy_pgset_inc(
2743 pgset, vdp->thread_info, vdp->txn, current)) != 0)
2746 current = NEXT_PGNO(h);
2747 if ((ret = __memp_fput(mpf,
2748 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
2754 (void)__memp_fput(mpf,
2755 vdp->thread_info, h, DB_PRIORITY_UNCHANGED);
2757 return (ret == 0 ? err_ret : ret);
2761 * __bam_safe_getdata --
2763 * Utility function for __bam_vrfy_itemorder. Safely gets the datum at
2764 * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an
2765 * overflow item, we do a safe_goff to get the item and signal that we need
2766 * to free dbt->data; if ovflok is 0, we leaves the DBT zeroed.
2769 __bam_safe_getdata(dbp, ip, h, i, ovflok, dbt, freedbtp)
2783 memset(dbt, 0, sizeof(DBT));
2786 bk = GET_BKEYDATA(dbp, h, i);
2787 if (B_TYPE(bk->type) == B_OVERFLOW) {
2791 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2792 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
2794 bo = (BOVERFLOW *)bk;
2795 F_SET(dbt, DB_DBT_MALLOC);
2798 return (__db_goff(dbc, dbt, bo->tlen, bo->pgno, NULL, NULL));
2800 dbt->data = bk->data;
2801 dbt->size = bk->len;