2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1999-2009 Oracle. All rights reserved.
12 #include "dbinc/db_page.h"
13 #include "dbinc/db_verify.h"
14 #include "dbinc/btree.h"
15 #include "dbinc/lock.h"
18 static int __bam_safe_getdata __P((DB *, DB_THREAD_INFO *,
19 PAGE *, u_int32_t, int, DBT *, int *));
20 static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
21 db_indx_t *, u_int32_t));
22 static int __bam_vrfy_treeorder __P((DB *, DB_THREAD_INFO *, PAGE *,
23 BINTERNAL *, BINTERNAL *, int (*)(DB *, const DBT *, const DBT *),
25 static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
26 db_indx_t *, u_int32_t));
30 * Verify the btree-specific part of a metadata page.
32 * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *,
33 * PUBLIC: db_pgno_t, u_int32_t));
36 __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
45 int isbad, t_ret, ret;
51 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
55 * If VRFY_INCOMPLETE is not set, then we didn't come through
56 * __db_vrfy_pagezero and didn't incompletely
57 * check this page--we haven't checked it at all.
58 * Thus we need to call __db_vrfy_meta and check the common fields.
60 * If VRFY_INCOMPLETE is set, we've already done all the same work
61 * in __db_vrfy_pagezero, so skip the check.
63 if (!F_ISSET(pip, VRFY_INCOMPLETE) &&
64 (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) {
65 if (ret == DB_VERIFY_BAD)
71 /* bt_minkey: must be >= 2; must produce sensible ovflsize */
73 /* avoid division by zero */
74 ovflsize = meta->minkey > 0 ?
75 B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
77 if (meta->minkey < 2 ||
78 ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
82 "Page %lu: nonsensical bt_minkey value %lu on metadata page",
83 (u_long)pgno, (u_long)meta->minkey));
85 pip->bt_minkey = meta->minkey;
87 /* re_len: no constraints on this (may be zero or huge--we make rope) */
88 pip->re_pad = meta->re_pad;
89 pip->re_len = meta->re_len;
92 * The root must not be current page or 0 and it must be within
93 * database. If this metadata page is the master meta data page
94 * of the file, then the root page had better be page 1.
97 if (meta->root == PGNO_INVALID ||
98 meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
99 (pgno == PGNO_BASE_MD && meta->root != 1)) {
102 "Page %lu: nonsensical root page %lu on metadata page",
103 (u_long)pgno, (u_long)meta->root));
105 pip->root = meta->root;
108 if (F_ISSET(&meta->dbmeta, BTM_RENUMBER))
109 F_SET(pip, VRFY_IS_RRECNO);
111 if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) {
113 * If this is a master db meta page, it had better not have
116 if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
119 "Page %lu: Btree metadata page has both duplicates and multiple databases",
122 F_SET(pip, VRFY_HAS_SUBDBS);
125 if (F_ISSET(&meta->dbmeta, BTM_DUP))
126 F_SET(pip, VRFY_HAS_DUPS);
127 if (F_ISSET(&meta->dbmeta, BTM_DUPSORT))
128 F_SET(pip, VRFY_HAS_DUPSORT);
129 if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
130 F_SET(pip, VRFY_HAS_RECNUMS);
131 if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
133 "Page %lu: Btree metadata page illegally has both recnums and dups",
138 if (F_ISSET(&meta->dbmeta, BTM_RECNO)) {
139 F_SET(pip, VRFY_IS_RECNO);
140 dbp->type = DB_RECNO;
141 } else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
144 "Page %lu: metadata page has renumber flag set but is not recno",
148 #ifdef HAVE_COMPRESSION
149 if (F_ISSET(&meta->dbmeta, BTM_COMPRESS)) {
150 F_SET(pip, VRFY_HAS_COMPRESS);
151 if (!DB_IS_COMPRESSED(dbp)) {
152 ((BTREE *)dbp->bt_internal)->bt_compress =
154 ((BTREE *)dbp->bt_internal)->bt_decompress =
158 * Copy dup_compare to compress_dup_compare, and use the
159 * compression duplicate compare.
161 if (F_ISSET(pip, VRFY_HAS_DUPSORT)) {
162 if (dbp->dup_compare == NULL)
163 dbp->dup_compare = __bam_defcmp;
164 if (((BTREE *)dbp->bt_internal)->compress_dup_compare
166 ((BTREE *)dbp->bt_internal)->
167 compress_dup_compare = dbp->dup_compare;
168 dbp->dup_compare = __bam_compress_dupcmp;
173 if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_COMPRESS)) {
175 "Page %lu: Btree metadata page illegally has both recnums and compression",
179 if (F_ISSET(pip, VRFY_HAS_DUPS) && !F_ISSET(pip, VRFY_HAS_DUPSORT) &&
180 F_ISSET(pip, VRFY_HAS_COMPRESS)) {
182 "Page %lu: Btree metadata page illegally has both unsorted duplicates%s",
184 " and compression"));
189 if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
191 "Page %lu: recno metadata page specifies duplicates",
196 if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN))
197 F_SET(pip, VRFY_IS_FIXEDLEN);
198 else if (pip->re_len > 0) {
200 * It's wrong to have an re_len if it's not a fixed-length
205 "Page %lu: re_len of %lu in non-fixed-length database",
206 (u_long)pgno, (u_long)pip->re_len));
210 * We do not check that the rest of the page is 0, because it may
211 * not be and may still be correct.
214 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
216 if (LF_ISSET(DB_SALVAGE) &&
217 (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
219 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
224 * Verify a recno leaf page.
226 * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
227 * PUBLIC: u_int32_t));
230 __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
241 int ret, t_ret, isbad;
242 u_int32_t re_len_guess, len;
247 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
250 if (TYPE(h) != P_LRECNO) {
251 ret = __db_unknown_path(env, "__ram_vrfy_leaf");
256 * Verify (and, if relevant, save off) page fields common to
259 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
260 if (ret == DB_VERIFY_BAD)
267 * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD;
268 * further checks are dangerous.
270 if ((ret = __bam_vrfy_inp(dbp,
271 vdp, h, pgno, &pip->entries, flags)) != 0)
274 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
276 "Page %lu: Recno database has dups", (u_long)pgno));
282 * Walk through inp and see if the lengths of all the records are the
283 * same--if so, this may be a fixed-length database, and we want to
284 * save off this value. We know inp to be safe if we've gotten this
288 for (i = 0; i < NUM_ENT(h); i++) {
289 bk = GET_BKEYDATA(dbp, h, i);
290 /* KEYEMPTY. Go on. */
291 if (B_DISSET(bk->type))
293 if (bk->type == B_OVERFLOW)
294 len = ((BOVERFLOW *)bk)->tlen;
295 else if (bk->type == B_KEYDATA)
300 "Page %lu: nonsensical type for item %lu",
301 (u_long)pgno, (u_long)i));
304 if (re_len_guess == 0)
308 * Is this item's len the same as the last one's? If not,
309 * reset to 0 and break--we don't have a single re_len.
310 * Otherwise, go on to the next item.
312 if (re_len_guess != len) {
317 pip->re_len = re_len_guess;
319 /* Save off record count. */
320 pip->rec_cnt = NUM_ENT(h);
322 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
324 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
329 * Verify a btree leaf or internal page.
331 * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
332 * PUBLIC: u_int32_t));
335 __bam_vrfy(dbp, vdp, h, pgno, flags)
344 int ret, t_ret, isbad;
349 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
359 ret = __db_unknown_path(env, "__bam_vrfy");
364 * Verify (and, if relevant, save off) page fields common to
367 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
368 if (ret == DB_VERIFY_BAD)
375 * The record count is, on internal pages, stored in an overloaded
376 * next_pgno field. Save it off; we'll verify it when we check
377 * overall database structure. We could overload the field
378 * in VRFY_PAGEINFO, too, but this seems gross, and space
379 * is not at such a premium.
381 pip->rec_cnt = RE_NREC(h);
386 if (TYPE(h) == P_IRECNO) {
387 if ((ret = __ram_vrfy_inp(dbp,
388 vdp, h, pgno, &pip->entries, flags)) != 0)
390 } else if ((ret = __bam_vrfy_inp(dbp,
391 vdp, h, pgno, &pip->entries, flags)) != 0) {
392 if (ret == DB_VERIFY_BAD)
397 "Page %lu: item order check unsafe: skipping",
399 } else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
400 __bam_vrfy_itemorder(dbp,
401 vdp, vdp->thread_info, h, pgno, 0, 0, 0, flags)) != 0) {
403 * We know that the elements of inp are reasonable.
405 * Check that elements fall in the proper order.
407 if (ret == DB_VERIFY_BAD)
413 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
415 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
420 * Verify that all entries in a P_IRECNO inp[] array are reasonable,
421 * and count them. Note that P_LRECNO uses __bam_vrfy_inp;
422 * P_IRECNOs are a special, and simpler, case, since they have
423 * RINTERNALs rather than BKEYDATA/BINTERNALs.
426 __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
431 db_indx_t *nentriesp;
436 VRFY_CHILDINFO child;
438 int ret, t_ret, isbad;
439 u_int32_t himark, i, offset, nentries;
441 u_int8_t *pagelayout, *p;
445 memset(&child, 0, sizeof(VRFY_CHILDINFO));
449 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
452 if (TYPE(h) != P_IRECNO) {
453 ret = __db_unknown_path(env, "__ram_vrfy_inp");
457 himark = dbp->pgsize;
458 if ((ret = __os_malloc(env, dbp->pgsize, &pagelayout)) != 0)
460 memset(pagelayout, 0, dbp->pgsize);
462 for (i = 0; i < NUM_ENT(h); i++) {
463 if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
465 "Page %lu: entries listing %lu overlaps data",
466 (u_long)pgno, (u_long)i));
472 * Check that the item offset is reasonable: it points
473 * somewhere after the inp array and before the end of the
476 if (offset <= (u_int32_t)((u_int8_t *)inp + i -
478 offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
481 "Page %lu: bad offset %lu at index %lu",
482 (u_long)pgno, (u_long)offset, (u_long)i));
486 /* Update the high-water mark (what HOFFSET should be) */
492 /* Make sure this RINTERNAL is not multiply referenced. */
493 ri = GET_RINTERNAL(dbp, h, i);
494 if (pagelayout[offset] == 0) {
495 pagelayout[offset] = 1;
496 child.pgno = ri->pgno;
497 child.type = V_RECNO;
498 child.nrecs = ri->nrecs;
499 if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
503 "Page %lu: RINTERNAL structure at offset %lu referenced twice",
504 (u_long)pgno, (u_long)offset));
509 for (p = pagelayout + himark;
510 p < pagelayout + dbp->pgsize;
514 "Page %lu: gap between items at offset %lu",
515 (u_long)pgno, (u_long)(p - pagelayout)));
519 if ((db_indx_t)himark != HOFFSET(h)) {
521 "Page %lu: bad HOFFSET %lu, appears to be %lu",
522 (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark));
526 *nentriesp = nentries;
528 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
530 if (pagelayout != NULL)
531 __os_free(env, pagelayout);
532 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
535 typedef enum { VRFY_ITEM_NOTSET=0, VRFY_ITEM_BEGIN, VRFY_ITEM_END } VRFY_ITEM;
539 * Verify that all entries in inp[] array are reasonable;
543 __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
548 db_indx_t *nentriesp;
554 VRFY_CHILDINFO child;
555 VRFY_ITEM *pagelayout;
557 u_int32_t himark, offset; /*
558 * These would be db_indx_ts
561 u_int32_t i, endoff, nentries;
562 int isbad, initem, isdupitem, ret, t_ret;
565 isbad = isdupitem = 0;
567 memset(&child, 0, sizeof(VRFY_CHILDINFO));
568 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
579 * In the salvager, we might call this from a page which
580 * we merely suspect is a btree page. Otherwise, it
581 * shouldn't get called--if it is, that's a verifier bug.
583 if (LF_ISSET(DB_SALVAGE))
585 ret = __db_unknown_path(env, "__bam_vrfy_inp");
590 * Loop through inp[], the array of items, until we either
591 * run out of entries or collide with the data. Keep track
592 * of h_offset in himark.
594 * For each element in inp[i], make sure it references a region
595 * that starts after the end of the inp array (as defined by
596 NUM_ENT(h)), ends before the end of the page, doesn't
597 * overlap any other regions, and doesn't have a gap between
598 * it and the region immediately after it.
600 himark = dbp->pgsize;
601 if ((ret = __os_calloc(
602 env, dbp->pgsize, sizeof(pagelayout[0]), &pagelayout)) != 0)
604 for (i = 0; i < NUM_ENT(h); i++) {
605 switch (ret = __db_vrfy_inpitem(dbp,
606 h, pgno, i, 1, flags, &himark, &offset)) {
612 case DB_VERIFY_FATAL:
616 DB_ASSERT(env, ret != 0);
621 * We now have a plausible beginning for the item, and we know
622 * its length is safe.
624 * Mark the beginning and end in pagelayout so we can make sure
625 * items have no overlaps or gaps.
627 bk = GET_BKEYDATA(dbp, h, i);
628 if (pagelayout[offset] == VRFY_ITEM_NOTSET)
629 pagelayout[offset] = VRFY_ITEM_BEGIN;
630 else if (pagelayout[offset] == VRFY_ITEM_BEGIN) {
632 * Having two inp entries that point at the same patch
633 * of page is legal if and only if the page is
634 * a btree leaf and they're onpage duplicate keys--
635 * that is, if (i % P_INDX) == 0.
637 if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) {
638 /* Flag for later. */
639 F_SET(pip, VRFY_HAS_DUPS);
641 /* Bump up nentries so we don't undercount. */
645 * We'll check to make sure the end is
651 EPRINT((env, "Page %lu: duplicated item %lu",
652 (u_long)pgno, (u_long)i));
657 * Mark the end. Its location varies with the page type
660 * If the end already has a sign other than 0, do nothing--
661 * it's an overlap that we'll catch later.
663 switch (B_TYPE(bk->type)) {
665 if (TYPE(h) == P_IBTREE)
666 /* It's a BINTERNAL. */
667 endoff = offset + BINTERNAL_SIZE(bk->len) - 1;
669 endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
673 * Flag that we have dups; we'll check whether
674 * that's okay during the structure check.
676 F_SET(pip, VRFY_HAS_DUPS);
680 * Overflow entries on internal pages are stored
681 * as the _data_ of a BINTERNAL; overflow entries
682 * on leaf pages are stored as the entire entry.
685 ((TYPE(h) == P_IBTREE) ?
686 BINTERNAL_SIZE(BOVERFLOW_SIZE) :
691 * We'll complain later; for now, just mark
694 endoff = offset + BKEYDATA_SIZE(0) - 1;
699 * If this is an onpage duplicate key we've seen before,
700 * the end had better coincide too.
702 if (isdupitem && pagelayout[endoff] != VRFY_ITEM_END) {
703 EPRINT((env, "Page %lu: duplicated item %lu",
704 (u_long)pgno, (u_long)i));
706 } else if (pagelayout[endoff] == VRFY_ITEM_NOTSET)
707 pagelayout[endoff] = VRFY_ITEM_END;
711 * There should be no deleted items in a quiescent tree,
714 if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
716 EPRINT((env, "Page %lu: item %lu marked deleted",
717 (u_long)pgno, (u_long)i));
721 * Check the type and such of bk--make sure it's reasonable
724 switch (B_TYPE(bk->type)) {
727 * This is a normal, non-overflow BKEYDATA or BINTERNAL.
728 * The only thing to check is the len, and that's
733 if (TYPE(h) == P_IBTREE) {
736 "Page %lu: duplicate page referenced by internal btree page at item %lu",
737 (u_long)pgno, (u_long)i));
739 } else if (TYPE(h) == P_LRECNO) {
742 "Page %lu: duplicate page referenced by recno page at item %lu",
743 (u_long)pgno, (u_long)i));
748 bo = (TYPE(h) == P_IBTREE) ?
749 (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
752 if (B_TYPE(bk->type) == B_OVERFLOW)
753 /* Make sure tlen is reasonable. */
754 if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
757 "Page %lu: impossible tlen %lu, item %lu",
759 (u_long)bo->tlen, (u_long)i));
760 /* Don't save as a child. */
764 if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
765 bo->pgno == PGNO_INVALID) {
768 "Page %lu: offpage item %lu has bad pgno %lu",
769 (u_long)pgno, (u_long)i, (u_long)bo->pgno));
770 /* Don't save as a child. */
774 child.pgno = bo->pgno;
775 child.type = (B_TYPE(bk->type) == B_OVERFLOW ?
776 V_OVERFLOW : V_DUPLICATE);
777 child.tlen = bo->tlen;
778 if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
783 EPRINT((env, "Page %lu: item %lu of invalid type %lu",
784 (u_long)pgno, (u_long)i, (u_long)B_TYPE(bk->type)));
790 * Now, loop through and make sure the items are contiguous and
794 for (i = himark; i < dbp->pgsize; i++)
796 switch (pagelayout[i]) {
797 case VRFY_ITEM_NOTSET:
798 /* May be just for alignment. */
799 if (i != DB_ALIGN(i, sizeof(u_int32_t)))
804 "Page %lu: gap between items at offset %lu",
805 (u_long)pgno, (u_long)i));
806 /* Find the end of the gap */
807 for (; pagelayout[i + 1] == VRFY_ITEM_NOTSET &&
808 (size_t)(i + 1) < dbp->pgsize; i++)
811 case VRFY_ITEM_BEGIN:
812 /* We've found an item. Check its alignment. */
813 if (i != DB_ALIGN(i, sizeof(u_int32_t))) {
816 "Page %lu: offset %lu unaligned",
817 (u_long)pgno, (u_long)i));
824 * We've hit the end of an item even though
825 * we don't think we're in one; must
830 "Page %lu: overlapping items at offset %lu",
831 (u_long)pgno, (u_long)i));
835 switch (pagelayout[i]) {
836 case VRFY_ITEM_NOTSET:
837 /* In the middle of an item somewhere. Okay. */
840 /* End of an item; switch to out-of-item mode.*/
843 case VRFY_ITEM_BEGIN:
845 * Hit a second item beginning without an
850 "Page %lu: overlapping items at offset %lu",
851 (u_long)pgno, (u_long)i));
855 __os_free(env, pagelayout);
857 /* Verify HOFFSET. */
858 if ((db_indx_t)himark != HOFFSET(h)) {
859 EPRINT((env, "Page %lu: bad HOFFSET %lu, appears to be %lu",
860 (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark));
864 err: if (nentriesp != NULL)
865 *nentriesp = nentries;
867 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
870 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
874 * __bam_vrfy_itemorder --
875 * Make sure the items on a page sort correctly.
877 * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are
878 * reasonable; be sure that __bam_vrfy_inp has been called first.
880 * If ovflok is set, it also assumes that overflow page chains
881 * hanging off the current page have been sanity-checked, and so we
882 * can use __bam_cmp to verify their ordering. If it is not set,
883 * and we run into an overflow page, carp and return DB_VERIFY_BAD;
884 * we shouldn't be called if any exist.
886 * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, DB_THREAD_INFO *,
887 * PUBLIC: PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t));
890 __bam_vrfy_itemorder(dbp, vdp, ip, h, pgno, nentries, ovflok, hasdups, flags)
905 DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
909 int adj, cmp, freedup_1, freedup_2, isbad, ret, t_ret;
910 int (*dupfunc) __P((DB *, const DBT *, const DBT *));
911 int (*func) __P((DB *, const DBT *, const DBT *));
912 void *buf1, *buf2, *tmpbuf;
915 * We need to work in the ORDERCHKONLY environment where we might
916 * not have a pip, but we also may need to work in contexts where
917 * NUM_ENT isn't safe.
920 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
922 nentries = pip->entries;
928 bo = NULL; /* Shut up compiler. */
930 memset(&dbta, 0, sizeof(DBT));
931 F_SET(&dbta, DB_DBT_REALLOC);
933 memset(&dbtb, 0, sizeof(DBT));
934 F_SET(&dbtb, DB_DBT_REALLOC);
938 DB_ASSERT(env, !LF_ISSET(DB_NOORDERCHK));
940 dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
941 if (TYPE(h) == P_LDUP)
945 if (dbp->bt_internal != NULL) {
946 bt = (BTREE *)dbp->bt_internal;
947 if (bt->bt_compare != NULL)
948 func = bt->bt_compare;
953 * We alternate our use of dbta and dbtb so that we can walk
954 * through the page key-by-key without copying a dbt twice.
955 * p1 is always the dbt for index i - 1, and p2 for index i.
956 * Reset the data pointers in case we are retrying.
964 * Loop through the entries. nentries ought to contain the
965 * actual count, and so is a safe way to terminate the loop; whether
966 * we inc. by one or two depends on whether we're a leaf page--
967 * on a leaf page, we care only about keys. On internal pages
968 * and LDUP pages, we want to check the order of all entries.
970 * Note that on IBTREE pages or the index page of a partitioned
971 * database, we start with item 1, since item 0 doesn't get looked
975 adj = (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX;
976 for (i = (TYPE(h) == P_IBTREE || dbp->p_internal != NULL) ? adj : 0;
977 i < nentries; i += adj) {
979 * Put key i-1, now in p2, into p1, by swapping DBTs and bufs.
993 bi = GET_BINTERNAL(dbp, h, i);
994 if (B_TYPE(bi->type) == B_OVERFLOW) {
995 bo = (BOVERFLOW *)(bi->data);
1003 * The leftmost key on an internal page must be
1004 * len 0, since it's just a placeholder and
1005 * automatically sorts less than all keys.
1008 * This criterion does not currently hold!
1009 * See todo list item #1686. Meanwhile, it's harmless
1010 * to just not check for it.
1013 if (i == 0 && bi->len != 0) {
1016 "Page %lu: lowest key on internal page of nonzero length",
1023 bk = GET_BKEYDATA(dbp, h, i);
1024 if (B_TYPE(bk->type) == B_OVERFLOW) {
1025 bo = (BOVERFLOW *)bk;
1028 p2->data = bk->data;
1034 * This means our caller screwed up and sent us
1035 * an inappropriate page.
1037 ret = __db_unknown_path(env, "__bam_vrfy_itemorder");
1043 * If ovflok != 1, we can't safely go chasing
1044 * overflow pages with the normal routines now;
1045 * they might be unsafe or nonexistent. Mark this
1046 * page as incomplete and return.
1048 * Note that we don't need to worry about freeing
1049 * buffers, since they can't have been allocated
1050 * if overflow items are unsafe.
1052 overflow: if (!ovflok) {
1053 F_SET(pip, VRFY_INCOMPLETE);
1058 * Overflow items are safe to chase. Do so.
1059 * Fetch the overflow item into p2->data,
1060 * NULLing it or reallocing it as appropriate.
1062 * (We set p2->data to buf2 before the call
1063 * so we're sure to realloc if we can and if p2
1064 * was just pointing at a non-overflow item.)
1067 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
1068 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
1070 if ((ret = __db_goff(dbc,
1071 p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
1074 "Page %lu: error %lu in fetching overflow item %lu",
1075 (u_long)pgno, (u_long)ret, (u_long)i));
1077 /* In case it got realloc'ed and thus changed. */
1081 /* Compare with the last key. */
1082 if (p1->data != NULL && p2->data != NULL) {
1083 cmp = inp[i] == inp[i - adj] ? 0 : func(dbp, p1, p2);
1085 /* comparison succeeded */
1088 * If we are looking at an internal page, we
1089 * don't know whether it is part of the main
1090 * database or in an off-page-duplicate tree.
1091 * If the main comparator fails, retry with
1092 * the duplicate comparator.
1094 if (TYPE(h) == P_IBTREE && func != dupfunc) {
1101 "Page %lu: out-of-order key at entry %lu",
1102 (u_long)pgno, (u_long)i));
1104 } else if (cmp == 0) {
1105 if (inp[i] != inp[i - adj]) {
1107 if (TYPE(h) == P_IBTREE &&
1114 "Page %lu: non-dup dup key at entry %lu",
1115 (u_long)pgno, (u_long)i));
1118 * If they compared equally, this
1119 * had better be a (sub)database with dups.
1120 * Mark it so we can check during the
1124 F_SET(pip, VRFY_HAS_DUPS);
1125 else if (hasdups == 0) {
1127 if (TYPE(h) == P_IBTREE &&
1134 "Page %lu: database with no duplicates has duplicated keys",
1139 * If we're a btree leaf, check to see
1140 * if the data items of these on-page dups are
1141 * in sorted order. If not, flag this, so
1142 * that we can make sure during the
1143 * structure checks that the DUPSORT flag
1146 * At this point i points to a duplicate key.
1147 * Compare the datum before it (same key)
1148 * to the datum after it, i.e. i-1 to i+1.
1150 if (TYPE(h) == P_LBTREE) {
1152 * Unsafe; continue and we'll pick
1153 * up the bogus nentries later.
1155 if (i + 1 >= (db_indx_t)nentries)
1159 * We don't bother with clever memory
1160 * management with on-page dups,
1161 * as it's only really a big win
1162 * in the overflow case, and overflow
1163 * dups are probably (?) rare.
1165 if (((ret = __bam_safe_getdata(dbp,
1166 ip, h, i - 1, ovflok,
1167 &dup_1, &freedup_1)) != 0) ||
1168 ((ret = __bam_safe_getdata(dbp,
1169 ip, h, i + 1, ovflok,
1170 &dup_2, &freedup_2)) != 0))
1174 * If either of the data are NULL,
1175 * it's because they're overflows and
1176 * it's not safe to chase them now.
1177 * Mark the page as incomplete and return.
1179 if (dup_1.data == NULL ||
1180 dup_2.data == NULL) {
1181 DB_ASSERT(env, !ovflok);
1182 F_SET(pip, VRFY_INCOMPLETE);
1187 * If the dups are out of order,
1188 * flag this. It's not an error
1189 * until we do the structure check
1190 * and see whether DUPSORT is set.
1192 if (dupfunc(dbp, &dup_1, &dup_2) > 0)
1193 F_SET(pip, VRFY_DUPS_UNSORTED);
1196 __os_ufree(env, dup_1.data);
1198 __os_ufree(env, dup_2.data);
1204 err: if (pip != NULL && ((t_ret =
1205 __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0)
1209 __os_ufree(env, buf1);
1211 __os_ufree(env, buf2);
1213 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1217 * __bam_vrfy_structure --
1218 * Verify the tree structure of a btree database (including the master
1219 * database containing subdbs).
1221 * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
1222 * PUBLIC: void *, void *, u_int32_t));
1225 __bam_vrfy_structure(dbp, vdp, meta_pgno, lp, rp, flags)
1228 db_pgno_t meta_pgno;
1234 VRFY_PAGEINFO *mip, *rip;
1237 u_int32_t nrecs, level, relen, stflags;
1243 if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0)
1246 if ((ret = __db_vrfy_pgset_get(pgset,
1247 vdp->thread_info, meta_pgno, (int *)&p)) != 0)
1251 "Page %lu: btree metadata page observed twice",
1252 (u_long)meta_pgno));
1253 ret = DB_VERIFY_BAD;
1257 __db_vrfy_pgset_inc(pgset, vdp->thread_info, meta_pgno)) != 0)
1264 "Page %lu: btree metadata page has no root",
1265 (u_long)meta_pgno));
1266 ret = DB_VERIFY_BAD;
1270 if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0)
1273 switch (rip->type) {
1276 stflags = flags | DB_ST_TOPLEVEL;
1277 if (F_ISSET(mip, VRFY_HAS_DUPS))
1278 stflags |= DB_ST_DUPOK;
1279 if (F_ISSET(mip, VRFY_HAS_DUPSORT))
1280 stflags |= DB_ST_DUPSORT;
1281 if (F_ISSET(mip, VRFY_HAS_RECNUMS))
1282 stflags |= DB_ST_RECNUM;
1283 ret = __bam_vrfy_subtree(dbp,
1284 vdp, root, lp, rp, stflags, NULL, NULL, NULL);
1289 flags | DB_ST_RECNUM | DB_ST_IS_RECNO | DB_ST_TOPLEVEL;
1290 if (mip->re_len > 0)
1291 stflags |= DB_ST_RELEN;
1292 if ((ret = __bam_vrfy_subtree(dbp, vdp,
1293 root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0)
1296 * Even if mip->re_len > 0, re_len may come back zero if the
1297 * tree is empty. It should be okay to just skip the check in
1298 * this case, as if there are any non-deleted keys at all,
1299 * that should never happen.
1301 if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
1303 "Page %lu: recno database has bad re_len %lu",
1304 (u_long)meta_pgno, (u_long)relen));
1305 ret = DB_VERIFY_BAD;
1312 "Page %lu: duplicate tree referenced from metadata page",
1313 (u_long)meta_pgno));
1314 ret = DB_VERIFY_BAD;
1318 "Page %lu: btree root of incorrect type %lu on metadata page",
1319 (u_long)meta_pgno, (u_long)rip->type));
1320 ret = DB_VERIFY_BAD;
1324 err: if (mip != NULL && ((t_ret =
1325 __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0)
1327 if (rip != NULL && ((t_ret =
1328 __db_vrfy_putpageinfo(env, vdp, rip)) != 0) && ret == 0)
1334 * __bam_vrfy_subtree --
1335 * Verify a subtree (or entire) btree with specified root.
1337 * Note that this is public because it must be called to verify
1338 * offpage dup trees, including from hash.
1340 * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *,
1341 * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
1344 __bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
1349 u_int32_t flags, *levelp, *nrecsp, *relenp;
1357 VRFY_CHILDINFO *child;
1360 db_pgno_t next_pgno, prev_pgno;
1361 db_recno_t child_nrecs, nrecs;
1362 u_int32_t child_level, child_relen, j, level, relen, stflags;
1364 int (*func) __P((DB *, const DBT *, const DBT *));
1365 int isbad, p, ret, t_ret, toplevel;
1367 if (levelp != NULL) /* Don't leave uninitialized on error. */
1375 next_pgno = prev_pgno = PGNO_INVALID;
1378 leaf_type = P_INVALID;
1381 /* Provide feedback on our progress to the application. */
1382 if (!LF_ISSET(DB_SALVAGE))
1383 __db_vrfy_struct_feedback(dbp, vdp);
1385 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1389 level = pip->bt_level;
1391 toplevel = LF_ISSET(DB_ST_TOPLEVEL) ? 1 : 0;
1392 LF_CLR(DB_ST_TOPLEVEL);
1395 * If this is the root, initialize the vdp's prev- and next-pgno
1398 * For each leaf page we hit, we'll want to make sure that
1399 * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
1400 * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno
1401 * and vdp->prev_pgno to our page number, and the next leaf page in
1402 * line should be able to do the same verification.
1406 * Cache the values stored in the vdp so that if we're an
1407 * auxiliary tree such as an off-page duplicate set, our
1408 * caller's leaf page chain doesn't get lost.
1410 prev_pgno = vdp->prev_pgno;
1411 next_pgno = vdp->next_pgno;
1412 leaf_type = vdp->leaf_type;
1413 vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
1414 vdp->leaf_type = P_INVALID;
1418 * We are recursively descending a btree, starting from the root
1419 * and working our way out to the leaves.
1421 * There are five cases we need to deal with:
1422 * 1. pgno is a recno leaf page. Any children are overflows.
1423 * 2. pgno is a duplicate leaf page. Any children
1424 * are overflow pages; traverse them, and then return
1426 * 3. pgno is an ordinary leaf page. Check whether dups are
1427 * allowed, and if so, traverse any off-page dups or
1428 * overflows. Then return nrecs and level.
1429 * 4. pgno is a recno internal page. Recursively check any
1430 * child pages, making sure their levels are one lower
1431 * and their nrecs sum to ours.
1432 * 5. pgno is a btree internal page. Same as #4, plus we
1433 * must verify that for each pair of BINTERNAL entries
1434 * N and N+1, the leftmost item on N's child sorts
1435 * greater than N, and the rightmost item on N's child
1436 * sorts less than N+1.
1438 * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE),
1439 * we need to verify the internal sort order is correct if,
1440 * due to overflow items, we were not able to do so earlier.
1442 switch (pip->type) {
1449 * We're some sort of leaf page; verify
1450 * that our linked list of leaves is consistent.
1452 if (vdp->leaf_type == P_INVALID) {
1454 * First leaf page. Set the type that all its
1455 * successors should be, and verify that our prev_pgno
1458 vdp->leaf_type = pip->type;
1459 if (pip->prev_pgno != PGNO_INVALID)
1463 * Successor leaf page. Check our type, the previous
1464 * page's next_pgno, and our prev_pgno.
1466 if (pip->type != vdp->leaf_type) {
1469 "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
1470 (u_long)pip->pgno, (u_long)pip->type,
1471 (u_long)vdp->leaf_type));
1475 * Don't do the prev/next_pgno checks if we've lost
1476 * leaf pages due to another corruption.
1478 if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) {
1479 if (pip->pgno != vdp->next_pgno) {
1482 "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
1483 (u_long)vdp->prev_pgno,
1484 (u_long)vdp->next_pgno,
1485 (u_long)pip->pgno));
1487 if (pip->prev_pgno != vdp->prev_pgno) {
1488 bad_prev: isbad = 1;
1490 "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
1492 (u_long)pip->prev_pgno,
1493 (u_long)vdp->prev_pgno));
1497 vdp->prev_pgno = pip->pgno;
1498 vdp->next_pgno = pip->next_pgno;
1499 F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN);
1502 * Overflow pages are common to all three leaf types;
1503 * traverse the child list, looking for overflows.
1505 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
1507 for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
1508 ret = __db_vrfy_ccnext(cc, &child))
1509 if (child->type == V_OVERFLOW &&
1510 (ret = __db_vrfy_ovfl_structure(dbp, vdp,
1511 child->pgno, child->tlen,
1512 flags | DB_ST_OVFL_LEAF)) != 0) {
1513 if (ret == DB_VERIFY_BAD)
1519 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1524 if (pip->type == P_LRECNO) {
1525 if (!LF_ISSET(DB_ST_IS_RECNO) &&
1526 !(LF_ISSET(DB_ST_DUPOK) &&
1527 !LF_ISSET(DB_ST_DUPSORT))) {
1530 "Page %lu: recno leaf page non-recno tree",
1535 } else if (LF_ISSET(DB_ST_IS_RECNO)) {
1537 * It's a non-recno leaf. Had better not be a recno
1542 "Page %lu: non-recno leaf page in recno tree",
1547 /* Case 2--no more work. */
1548 if (pip->type == P_LDUP)
1553 /* Check if we have any dups. */
1554 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
1555 /* If dups aren't allowed in this btree, trouble. */
1556 if (!LF_ISSET(DB_ST_DUPOK)) {
1559 "Page %lu: duplicates in non-dup btree",
1563 * We correctly have dups. If any are off-page,
1564 * traverse those btrees recursively.
1567 __db_vrfy_childcursor(vdp, &cc)) != 0)
1569 for (ret = __db_vrfy_ccset(cc, pgno, &child);
1571 ret = __db_vrfy_ccnext(cc, &child)) {
1573 flags | DB_ST_RECNUM | DB_ST_DUPSET;
1574 /* Skip any overflow entries. */
1575 if (child->type == V_DUPLICATE) {
1576 if ((ret = __db_vrfy_duptype(
1577 dbp, vdp, child->pgno,
1583 if ((ret = __bam_vrfy_subtree(
1584 dbp, vdp, child->pgno,
1586 stflags | DB_ST_TOPLEVEL,
1587 NULL, NULL, NULL)) != 0) {
1597 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1602 * If VRFY_DUPS_UNSORTED is set,
1603 * DB_ST_DUPSORT had better not be.
1605 if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
1606 LF_ISSET(DB_ST_DUPSORT)) {
1609 "Page %lu: unsorted duplicate set in sorted-dup database",
1617 /* We handle these below. */
1621 * If a P_IBTREE or P_IRECNO contains a reference to an
1622 * invalid page, we'll wind up here; handle it gracefully.
1623 * Note that the code at the "done" label assumes that the
1624 * current page is a btree/recno one of some sort; this
1625 * is not the case here, so we goto err.
1627 * If the page is entirely zeroed, its pip->type will be a lie
1628 * (we assumed it was a hash page, as they're allowed to be
1629 * zeroed); handle this case specially.
1631 if (F_ISSET(pip, VRFY_IS_ALLZEROES))
1632 ZEROPG_ERR_PRINT(env, pgno, "btree or recno page");
1635 "Page %lu: btree or recno page is of inappropriate type %lu",
1636 (u_long)pgno, (u_long)pip->type));
1639 * We probably lost a leaf page (or more if this was an
1640 * internal page) from our prev/next_pgno chain. Flag
1641 * that this is expected; we don't want or need to
1642 * spew error messages about erroneous prev/next_pgnos,
1643 * since that's probably not the real problem.
1645 F_SET(vdp, VRFY_LEAFCHAIN_BROKEN);
1647 ret = DB_VERIFY_BAD;
1652 * Cases 4 & 5: This is a btree or recno internal page. For each child,
1653 * recurse, keeping a running count of nrecs and making sure the level
1654 * is always reasonable.
1656 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
1658 for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
1659 ret = __db_vrfy_ccnext(cc, &child))
1660 if (child->type == V_RECNO) {
1661 if (pip->type != P_IRECNO) {
1662 ret = __db_unknown_path(
1663 env, "__bam_vrfy_subtree");
1666 if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno,
1667 NULL, NULL, flags, &child_level, &child_nrecs,
1668 &child_relen)) != 0) {
1669 if (ret == DB_VERIFY_BAD)
1675 if (LF_ISSET(DB_ST_RELEN)) {
1677 relen = child_relen;
1679 * child_relen may be zero if the child subtree
1682 else if (child_relen > 0 &&
1683 relen != child_relen) {
1686 "Page %lu: recno page returned bad re_len %lu",
1687 (u_long)child->pgno,
1688 (u_long)child_relen));
1693 if (LF_ISSET(DB_ST_RECNUM)) {
1694 if (child->nrecs != child_nrecs) {
1697 "Page %lu: record count incorrect: actual %lu, in record %lu",
1698 (u_long)child->pgno,
1699 (u_long)child_nrecs,
1700 (u_long)child->nrecs));
1702 nrecs += child_nrecs;
1704 if (isbad == 0 && level != child_level + 1) {
1707 "Page %lu: recno level incorrect: got %lu, expected %lu",
1708 (u_long)child->pgno, (u_long)child_level,
1709 (u_long)(level - 1)));
1711 } else if (child->type == V_OVERFLOW) {
1713 * It is possible for one internal page to reference
1714 * a single overflow page twice, if all the items
1715 * in the subtree referenced by slot 0 are deleted,
1716 * then a similar number of items are put back
1717 * before the key that formerly had been in slot 1.
1719 * (Btree doesn't look at the key in slot 0, so the
1720 * fact that the key formerly at slot 1 is the "wrong"
1721 * parent of the stuff in the slot 0 subtree isn't
1722 * really incorrect.)
1724 * __db_vrfy_ovfl_structure is designed to be
1725 * efficiently called multiple times for multiple
1726 * references; call it here as many times as is
1730 /* Otherwise, __db_vrfy_childput would be broken. */
1731 DB_ASSERT(env, child->refcnt >= 1);
1734 * An overflow referenced more than twice here
1737 if (child->refcnt > 2) {
1740 "Page %lu: overflow page %lu referenced more than twice from internal page",
1741 (u_long)pgno, (u_long)child->pgno));
1743 for (j = 0; j < child->refcnt; j++)
1744 if ((ret = __db_vrfy_ovfl_structure(dbp,
1745 vdp, child->pgno, child->tlen,
1747 if (ret == DB_VERIFY_BAD)
1754 if ((ret = __db_vrfy_ccclose(cc)) != 0)
1758 /* We're done with case 4. */
1759 if (pip->type == P_IRECNO)
1763 * Case 5. Btree internal pages.
1764 * As described above, we need to iterate through all the
1765 * items on the page and make sure that our children sort appropriately
1766 * with respect to them.
1768 * For each entry, li will be the "left-hand" key for the entry
1769 * itself, which must sort lower than all entries on its child;
1770 * ri will be the key to its right, which must sort greater.
1773 (ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
1775 for (i = 0; i < pip->entries; i += O_INDX) {
1776 li = GET_BINTERNAL(dbp, h, i);
1777 ri = (i + O_INDX < pip->entries) ?
1778 GET_BINTERNAL(dbp, h, i + O_INDX) : r;
1781 * The leftmost key is forcibly sorted less than all entries,
1782 * so don't bother passing it.
1784 if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno,
1785 i == 0 ? NULL : li, ri, flags, &child_level,
1786 &child_nrecs, NULL)) != 0) {
1787 if (ret == DB_VERIFY_BAD)
1793 if (LF_ISSET(DB_ST_RECNUM)) {
1795 * Keep a running tally on the actual record count so
1796 * we can return it to our parent (if we have one) or
1797 * compare it to the NRECS field if we're a root page.
1799 nrecs += child_nrecs;
1802 * Make sure the actual record count of the child
1803 * is equal to the value in the BINTERNAL structure.
1805 if (li->nrecs != child_nrecs) {
1808 "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
1809 (u_long)pgno, (u_long)i, (u_long)li->nrecs,
1810 (u_long)child_nrecs));
1814 if (level != child_level + 1) {
1817 "Page %lu: Btree level incorrect: got %lu, expected %lu",
1819 (u_long)child_level, (u_long)(level - 1)));
1824 leaf: level = LEAFLEVEL;
1825 if (LF_ISSET(DB_ST_RECNUM))
1826 nrecs = pip->rec_cnt;
1829 * We should verify that the record count on a leaf page
1830 * is the sum of the number of keys and the number of
1831 * records in its off-page dups. This requires looking
1832 * at the page again, however, and it may all be changing
1833 * soon, so for now we don't bother.
1836 if (LF_ISSET(DB_ST_RELEN) && relenp)
1837 *relenp = pip->re_len;
1839 done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
1841 * During the page-by-page pass, item order verification was
1842 * not finished due to the presence of overflow items. If
1843 * isbad == 0, though, it's now safe to do so, as we've
1844 * traversed any child overflow pages. Do it.
1846 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1847 vdp->thread_info, NULL, 0, &h)) != 0)
1849 if ((ret = __bam_vrfy_itemorder(dbp,
1850 vdp, vdp->thread_info, h, pgno, 0, 1, 0, flags)) != 0)
1852 F_CLR(pip, VRFY_INCOMPLETE);
1856 * It's possible to get to this point with a page that has no
1857 * items, but without having detected any sort of failure yet.
1858 * Having zero items is legal if it's a leaf--it may be the
1859 * root page in an empty tree, or the tree may have been
1860 * modified with the DB_REVSPLITOFF flag set (there's no way
1861 * to tell from what's on disk). For an internal page,
1862 * though, having no items is a problem (all internal pages
1863 * must have children).
1865 if (isbad == 0 && ret == 0) {
1866 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1867 vdp->thread_info, NULL, 0, &h)) != 0)
1870 if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
1873 "Page %lu: internal page is empty and should not be",
1880 * Our parent has sent us BINTERNAL pointers to parent records
1881 * so that we can verify our place with respect to them. If it's
1882 * appropriate--we have a default sort function--verify this.
1884 if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) &&
1885 pip->type != P_IRECNO && pip->type != P_LRECNO) {
1886 if (h == NULL && (ret = __memp_fget(mpf, &pgno,
1887 vdp->thread_info, NULL, 0, &h)) != 0)
1891 * __bam_vrfy_treeorder needs to know what comparison function
1892 * to use. If DB_ST_DUPSET is set, we're in a duplicate tree
1893 * and we use the duplicate comparison function; otherwise,
1894 * use the btree one. If unset, use the default, of course.
1896 func = LF_ISSET(DB_ST_DUPSET) ? dbp->dup_compare :
1897 ((BTREE *)dbp->bt_internal)->bt_compare;
1899 func = __bam_defcmp;
1901 if ((ret = __bam_vrfy_treeorder(dbp,
1902 vdp->thread_info, h, l, r, func, flags)) != 0) {
1903 if (ret == DB_VERIFY_BAD)
1911 * This is guaranteed to succeed for leaf pages, but no harm done.
1913 * Internal pages below the top level do not store their own
1914 * record numbers, so we skip them.
1916 if (LF_ISSET(DB_ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
1919 "Page %lu: bad record count: has %lu records, claims %lu",
1920 (u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt));
1929 if ((ret = __db_vrfy_pgset_get(pgset,
1930 vdp->thread_info, pgno, &p)) != 0)
1934 EPRINT((env, "Page %lu: linked twice", (u_long)pgno));
1936 __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0)
1941 * The last page's next_pgno in the leaf chain should have been
1944 if (vdp->next_pgno != PGNO_INVALID) {
1946 EPRINT((env, "Page %lu: unterminated leaf chain",
1947 (u_long)vdp->prev_pgno));
1950 err: if (toplevel) {
1951 /* Restore our caller's settings. */
1952 vdp->next_pgno = next_pgno;
1953 vdp->prev_pgno = prev_pgno;
1954 vdp->leaf_type = leaf_type;
1957 if (h != NULL && (t_ret = __memp_fput(mpf,
1958 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0)
1960 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1962 if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
1964 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
/*
 * NOTE(review): every line of this listing carries a leading source-line
 * number, and short lines (comment delimiters, braces, case labels, bare
 * return/goto statements, most declarations) are not present.  The comments
 * added below describe only what the visible lines show.
 *
 * Summary: compares the first and last items of child page h against the
 * parent entries lp and rp using __bam_cmp and the caller-supplied func.
 *
 * Parameters (from the K&R parameter list below):
 *	dbp   - database handle; passed to __db_cursor_int.
 *	ip    - thread info; passed to __db_cursor_int.
 *	h     - the child page whose items are compared.
 *	lp/rp - parent BINTERNAL entries; either may be NULL (see below).
 *	func  - comparison function forwarded to __bam_cmp.
 *	flags - not referenced in the visible lines; TODO confirm its use.
 *
 * Return: ret is set to DB_VERIFY_BAD on the visible ordering-failure
 * paths; the unexpected-type paths go through __db_unknown_path.
 */
1968 * __bam_vrfy_treeorder --
1969 * Verify that the lowest key on a page sorts greater than the
1970 * BINTERNAL which points to it (lp), and the highest key
1971 * sorts less than the BINTERNAL above that (rp).
1973 * If lp is NULL, this means that it was the leftmost key on the
1974 * parent, which (regardless of sort function) sorts less than
1975 * all keys. No need to check it.
1977 * If rp is NULL, lp was the highest key on the parent, so there's
1978 * no higher key we must sort less than.
1981 __bam_vrfy_treeorder(dbp, ip, h, lp, rp, func, flags)
1986 int (*func) __P((DB *, const DBT *, const DBT *));
/*
 * dbt starts zeroed with DB_DBT_MALLOC set.  Below it either aliases the
 * parent's inline key bytes (lp->data / rp->data) or is filled by __db_goff.
 */
1997 memset(&dbt, 0, sizeof(DBT));
1998 F_SET(&dbt, DB_DBT_MALLOC);
2002 * Empty pages are sorted correctly by definition. We check
2003 * to see whether they ought to be empty elsewhere; leaf
2004 * pages legally may be.
2006 if (NUM_ENT(h) == 0)
/*
 * `last` is the item index handed to __bam_cmp for the right-hand check.
 * It is NUM_ENT(h) minus O_INDX or minus P_INDX; presumably the choice is
 * made per page type by a switch whose labels are not visible here, with
 * the __db_unknown_path return as the fallback -- TODO confirm.
 */
2012 last = NUM_ENT(h) - O_INDX;
2015 last = NUM_ENT(h) - P_INDX;
2018 return (__db_unknown_path(env, "__bam_vrfy_treeorder"));
2021 /* Populate a dummy cursor. */
2022 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2023 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
2026 * The key on page h, the child page, is more likely to be
2027 * an overflow page, so we pass its offset, rather than lp/rp's,
2028 * into __bam_cmp. This will take advantage of __db_moff.
2032 * Skip first-item check if we're an internal page--the first
2033 * entry on an internal page is treated specially by __bam_cmp,
2034 * so what's on the page shouldn't matter. (Plus, since we're passing
2035 * our page and item 0 as to __bam_cmp, we'll sort before our
2036 * parent and falsely report a failure.)
2038 if (lp != NULL && TYPE(h) != P_IBTREE) {
/*
 * NOTE(review): dbc was already populated by the __db_cursor_int call under
 * "Populate a dummy cursor" above; this second call stores into the same
 * variable.  Confirm the first cursor is not leaked and whether this call
 * is redundant.
 */
2039 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2040 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
/*
 * Load the left parent key into dbt: a B_KEYDATA key is aliased in place,
 * a B_OVERFLOW key is fetched through __db_goff, and any other type takes
 * the __db_unknown_path route.
 */
2042 if (lp->type == B_KEYDATA) {
2043 dbt.data = lp->data;
2045 } else if (lp->type == B_OVERFLOW) {
2046 bo = (BOVERFLOW *)lp->data;
2047 if ((ret = __db_goff(dbc, &dbt,
2048 bo->tlen, bo->pgno, NULL, NULL)) != 0)
2052 __db_unknown_path(env, "__bam_vrfy_treeorder"));
2054 /* On error, fall through, free if needed, and return. */
/*
 * Compare the parent key with item 0 of h.  The test applied to cmp is not
 * visible in this listing; the two messages below cover "ordering wrong"
 * (ret becomes DB_VERIFY_BAD) and "comparison itself failed".
 */
2055 if ((ret = __bam_cmp(dbc, &dbt, h, 0, func, &cmp)) == 0) {
2058 "Page %lu: first item on page sorted greater than parent entry",
2060 ret = DB_VERIFY_BAD;
2064 "Page %lu: first item on page had comparison error",
/*
 * Free dbt.data only when it no longer aliases the parent's inline bytes,
 * i.e. presumably when __db_goff supplied a separate buffer.
 */
2067 if (dbt.data != lp->data)
2068 __os_ufree(env, dbt.data);
/*
 * Same sequence for the right parent entry rp, compared against item
 * `last` of h.  The guard that skips this when rp is NULL is not visible.
 */
2074 if (rp->type == B_KEYDATA) {
2075 dbt.data = rp->data;
2077 } else if (rp->type == B_OVERFLOW) {
2078 bo = (BOVERFLOW *)rp->data;
2079 if ((ret = __db_goff(dbc, &dbt,
2080 bo->tlen, bo->pgno, NULL, NULL)) != 0)
2084 __db_unknown_path(env, "__bam_vrfy_treeorder"));
2086 /* On error, fall through, free if needed, and return. */
2087 if ((ret = __bam_cmp(dbc, &dbt, h, last, func, &cmp)) == 0) {
2090 "Page %lu: last item on page sorted greater than parent entry",
2092 ret = DB_VERIFY_BAD;
2096 "Page %lu: last item on page had comparison error",
2099 if (dbt.data != rp->data)
2100 __os_ufree(env, dbt.data);
/*
 * NOTE(review): every line of this listing carries a leading source-line
 * number, and short lines (comment delimiters, braces, case labels, bare
 * break/goto/continue statements, some assignments) are not present.  The
 * comments added below describe only what the visible lines show.
 *
 * Parameters (order taken from the PUBLIC prototype and the K&R list):
 *	dbp      - database handle.
 *	vdp      - verification/salvage state; passed to the print and
 *	           overflow helpers and to __db_salvage_markdone.
 *	pgno     - page number of h; marked done on exit.
 *	pgtype   - page type to treat h as (compared with P_LBTREE and
 *	           P_IBTREE below).
 *	h        - the page being salvaged.
 *	handle, callback - output sink forwarded to __db_vrfy_prdbt.
 *	key      - when non-NULL, printed before each item (dup-page case,
 *	           per the comment inside the loop).
 *	flags    - salvage flags; DB_AGGRESSIVE, DB_SA_SKIPFIRSTKEY and
 *	           DB_SA_UNKNOWNKEY are tested in the visible lines.
 */
2108 * Safely dump out anything that looks like a key on an alleged
2109 * btree leaf page, also mark overflow pages as seen. For internal btree
2110 * pages, just mark any overflow pages as seen.
2112 * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *,
2113 * PUBLIC: db_pgno_t, u_int32_t, PAGE *, void *,
2114 * PUBLIC: int (*)(void *, const void *), DBT *, u_int32_t));
2117 __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
2124 int (*callback) __P((void *, const void *));
2130 DBT dbt, repldbt, unknown_key, unknown_data;
2133 db_indx_t i, last, beg, end, *inp;
2135 u_int32_t himark, ovfl_bufsz;
2137 int adj, ret, t_ret, t2_ret;
2138 #ifdef HAVE_COMPRESSION
2139 DBT kcpy, *last_key;
2140 int unknown_dup_key;
/*
 * Both scratch pointers start NULL so the cleanup code at err: can tell
 * whether they were ever allocated.
 */
2144 ovflbuf = pgmap = NULL;
2145 inp = P_INP(dbp, h);
/*
 * dbt receives each item to be printed and is flagged DB_DBT_REALLOC.
 * repldbt holds a private copy of the most recent overflow key (see the
 * realloc/memcpy pairs further down).
 */
2147 memset(&dbt, 0, sizeof(DBT));
2148 dbt.flags = DB_DBT_REALLOC;
2149 memset(&repldbt, 0, sizeof(DBT));
2151 #ifdef HAVE_COMPRESSION
/*
 * With compression, last_key names the key to pair with the next data item;
 * it starts as the caller's key unless DB_SA_UNKNOWNKEY was passed in.
 */
2152 memset(&kcpy, 0, sizeof(DBT));
2153 unknown_dup_key = LF_ISSET(DB_SA_UNKNOWNKEY);
2154 last_key = unknown_dup_key ? NULL : key;
2156 LF_CLR(DB_SA_UNKNOWNKEY);
/* Placeholder DBTs printed when one half of a key/data pair is missing. */
2158 DB_INIT_DBT(unknown_key, "UNKNOWN_KEY", sizeof("UNKNOWN_KEY") - 1);
2159 DB_INIT_DBT(unknown_data, "UNKNOWN_DATA", sizeof("UNKNOWN_DATA") - 1);
2162 * Allocate a buffer for overflow items. Start at one page;
2163 * __db_safe_goff will realloc as needed.
2165 if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
2167 ovfl_bufsz = dbp->pgsize;
/*
 * pgmap exists only in aggressive mode: it has dbp->pgsize entries and is
 * indexed by the beg/end values marked at the bottom of the loop.
 */
2169 if (LF_ISSET(DB_AGGRESSIVE) && (ret =
2170 __os_calloc(env, dbp->pgsize, sizeof(pgmap[0]), &pgmap)) != 0)
2174 * Loop through the inp array, spitting out key/data pairs.
2176 * If we're salvaging normally, loop from 0 through NUM_ENT(h). If
2177 * we're being aggressive, loop until we hit the end of the page --
2178 * NUM_ENT() may be bogus.
2180 himark = dbp->pgsize;
/*
 * `last` is compared with i - 1 below to decide whether a key was printed
 * without its data (or data without its key); it starts at UINT16_MAX.
 * The statement that updates it is not visible in this listing.
 */
2181 for (i = 0, last = UINT16_MAX;; i += O_INDX) {
2183 * If we're not aggressive, or if we're on an internal page,
2184 * break when we hit NUM_ENT(h).
2186 if ((!LF_ISSET(DB_AGGRESSIVE) ||
2187 pgtype == P_IBTREE) && i >= NUM_ENT(h))
2190 /* Verify the current item. */
2192 __db_vrfy_inpitem(dbp, h, pgno, i, 1, flags, &himark, NULL);
2196 * If this is a btree leaf and we've printed out a key
2197 * but not its associated data item, fix this imbalance
2198 * by printing an "UNKNOWN_DATA".
2200 if (pgtype == P_LBTREE && i % P_INDX == 1 &&
2201 last == i - 1 && (t2_ret = __db_vrfy_prdbt(
2203 0, " ", handle, callback, 0, vdp)) != 0) {
2210 * Don't return DB_VERIFY_FATAL; it's private and means
2211 * only that we can't go on with this page, not with
2212 * the whole database. It's not even an error if we've
2213 * run into it after NUM_ENT(h).
2215 if (t_ret == DB_VERIFY_FATAL) {
2216 if (i < NUM_ENT(h) && ret == 0)
2217 ret = DB_VERIFY_BAD;
2224 * If this returned 0, it's safe to print or (carefully)
2227 * We only print deleted items if DB_AGGRESSIVE is set.
2229 bk = GET_BKEYDATA(dbp, h, i);
2230 if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
2234 * If this is a btree leaf and we're about to print out a data
2235 * item for which we didn't print out a key, fix this imbalance
2236 * by printing an "UNKNOWN_KEY".
2238 if (pgtype == P_LBTREE && i % P_INDX == 1 && last != i - 1) {
2239 #ifdef HAVE_COMPRESSION
2242 if ((t_ret = __db_vrfy_prdbt(&unknown_key,
2243 0, " ", handle, callback, 0, vdp)) != 0) {
2252 * We're going to go try to print the next item. If key is
2253 * non-NULL, we're a dup page, so we've got to print the key
2254 * first, unless DB_SA_SKIPFIRSTKEY is set and we're on the
2257 if (key != NULL && (i != 0 || !LF_ISSET(DB_SA_SKIPFIRSTKEY))) {
2258 #ifdef HAVE_COMPRESSION
2259 last_key = unknown_dup_key ? NULL : key;
2261 if ((t_ret = __db_vrfy_prdbt(key,
2262 0, " ", handle, callback, 0, vdp)) != 0) {
/*
 * Dispatch on the item type.  The case labels are not visible in this
 * listing; from the bodies, the three arms handle (1) an off-page duplicate
 * reference (passed to __db_salvage_duptree), (2) an inline key/data item
 * (bk->data / bk->len) and (3) an overflow item (fetched with
 * __db_safe_goff).  In each arm beg/end are set to the item's byte extent
 * for the pgmap marking at the bottom of the loop; the assignments to beg
 * are not visible here.
 */
2270 switch (B_TYPE(bk->type)) {
2272 if (pgtype == P_IBTREE)
2275 end = beg + BOVERFLOW_SIZE - 1;
2277 * If we're not on a normal btree leaf page, there
2278 * shouldn't be off-page dup sets. Something's
2279 * confused; just drop it, and the code to pick up
2280 * unlinked offpage dup sets will print it out
2281 * with key "UNKNOWN" later.
2283 if (pgtype != P_LBTREE)
2286 bo = (BOVERFLOW *)bk;
2289 * If the page number is unreasonable, or if this is
2290 * supposed to be a key item, output "UNKNOWN_KEY" --
2291 * the best we can do is run into the data items in
2292 * the unlinked offpage dup pass.
2294 if (!IS_VALID_PGNO(bo->pgno) || (i % P_INDX == 0)) {
2295 /* Not much to do on failure. */
2296 #ifdef HAVE_COMPRESSION
2297 if (key == NULL && i % P_INDX == 0)
2300 if ((t_ret = __db_vrfy_prdbt(
2301 i % P_INDX == 0 ? &unknown_key : &unknown_data,
2302 0, " ", handle, callback, 0, vdp)) != 0) {
2310 /* Don't stop on error. */
2311 if ((t_ret = __db_salvage_duptree(dbp,
2312 vdp, bo->pgno, &dbt, handle, callback,
2313 flags | DB_SA_SKIPFIRSTKEY
2314 #ifdef HAVE_COMPRESSION
2315 | (last_key == NULL ? DB_SA_UNKNOWNKEY : 0)
2317 )) != 0 && ret == 0)
2322 if (pgtype == P_IBTREE)
/* The inline item's end offset is rounded up to a u_int32_t boundary. */
2325 end = (db_indx_t)DB_ALIGN(
2326 beg + bk->len, sizeof(u_int32_t)) - 1;
2328 dbt.data = bk->data;
2331 #ifdef HAVE_COMPRESSION
2332 if (DB_IS_COMPRESSED(dbp) && last_key != NULL &&
2333 (key != NULL || (i % P_INDX == 1))) {
2334 /* Decompress the key/data pair - the key
2335 is in last_key, and the data is in dbt */
2336 if ((t_ret = __bam_compress_salvage(dbp, vdp,
2337 handle, callback, last_key, &dbt)) != 0) {
2338 if (t_ret == DB_VERIFY_FATAL) {
2340 ret = DB_VERIFY_BAD;
2341 if (!LF_ISSET(DB_AGGRESSIVE))
2343 } else if (ret == 0) {
/*
 * On a key slot with no caller-supplied key, copy the key bytes into kcpy;
 * presumably last_key is then pointed at kcpy (that assignment is not
 * visible) so the following data item can be decompressed against it.
 */
2349 if (key == NULL && i % P_INDX == 0) {
2350 if ((ret = __os_realloc(
2351 env, dbt.size, &kcpy.data)) != 0)
2353 memcpy(kcpy.data, dbt.data, dbt.size);
2354 kcpy.size = dbt.size;
2359 if ((t_ret = __db_vrfy_prdbt(&dbt,
2360 0, " ", handle, callback, 0, vdp)) != 0) {
2365 #ifdef HAVE_COMPRESSION
2370 if (pgtype != P_IBTREE)
2371 end = beg + BOVERFLOW_SIZE - 1;
2372 bo = (BOVERFLOW *)bk;
2375 * Check for replicated overflow keys, so that we only
2376 * call __db_safe_goff once per overflow page. If we
2377 * get the same offset as the previous key just re-use
2380 * P_IBTREE pages will never have replicated overflow
/* adj is the index distance back to the previous key slot for this page type. */
2383 adj = pgtype == P_IBTREE ? O_INDX : P_INDX;
2384 if (pgtype == P_IBTREE) {
2386 * If we're looking at a P_IBTREE, we just want
2387 * to mark the overflow page as seen.
2389 * Note that this call to __db_safe_goff differs
2390 * from the non-P_IBTREE call.
2392 * Only call __db_safe_goff if the overflow page
2395 ovflpg = ((BOVERFLOW *)
2396 ((BINTERNAL *)bk)->data)->pgno;
2397 if (__db_salvage_isdone(vdp, ovflpg) == 0 &&
2398 (t_ret =__db_safe_goff(dbp, vdp, ovflpg,
2400 &ovfl_bufsz, flags)) != 0 && ret == 0)
/*
 * Same inp offset as the previous key slot means a replicated overflow key;
 * the body of this branch (re-using the saved copy) is not visible.
 */
2403 } else if (i > adj - 1 &&
2404 i % adj == 0 && inp[i] == inp[i - adj])
2407 /* Don't stop on error. */
2408 if ((t_ret = __db_safe_goff(dbp, vdp,
2409 bo->pgno, &dbt, &ovflbuf,
2410 &ovfl_bufsz, flags)) != 0 && ret == 0)
2414 * If this is a key, save it in case the next
2415 * key is a replicated overflow, so we don't
2416 * call __db_safe_goff again. Copy out dbt.data
2417 * in case that pointer gets realloc'd when
2418 * getting a data item.
2420 if (i % P_INDX == 0) {
2422 if ((t_ret = __os_realloc(env,
2424 &repldbt.data)) != 0) {
2429 memcpy(repldbt.data,
2430 dbt.data, dbt.size);
2431 repldbt.size = dbt.size;
/*
 * Second realloc/memcpy pair: repldbt is given unknown_key's size; the
 * condition selecting this path is not visible (presumably the overflow
 * fetch failed) -- TODO confirm.
 */
2433 if (__os_realloc(env,
2435 &repldbt.data) != 0)
2437 memcpy(repldbt.data,
2440 repldbt.size = unknown_key.size;
2446 #ifdef HAVE_COMPRESSION
2447 if (DB_IS_COMPRESSED(dbp) && last_key && t_ret == 0 &&
2448 (key != NULL || (i % P_INDX == 1))) {
2449 /* Decompress the key/data pair - the key
2450 is in last_key, and the data is in dbt */
2451 if ((t_ret = __bam_compress_salvage(dbp, vdp,
2452 handle, callback, last_key, &dbt)) != 0) {
2453 if (t_ret == DB_VERIFY_FATAL) {
2455 ret = DB_VERIFY_BAD;
2456 if (!LF_ISSET(DB_AGGRESSIVE))
2458 } else if (ret == 0) {
2464 if (key == NULL && i % P_INDX == 0) {
2466 if ((ret = __os_realloc(env,
2467 dbt.size, &kcpy.data)) != 0)
2469 memcpy(kcpy.data, dbt.data,
2471 kcpy.size = dbt.size;
/* Print the fetched item, or the UNKNOWN_KEY placeholder if t_ret is non-zero. */
2478 if ((t_ret = __db_vrfy_prdbt(
2479 t_ret == 0 ? &dbt : &unknown_key,
2480 0, " ", handle, callback, 0, vdp))
2483 #ifdef HAVE_COMPRESSION
2489 * We should never get here; __db_vrfy_inpitem should
2490 * not be returning 0 if bk->type is unrecognizable.
2492 t_ret = __db_unknown_path(env, "__bam_salvage");
2499 * If we're being aggressive, mark the beginning and end of
2500 * the item; we'll come back and print whatever "junk" is in
2501 * the gaps in case we had any bogus inp elements and thereby
2504 if (LF_ISSET(DB_AGGRESSIVE) && pgtype != P_IBTREE) {
2505 pgmap[beg] = VRFY_ITEM_BEGIN;
2506 pgmap[end] = VRFY_ITEM_END;
/*
 * Common exit: release whichever scratch buffers were allocated, then mark
 * the page done; a markdone failure is tested only when ret is still 0.
 */
2510 err: if (pgmap != NULL)
2511 __os_free(env, pgmap);
2512 if (ovflbuf != NULL)
2513 __os_free(env, ovflbuf);
2514 if (repldbt.data != NULL)
2515 __os_free(env, repldbt.data);
2516 #ifdef HAVE_COMPRESSION
2517 if (kcpy.data != NULL)
2518 __os_free(env, kcpy.data);
2521 /* Mark this page as done. */
2522 if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
/*
 * NOTE(review): every line of this listing carries a leading source-line
 * number, and short lines (comment delimiters, braces, case labels, bare
 * break/return statements, declarations) are not present.  The comments
 * added below describe only what the visible lines show.
 *
 * Parameters (from the PUBLIC prototype and the K&R list):
 *	dbp, vdp - database handle and salvage state, forwarded unchanged.
 *	h        - the internal page whose entries are walked.
 *	key      - forwarded to __db_salvage_duptree for every child.
 *	handle, callback - output sink, forwarded unchanged.
 *	flags    - forwarded to each child; DB_SA_SKIPFIRSTKEY is cleared
 *	           after the first iteration (see the last statement).
 */
2529 * __bam_salvage_walkdupint --
2530 * Walk a known-good btree or recno internal page which is part of
2531 * a dup tree, calling __db_salvage_duptree on each child page.
2533 * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *,
2534 * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
2537 __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
2543 int (*callback) __P((void *, const void *));
/*
 * One pass per entry on the page.  The selector is not visible in this
 * listing; from the bodies, one arm reads the entry as a BINTERNAL, another
 * as an RINTERNAL, and the fallback returns through __db_unknown_path.
 * What happens to a non-zero t_ret is not visible -- TODO confirm.
 */
2555 for (i = 0; i < NUM_ENT(h); i++) {
2558 bi = GET_BINTERNAL(dbp, h, i);
2559 if ((t_ret = __db_salvage_duptree(dbp,
2560 vdp, bi->pgno, key, handle, callback, flags)) != 0)
2564 ri = GET_RINTERNAL(dbp, h, i);
2565 if ((t_ret = __db_salvage_duptree(dbp,
2566 vdp, ri->pgno, key, handle, callback, flags)) != 0)
2570 return (__db_unknown_path(
2571 env, "__bam_salvage_walkdupint"));
2573 /* Pass DB_SA_SKIPFIRSTKEY, if set, on to the 0th child only. */
/*
 * Presumably LF_ISSET yields the masked flag bit(s), so this strips
 * DB_SA_SKIPFIRSTKEY from flags for all later children -- TODO confirm
 * the macro's definition.
 */
2574 flags &= ~LF_ISSET(DB_SA_SKIPFIRSTKEY);
/*
 * NOTE(review): every line of this listing carries a leading source-line
 * number, and short lines (comment delimiters, braces, bare break/goto
 * statements, some declarations and assignments) are not present.  The
 * comments added below describe only what the visible lines show.
 *
 * Parameters (from the PUBLIC prototype and the K&R list):
 *	dbp    - database handle.
 *	vdp    - verification state; supplies thread_info for page get/put.
 *	btmeta - btree metadata page; btmeta->root is the starting page.
 *	flags  - forwarded to __bam_vrfy with DB_NOORDERCHK added.
 *	pgset  - page-set database updated through __db_vrfy_pgset_inc;
 *	         asserted non-NULL.
 *
 * Return: ret when non-zero, otherwise err_ret (set to DB_VERIFY_BAD on
 * the visible failure paths).
 */
2581 * __bam_meta2pgset --
2582 * Given a known-good meta page, return in pgsetp a 0-terminated list of
2583 * db_pgno_t's corresponding to the pages in the btree.
2585 * We do this by a somewhat sleazy method, to avoid having to traverse the
2586 * btree structure neatly: we walk down the left side to the very
2587 * first leaf page, then we mark all the pages in the chain of
2588 * NEXT_PGNOs (being wary of cycles and invalid ones), then we
2589 * consolidate our scratch array into a nice list, and return. This
2590 * avoids the memory management hassles of recursion and the
2591 * trouble of walking internal pages--they just don't matter, except
2592 * for the left branch.
2594 * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *,
2595 * PUBLIC: u_int32_t, DB *));
2598 __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
2609 db_pgno_t current, p;
2612 DB_ASSERT(dbp->env, pgset != NULL);
/*
 * Phase 1: descend from the root.  Each page is fetched and checked with
 * __bam_vrfy; entry 0 of an internal page (BINTERNAL or RINTERNAL) is read
 * to pick the next page -- the assignment to `current` is not visible.
 * An invalid page number, or one equal to the meta page's own, sets
 * err_ret to DB_VERIFY_BAD.
 */
2618 for (current = btmeta->root;;) {
2619 if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) {
2620 err_ret = DB_VERIFY_BAD;
/*
 * NOTE(review): the second argument on the next line reads as a currency
 * sign followed by 't'.  That looks like a mis-encoded address-of `current`
 * (ampersand immediately followed by the variable name) -- confirm against
 * the pristine source.  The same token appears in the leaf-chain loop.
 */
2623 if ((ret = __memp_fget(mpf, ¤t,
2624 vdp->thread_info, NULL, 0, &h)) != 0) {
2632 if ((ret = __bam_vrfy(dbp,
2633 vdp, h, current, flags | DB_NOORDERCHK)) != 0) {
2637 if (TYPE(h) == P_IBTREE) {
2638 bi = GET_BINTERNAL(dbp, h, 0);
2640 } else { /* P_IRECNO */
2641 ri = GET_RINTERNAL(dbp, h, 0);
2649 err_ret = DB_VERIFY_BAD;
2653 if ((ret = __memp_fput(mpf,
2654 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
2660 * At this point, current is the pgno of leaf page h, the 0th in the
2661 * tree we're concerned with.
/*
 * Phase 2: follow NEXT_PGNO links.  Each page is looked up in pgset, then
 * added with __db_vrfy_pgset_inc, and released before moving on.  h is
 * fetched only when it is NULL, so the leaf left over from phase 1 is
 * presumably reused on the first pass -- TODO confirm.
 */
2664 while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
2665 if (h == NULL && (ret = __memp_fget(mpf,
2666 ¤t, vdp->thread_info, NULL, 0, &h)) != 0) {
/*
 * NOTE(review): p is declared db_pgno_t but passed as (int *); this relies
 * on the two types having the same size -- confirm.
 */
2671 if ((ret = __db_vrfy_pgset_get(pgset,
2672 vdp->thread_info, current, (int *)&p)) != 0)
2677 * We've found a cycle. Return success anyway--
2678 * our caller may as well use however much of
2679 * the pgset we've come up with.
2684 __db_vrfy_pgset_inc(pgset, vdp->thread_info, current)) != 0)
2687 current = NEXT_PGNO(h);
2688 if ((ret = __memp_fput(mpf,
2689 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
/*
 * Final release of h; its return value is deliberately discarded.  The
 * guard on this call (presumably h != NULL) is not visible.
 */
2695 (void)__memp_fput(mpf,
2696 vdp->thread_info, h, DB_PRIORITY_UNCHANGED);
2698 return (ret == 0 ? err_ret : ret);
2702 * __bam_safe_getdata --
2704 * Utility function for __bam_vrfy_itemorder. Safely gets the datum at
2705 * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an
2706 * overflow item, we do a safe_goff to get the item and signal that we need
2707 * to free dbt->data; if ovflok is 0, we leave the DBT zeroed.
/*
 * NOTE(review): every line of this listing carries a leading source-line
 * number, and short lines (braces, declarations, bare return statements)
 * are not present; the end of this function is not visible.  The comments
 * added here describe only what the visible lines show.
 *
 * Parameters (from the K&R list):
 *	dbp, ip  - database handle and thread info; used to create a cursor
 *	           for the overflow fetch.
 *	h, i     - page and item index of the datum to fetch.
 *	ovflok   - not referenced in the visible lines; presumably it gates
 *	           the overflow branch -- TODO confirm.
 *	dbt      - output; zeroed first, then filled.
 *	freedbtp - not referenced in the visible lines; presumably set when
 *	           the caller must free dbt->data -- TODO confirm.
 */
2710 __bam_safe_getdata(dbp, ip, h, i, ovflok, dbt, freedbtp)
2724 memset(dbt, 0, sizeof(DBT));
2727 bk = GET_BKEYDATA(dbp, h, i);
/*
 * Overflow item: create a cursor and have __db_goff fill dbt, which is
 * flagged DB_DBT_MALLOC before the call.
 */
2728 if (B_TYPE(bk->type) == B_OVERFLOW) {
2732 if ((ret = __db_cursor_int(dbp, ip, NULL, DB_BTREE,
2733 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
2735 bo = (BOVERFLOW *)bk;
2736 F_SET(dbt, DB_DBT_MALLOC);
2739 return (__db_goff(dbc, dbt, bo->tlen, bo->pgno, NULL, NULL));
/* Inline item: dbt points directly at the bytes on the page; nothing is copied. */
2741 dbt->data = bk->data;
2742 dbt->size = bk->len;