/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
 */
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/lock.h"
15 #include "dbinc/log.h"
/*
 * IS_BTREE_PAGE --
 *	True for any page type handled by the btree access method:
 *	internal (P_IBTREE), leaf (P_LBTREE), or off-page-duplicate
 *	leaf (P_LDUP).
 */
#define IS_BTREE_PAGE(pagep) \
    (TYPE(pagep) == P_IBTREE || \
    TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP)
23 * __bam_split_recover --
24 * Recovery function for split.
26 * PUBLIC: int __bam_split_recover
27 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/*
 * Redo or undo a btree/recno page split (current log format).
 *
 * NOTE(review): this excerpt has lines elided (parameter declarations,
 * gotos, some braces); comments below describe only the visible logic --
 * verify against the complete source file.
 */
__bam_split_recover(env, dbtp, lsnp, op, info)
	__bam_split_args *argp;
	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
	db_pgno_t pgno, parent_pgno;
	u_int32_t ptype, size;
	int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_split_print);

	/* Null the page pointers so cleanup releases only what we hold. */
	_lp = lp = np = pp = _rp = rp = NULL;

	REC_INTRO(__bam_split_read, ip, 0);

	/* Open a cursor of the access method matching the logged split. */
	if ((ret = __db_cursor_int(file_dbp, ip, NULL,
	    (argp->opflags & SPL_RECNO) ? DB_RECNO : DB_BTREE,
	    PGNO_INVALID, 0, NULL, &dbc)) != 0)
	if (argp->opflags & SPL_NRECS)
		F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM);
	F_SET(dbc, DBC_RECOVER);

	/*
	 * There are two kinds of splits that we have to recover from.  The
	 * first is a root-page split, where the root page is split from a
	 * leaf page into an internal page and two new leaf pages are created.
	 * The second is where a page is split into two pages, and a new key
	 * is inserted into the parent page.
	 *
	 * DBTs are not aligned in log records, so we need to copy the page
	 * so that we can access fields within it throughout this routine.
	 * Although we could hardcode the unaligned copies in this routine,
	 * we will be calling into regular btree functions with this page,
	 * so it's got to be aligned.  Copying it into allocated memory is
	 * the only way to guarantee this.
	 */
	if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
	memcpy(sp, argp->pg.data, argp->pg.size);

	parent_pgno = argp->ppgno;
	/* A root split logs the split page as its own parent. */
	rootsplit = parent_pgno == pgno;

	/* Get the pages going down the tree. */
	REC_FGET(mpf, ip, parent_pgno, &pp, left);
left:	REC_FGET(mpf, ip, argp->left, &lp, right);
right:	REC_FGET(mpf, ip, argp->right, &rp, redo);

redo:	if (DB_REDO(op)) {
		l_update = r_update = p_update = 0;
		/*
		 * Decide if we need to resplit the page.
		 *
		 * If this is a root split, then the root has to exist unless
		 * we have truncated it due to a future deallocation.
		 */
			plsnp = &LSN(argp->pg.data);
			cmp = LOG_COMPARE(&LSN(pp), plsnp);
			CHECK_LSN(env, op, cmp, &LSN(pp), plsnp);
			cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
			CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
			cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
			CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
		/* Nothing out of date: no redo work to do. */
		if (!p_update && !l_update && !r_update)

		/* Allocate and initialize new left/right child pages. */
		if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
		    (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
			P_INIT(_lp, file_dbp->pgsize, argp->left,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
			    LEVEL(sp), TYPE(sp));

		/* Split the page. */
		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
			memcpy(lp, _lp, file_dbp->pgsize);
			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
			memcpy(rp, _rp, file_dbp->pgsize);
		/*
		 * Drop the latches on the lower level pages before
		 * getting an exclusive latch on the higher level page.
		 */
		if (lp != NULL && (ret = __memp_fput(mpf,
		    ip, lp, file_dbp->priority)) && ret == 0)
		if (rp != NULL && (ret = __memp_fput(mpf,
		    ip, rp, file_dbp->priority)) && ret == 0)
		/*
		 * If the parent page is wrong, update it.
		 * Initialize the page.  If it is a root page update
		 * the record counts if needed and put the first record in.
		 * Then insert the record for the right hand child page.
		 */
			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
			if (argp->opflags & SPL_RECNO)
			P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID,
			    PGNO_INVALID, _lp->level + 1, ptype);
			if (argp->opflags & SPL_NRECS) {
				    __bam_total(file_dbp, _lp) +
				    __bam_total(file_dbp, _rp));
				if ((ret = __db_pitem_nolog(dbc, pp,
				    argp->pindx, argp->pentry.size,
				    &argp->pentry, NULL)) != 0)
			if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1,
			    argp->rentry.size, &argp->rentry, NULL)) != 0)
		/*
		 * Finally, redo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  The next
		 * page must exist because we're redoing the operation.
		 */
		if (!rootsplit && argp->npgno != PGNO_INVALID) {
			REC_FGET(mpf, ip, argp->npgno, &np, done);
			cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
			CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
				PREV_PGNO(np) = argp->right;
		/*
		 * If it's a root split and the left child ever existed, update
		 * its LSN.  Otherwise its the split page.  If
		 * right child ever existed, root split or not, update its LSN.
		 * The undo of the page allocation(s) will restore them to the
		 */
		if (rootsplit && lp != NULL &&
		    LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
			lp->lsn = argp->llsn;
		    LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
			rp->lsn = argp->rlsn;
		/*
		 * Drop the lower level pages before getting an exclusive
		 * latch on the parent.
		 */
		if (rp != NULL && (ret = __memp_fput(mpf,
		    ip, rp, file_dbp->priority)))
		/*
		 * Check the state of the split page.  If its a rootsplit
		 * then thats the rootpage otherwise its the left page.
		 */
			DB_ASSERT(env, pgno == argp->ppgno);
			if (lp != NULL && (ret = __memp_fput(mpf, ip,
			    lp, file_dbp->priority)) != 0)
			cmp = LOG_COMPARE(lsnp, &LSN(lp));
			CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp);
				REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
				memcpy(lp, argp->pg.data, argp->pg.size);
				if ((ret = __memp_fput(mpf,
				    ip, lp, file_dbp->priority)))
		/*
		 * Next we can update the parent removing the new index.
		 */
			DB_ASSERT(env, !rootsplit);
			cmp = LOG_COMPARE(lsnp, &LSN(pp));
			CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp);
				REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
				if (argp->opflags & SPL_RECNO)
					size = RINTERNAL_SIZE;
					size = BINTERNAL_SIZE(
					    GET_BINTERNAL(file_dbp,
					    pp, argp->pindx + 1)->len);
				if ((ret = __db_ditem(dbc, pp,
				    argp->pindx + 1, size)) != 0)
				pp->lsn = argp->plsn;
		/*
		 * Finally, undo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  Since it's
		 * possible that the next-page never existed, we ignore it as
		 * if there's nothing to undo.
		 */
		if (!rootsplit && argp->npgno != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf, &argp->npgno,
			    ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
			if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
				PREV_PGNO(np) = argp->left;
				np->lsn = argp->nlsn;

done:	*lsnp = argp->prev_lsn;

out:	/* Free any pages that are left. */
	if (lp != NULL && (t_ret = __memp_fput(mpf,
	    ip, lp, file_dbp->priority)) != 0 && ret == 0)
	if (np != NULL && (t_ret = __memp_fput(mpf,
	    ip, np, file_dbp->priority)) != 0 && ret == 0)
	if (rp != NULL && (t_ret = __memp_fput(mpf,
	    ip, rp, file_dbp->priority)) != 0 && ret == 0)
	if (pp != NULL && (t_ret = __memp_fput(mpf,
	    ip, pp, file_dbp->priority)) != 0 && ret == 0)

	/* Free any allocated space. */
 * __bam_split_42_recover --
 *	Recovery function for split: DB 4.2 log-format variant.
 *
 * PUBLIC: int __bam_split_42_recover
 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/*
 * Recover a page split logged in the DB 4.2 record format.
 *
 * NOTE(review): this excerpt has lines elided (parameter declarations,
 * gotos, some braces); comments cover only the visible logic -- verify
 * against the complete source file.
 */
__bam_split_42_recover(env, dbtp, lsnp, op, info)
	__bam_split_42_args *argp;
	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
	db_pgno_t pgno, root_pgno;
	int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_split_print);

	/* Null the page pointers so cleanup releases only what we hold. */
	_lp = lp = np = pp = _rp = rp = NULL;

	REC_INTRO(__bam_split_42_read, ip, 0);

	/*
	 * There are two kinds of splits that we have to recover from.  The
	 * first is a root-page split, where the root page is split from a
	 * leaf page into an internal page and two new leaf pages are created.
	 * The second is where a page is split into two pages, and a new key
	 * is inserted into the parent page.
	 *
	 * DBTs are not aligned in log records, so we need to copy the page
	 * so that we can access fields within it throughout this routine.
	 * Although we could hardcode the unaligned copies in this routine,
	 * we will be calling into regular btree functions with this page,
	 * so it's got to be aligned.  Copying it into allocated memory is
	 * the only way to guarantee this.
	 */
	if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
	memcpy(sp, argp->pg.data, argp->pg.size);

	/* In the 4.2 format a root split logs a valid root_pgno. */
	root_pgno = argp->root_pgno;
	rootsplit = root_pgno != PGNO_INVALID;
	REC_FGET(mpf, ip, argp->left, &lp, right);
right:	REC_FGET(mpf, ip, argp->right, &rp, redo);

redo:	if (DB_REDO(op)) {
		l_update = r_update = p_update = 0;
		/*
		 * Decide if we need to resplit the page.
		 *
		 * If this is a root split, then the root has to exist unless
		 * we have truncated it due to a future deallocation.
		 */
			REC_FGET(mpf, ip, root_pgno, &pp, do_left);
			cmp = LOG_COMPARE(&LSN(pp), &LSN(argp->pg.data));
			    cmp, &LSN(pp), &LSN(argp->pg.data));

do_left:	if (lp != NULL) {
			cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
			CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
			cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
			CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
		/* Nothing out of date: no redo work to do. */
		if (!p_update && !l_update && !r_update)

		/* Allocate and initialize new left/right child pages. */
		if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
		    (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
			P_INIT(_lp, file_dbp->pgsize, argp->left,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
			    LEVEL(sp), TYPE(sp));

		/* Split the page. */
		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
			memcpy(lp, _lp, file_dbp->pgsize);
			if ((ret = __memp_fput(mpf,
			    ip, lp, file_dbp->priority)) != 0)
			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
			memcpy(rp, _rp, file_dbp->pgsize);
			if ((ret = __memp_fput(mpf,
			    ip, rp, file_dbp->priority)) != 0)
		/*
		 * If the parent page is wrong, update it.  This is of interest
		 * only if it was a root split, since root splits create parent
		 * pages.  All other splits modify a parent page, but those are
		 * separately logged and recovered.
		 */
		if (rootsplit && p_update) {
			if (IS_BTREE_PAGE(sp)) {
				rc = argp->opflags & SPL_NRECS ? 1 : 0;
			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
			P_INIT(pp, file_dbp->pgsize, root_pgno,
			    PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype);
			RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) +
			    __bam_total(file_dbp, _rp) : 0);
			if ((ret = __memp_fput(mpf,
			    ip, pp, file_dbp->priority)) != 0)
		/*
		 * Finally, redo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  The next
		 * page must exist because we're redoing the operation.
		 */
		if (!rootsplit && argp->npgno != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf, &argp->npgno,
			    ip, NULL, 0, &np)) != 0) {
				if (ret != DB_PAGE_NOTFOUND) {
					    file_dbp, argp->npgno, ret);
			cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
			CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
				PREV_PGNO(np) = argp->right;
			if ((ret = __memp_fput(mpf, ip,
			    np, file_dbp->priority)) != 0)
		/*
		 * If the split page is wrong, replace its contents with the
		 * logged page contents.  If the page doesn't exist, it means
		 * that the create of the page never happened, nor did any of
		 * the adds onto the page that caused the split, and there's
		 * really no undo-ing to be done.
		 */
		if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
		    DB_MPOOL_EDIT, &pp)) != 0) {
		if (LOG_COMPARE(lsnp, &LSN(pp)) == 0) {
			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
			memcpy(pp, argp->pg.data, argp->pg.size);
			if ((ret = __memp_fput(mpf,
			    ip, pp, file_dbp->priority)) != 0)
		/*
		 * If it's a root split and the left child ever existed, update
		 * its LSN.  (If it's not a root split, we've updated the left
		 * page already -- it's the same as the split page.)  If the
		 * right child ever existed, root split or not, update its LSN.
		 * The undo of the page allocation(s) will restore them to the
		 */
lrundo:		if ((rootsplit && lp != NULL) || rp != NULL) {
			if (rootsplit && lp != NULL &&
			    LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
				REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
				lp->lsn = argp->llsn;
				if ((ret = __memp_fput(mpf, ip,
				    lp, file_dbp->priority)) != 0)
			    LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
				REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
				rp->lsn = argp->rlsn;
				if ((ret = __memp_fput(mpf, ip,
				    rp, file_dbp->priority)) != 0)
		/*
		 * Finally, undo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  Since it's
		 * possible that the next-page never existed, we ignore it as
		 * if there's nothing to undo.
		 */
		if (!rootsplit && argp->npgno != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf, &argp->npgno,
			    ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
			if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
				PREV_PGNO(np) = argp->left;
				np->lsn = argp->nlsn;
			    ip, np, file_dbp->priority))

done:	*lsnp = argp->prev_lsn;

out:	/* Free any pages that weren't dirtied. */
	if (pp != NULL && (t_ret = __memp_fput(mpf,
	    ip, pp, file_dbp->priority)) != 0 && ret == 0)
	if (lp != NULL && (t_ret = __memp_fput(mpf,
	    ip, lp, file_dbp->priority)) != 0 && ret == 0)
	if (np != NULL && (t_ret = __memp_fput(mpf,
	    ip, np, file_dbp->priority)) != 0 && ret == 0)
	if (rp != NULL && (t_ret = __memp_fput(mpf,
	    ip, rp, file_dbp->priority)) != 0 && ret == 0)

	/* Free any allocated space. */
656 * __bam_rsplit_recover --
657 * Recovery function for a reverse split.
659 * PUBLIC: int __bam_rsplit_recover
660 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
663 __bam_rsplit_recover(env, dbtp, lsnp, op, info)
670 __bam_rsplit_args *argp;
677 db_pgno_t pgno, root_pgno;
679 int cmp_n, cmp_p, ret;
681 ip = ((DB_TXNHEAD *)info)->thread_info;
683 REC_PRINT(__bam_rsplit_print);
684 REC_INTRO(__bam_rsplit_read, ip, 1);
686 /* Fix the root page. */
687 pgno = root_pgno = argp->root_pgno;
688 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) {
689 if (ret != DB_PAGE_NOTFOUND) {
690 ret = __db_pgerr(file_dbp, pgno, ret);
696 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
697 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->rootlsn);
698 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->rootlsn);
699 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
700 if (cmp_p == 0 && DB_REDO(op)) {
702 * Copy the new data to the root page. If it is not now a
703 * leaf page we need to restore the record number. We could
704 * try to determine if C_RECNUM was set in the btree, but
705 * that's not really necessary since the field is not used
708 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
709 rcnt = RE_NREC(pagep);
710 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
711 if (LEVEL(pagep) > LEAFLEVEL)
712 RE_NREC_SET(pagep, rcnt);
713 pagep->pgno = root_pgno;
715 } else if (cmp_n == 0 && DB_UNDO(op)) {
716 /* Need to undo update described. */
717 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
718 P_INIT(pagep, file_dbp->pgsize, root_pgno,
719 argp->nrec, PGNO_INVALID, pagep->level + 1,
720 IS_BTREE_PAGE(pagep) ? P_IBTREE : P_IRECNO);
721 if ((ret = __db_pitem(dbc, pagep, 0,
722 argp->rootent.size, &argp->rootent, NULL)) != 0)
724 pagep->lsn = argp->rootlsn;
726 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
731 * Fix the page copied over the root page. It's possible that the
732 * page never made it to disk, or was truncated so if the page
733 * doesn't exist, it's okay and there's nothing further to do.
735 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
736 if (ret != DB_PAGE_NOTFOUND) {
737 ret = __db_pgerr(file_dbp, argp->pgno, ret);
742 (void)__ua_memcpy(©_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
743 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
744 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
745 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
746 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
747 if (cmp_p == 0 && DB_REDO(op)) {
748 /* Need to redo update described. */
749 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
751 } else if (cmp_n == 0 && DB_UNDO(op)) {
752 /* Need to undo update described. */
753 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
754 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
756 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
760 done: *lsnp = argp->prev_lsn;
763 out: if (pagep != NULL)
764 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
769 * __bam_adj_recover --
770 * Recovery function for adj.
772 * PUBLIC: int __bam_adj_recover
773 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/* Redo/undo a cursor index adjustment on a single page. */
__bam_adj_recover(env, dbtp, lsnp, op, info)
	__bam_adj_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_adj_print);
	REC_INTRO(__bam_adj_read, ip, 1);

	/* Get the page; if it never existed and we're undoing, we're done. */
	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->pgno, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Need to redo update described. */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		if ((ret = __bam_adjindx(dbc,
		    pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to undo update described: apply inverse adjustment. */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		if ((ret = __bam_adjindx(dbc,
		    pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
		LSN(pagep) = argp->lsn;
	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
838 * __bam_cadjust_recover --
839 * Recovery function for the adjust of a count change in an internal
842 * PUBLIC: int __bam_cadjust_recover
843 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/* Redo/undo a record-count adjustment in an internal page entry. */
__bam_cadjust_recover(env, dbtp, lsnp, op, info)
	__bam_cadjust_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_cadjust_print);
	REC_INTRO(__bam_cadjust_read, ip, 0);

	/* Get the page; if it never existed and we're undoing, we're done. */
	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->pgno, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Need to redo update described: add the logged adjustment. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		if (IS_BTREE_PAGE(pagep)) {
			GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
			if (argp->opflags & CAD_UPDATEROOT)
				RE_NREC_ADJ(pagep, argp->adjust);
			GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
			if (argp->opflags & CAD_UPDATEROOT)
				RE_NREC_ADJ(pagep, argp->adjust);
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to undo update described: subtract it back out. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		if (IS_BTREE_PAGE(pagep)) {
			GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
			if (argp->opflags & CAD_UPDATEROOT)
				RE_NREC_ADJ(pagep, -(argp->adjust));
			GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
			if (argp->opflags & CAD_UPDATEROOT)
				RE_NREC_ADJ(pagep, -(argp->adjust));
		LSN(pagep) = argp->lsn;
	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
923 * __bam_cdel_recover --
924 * Recovery function for the intent-to-delete of a cursor record.
926 * PUBLIC: int __bam_cdel_recover
927 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/* Redo/undo setting the intent-to-delete flag on a record. */
__bam_cdel_recover(env, dbtp, lsnp, op, info)
	__bam_cdel_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_cdel_print);
	REC_INTRO(__bam_cdel_read, ip, 0);

	/* Get the page; if it never existed and we're undoing, we're done. */
	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->pgno, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Need to redo update described. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		/* On leaf btree pages the data item follows the key. */
		indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
		B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type);
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to undo update described. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
		B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type);

		if ((ret = __bam_ca_delete(
		    file_dbp, argp->pgno, argp->indx, 0, NULL)) != 0)

		LSN(pagep) = argp->lsn;
	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
996 * __bam_repl_recover --
997 * Recovery function for page item replacement.
999 * PUBLIC: int __bam_repl_recover
1000 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/*
 * Redo/undo an item replacement on a btree page by splicing
 * prefix + (replacement|original) + suffix into a scratch DBT.
 *
 * NOTE(review): some lines are elided in this excerpt (pointer setup for
 * `p`, `dp`, `len`); verify against the complete source file.
 */
__bam_repl_recover(env, dbtp, lsnp, op, info)
	__bam_repl_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_repl_print);
	REC_INTRO(__bam_repl_read, ip, 1);

	/* Get the page; if it never existed and we're undoing, we're done. */
	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->pgno, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/*
		 * Need to redo update described.
		 *
		 * Re-build the replacement item.
		 */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		if (TYPE(pagep) == P_IBTREE) {
			/* Point at the internal struct past the type. */
			bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
			    SSZA(BINTERNAL, data) - SSZ(BINTERNAL, unused);
			bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
		memset(&dbt, 0, sizeof(dbt));
		dbt.size = argp->prefix + argp->suffix + argp->repl.size;
		if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
		memcpy(p, dp, argp->prefix);
		memcpy(p, argp->repl.data, argp->repl.size);
		p += argp->repl.size;
		memcpy(p, dp + (len - argp->suffix), argp->suffix);

		/* isdeleted has become the type flag for non-leaf replace */
		ret = __bam_ritem(dbc,
		    pagep, argp->indx, &dbt, argp->isdeleted);
		__os_free(env, dbt.data);
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/*
		 * Need to undo update described.
		 *
		 * Re-build the original item.
		 */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		if (TYPE(pagep) == P_IBTREE) {
			/* Point at the internal struct past the type. */
			bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
			    SSZA(BINTERNAL, data) - SSZ(BINTERNAL, unused);
			bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
		memset(&dbt, 0, sizeof(dbt));
		dbt.size = argp->prefix + argp->suffix + argp->orig.size;
		if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
		memcpy(p, dp, argp->prefix);
		memcpy(p, argp->orig.data, argp->orig.size);
		p += argp->orig.size;
		memcpy(p, dp + (len - argp->suffix), argp->suffix);
		ret = __bam_ritem(dbc,
		    pagep, argp->indx, &dbt, argp->isdeleted);
		__os_free(env, dbt.data);

		/* Reset the deleted flag, if necessary. */
		if (argp->isdeleted && LEVEL(pagep) == LEAFLEVEL)
			B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type);
		LSN(pagep) = argp->lsn;
	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
1132 * __bam_root_recover --
1133 * Recovery function for setting the root page on the meta-data page.
1135 * PUBLIC: int __bam_root_recover
1136 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/* Redo/undo recording the root pgno on the btree meta-data page. */
__bam_root_recover(env, dbtp, lsnp, op, info)
	__bam_root_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_root_print);
	REC_INTRO(__bam_root_read, ip, 0);

	if ((ret = __memp_fget(mpf, &argp->meta_pgno, ip, NULL,
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Need to redo update described. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
		meta->root = argp->root_pgno;
		meta->dbmeta.lsn = *lsnp;
		/* Keep the in-memory btree root in sync with the meta page. */
		((BTREE *)file_dbp->bt_internal)->bt_root = meta->root;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Nothing to undo except lsn. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
		meta->dbmeta.lsn = argp->meta_lsn;
	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (meta != NULL)
		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1196 * __bam_curadj_recover --
1197 * Transaction abort function to undo cursor adjustments.
1198 * This should only be triggered by subtransaction aborts.
1200 * PUBLIC: int __bam_curadj_recover
1201 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/*
 * Undo in-memory cursor adjustments on subtransaction abort; no page
 * contents are modified (mpf is unused, hence COMPQUIET).
 */
__bam_curadj_recover(env, dbtp, lsnp, op, info)
	__bam_curadj_args *argp;

	COMPQUIET(mpf, NULL);

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_curadj_print);
	REC_INTRO(__bam_curadj_read, ip, 1);

	/* Cursor adjustments are only undone (abort), never redone. */
	if (op != DB_TXN_ABORT)

	switch (argp->mode) {
		/* NOTE(review): case labels elided in this excerpt. */
		if ((ret = __bam_ca_di(dbc, argp->from_pgno,
		    argp->from_indx, -(int)argp->first_indx)) != 0)
		if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx,
		    argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
		    __bam_ca_rsplit(dbc, argp->to_pgno, argp->from_pgno)) != 0)
		if ((ret = __bam_ca_undosplit(file_dbp, argp->from_pgno,
		    argp->to_pgno, argp->left_pgno, argp->from_indx)) != 0)

done:	*lsnp = argp->prev_lsn;
1258 * __bam_rcuradj_recover --
1259 * Transaction abort function to undo cursor adjustments in rrecno.
1260 * This should only be triggered by subtransaction aborts.
1262 * PUBLIC: int __bam_rcuradj_recover
1263 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/*
 * Undo rrecno cursor adjustments on subtransaction abort using a
 * freshly allocated scratch RECNO cursor; no page contents change.
 */
__bam_rcuradj_recover(env, dbtp, lsnp, op, info)
	__bam_rcuradj_args *argp;

	COMPQUIET(mpf, NULL);

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_rcuradj_print);
	REC_INTRO(__bam_rcuradj_read, ip, 1);

	/* Cursor adjustments are only undone (abort), never redone. */
	if (op != DB_TXN_ABORT)

	/*
	 * We don't know whether we're in an offpage dup set, and
	 * thus don't know whether the dbc REC_INTRO has handed us is
	 * of a reasonable type.  It's certainly unset, so if this is
	 * an offpage dup set, we don't have an OPD cursor.  The
	 * simplest solution is just to allocate a whole new cursor
	 * for our use; we're only really using it to hold pass some
	 * state into __ram_ca, and this way we don't need to make
	 * this function know anything about how offpage dups work.
	 */
	if ((ret = __db_cursor_int(file_dbp, NULL,
	    NULL, DB_RECNO, argp->root, 0, NULL, &rdbc)) != 0)

	cp = (BTREE_CURSOR *)rdbc->internal;
	F_SET(cp, C_RENUMBER);
	cp->recno = argp->recno;

	switch (argp->mode) {
		/* NOTE(review): case labels elided in this excerpt. */
		/*
		 * The way to undo a delete is with an insert.  Since
		 * we're undoing it, the delete flag must be set.
		 */
		F_SET(cp, C_DELETED);
		F_SET(cp, C_RENUMBER);	/* Just in case. */
		cp->order = argp->order;
		if ((ret = __ram_ca(rdbc, CA_ICURRENT, NULL)) != 0)
		/*
		 * The way to undo an insert is with a delete.  The delete
		 * flag is unset to start with.
		 */
		F_CLR(cp, C_DELETED);
		cp->order = INVALID_ORDER;
		if ((ret = __ram_ca(rdbc, CA_DELETE, NULL)) != 0)

done:	*lsnp = argp->prev_lsn;
out:	if (rdbc != NULL && (t_ret = __dbc_close(rdbc)) != 0 && ret == 0)
1344 * __bam_relink_recover --
1345 * Recovery function for relink.
1347 * PUBLIC: int __bam_relink_recover
1348 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
/* Redo/undo fixing sibling page links after a page add/remove/replace. */
__bam_relink_recover(env, dbtp, lsnp, op, info)
	__bam_relink_args *argp;
	int cmp_n, cmp_p, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	REC_PRINT(__bam_relink_print);
	REC_INTRO(__bam_relink_read, ip, 0);

	/*
	 * There are up to three pages we need to check -- the page, and the
	 * previous and next pages, if they existed.  For a page add operation,
	 * the current page is the result of a split and is being recovered
	 * elsewhere, so all we need do is recover the next page.
	 */
	if (argp->next == PGNO_INVALID)
	if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->next, ret);

	/* cmp_p == 0: redo applies; cmp_n == 0: undo applies. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Redo the remove or replace. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		if (argp->new_pgno == PGNO_INVALID)
			pagep->prev_pgno = argp->prev;
			pagep->prev_pgno = argp->new_pgno;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Undo the remove or replace. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		pagep->prev_pgno = argp->pgno;

		pagep->lsn = argp->lsn_next;

	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)

prev:	if (argp->prev == PGNO_INVALID)
	if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->prev, ret);

	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Redo the relink. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		if (argp->new_pgno == PGNO_INVALID)
			pagep->next_pgno = argp->next;
			pagep->next_pgno = argp->new_pgno;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Undo the relink. */
		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
		pagep->next_pgno = argp->pgno;
		pagep->lsn = argp->lsn_prev;

	if ((ret = __memp_fput(mpf,
	    ip, pagep, file_dbp->priority)) != 0)

done:	*lsnp = argp->prev_lsn;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1456 * __bam_merge_44_recover --
1457 *	Recovery function for merge (historic 4.4 log-record format).
1459 * PUBLIC: int __bam_merge_44_recover
1460 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1463 __bam_merge_44_recover(env, dbtp, lsnp, op, info)
1470 __bam_merge_44_args *argp;
1477 db_indx_t indx, *ninp, *pinp;
1480 int cmp_n, cmp_p, i, ret;
1482 ip = ((DB_TXNHEAD *)info)->thread_info;
1483 REC_PRINT(__bam_merge_44_print);
1484 REC_INTRO(__bam_merge_44_read, ip, 1);
/* First leg: the target page of the merge (argp->pgno). */
1486 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1487 if (ret != DB_PAGE_NOTFOUND) {
1488 ret = __db_pgerr(file_dbp, argp->pgno, ret);
/*
 * cmp_p == 0: page LSN matches the logged before-image LSN -> redo.
 * cmp_n == 0: page LSN matches this record's LSN -> undo.
 */
1494 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1495 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1496 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1498 if (cmp_p == 0 && DB_REDO(op)) {
1500 * If the header is provided the page is empty, copy the
1503 DB_ASSERT(env, argp->hdr.size == 0 || NUM_ENT(pagep) == 0);
1504 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1505 if (argp->hdr.size != 0) {
1506 P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1507 PREV_PGNO(argp->hdr.data),
1508 NEXT_PGNO(argp->hdr.data),
1509 LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
/* Overflow pages carry raw bytes after the overhead; no index table. */
1511 if (TYPE(pagep) == P_OVERFLOW) {
1512 OV_REF(pagep) = OV_REF(argp->hdr.data);
1513 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1514 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1515 memcpy(bp, argp->data.data, argp->data.size);
1517 /* Copy the data segment. */
1518 bp = (u_int8_t *)pagep +
1519 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1520 memcpy(bp, argp->data.data, argp->data.size);
1522 /* Copy index table offset past the current entries. */
1523 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1524 ninp = argp->ind.data;
1526 i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1528 - (file_dbp->pgsize - HOFFSET(pagep));
1529 HOFFSET(pagep) -= argp->data.size;
1530 NUM_ENT(pagep) += i;
1533 } else if (cmp_n == 0 && !DB_REDO(op)) {
1535 * Since logging is logical at the page level
1536 * we cannot just truncate the data space. Delete
1537 * the proper number of items from the logical end
1540 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1541 for (i = 0; i < (int)(argp->ind.size / sizeof(*ninp)); i++) {
1542 indx = NUM_ENT(pagep) - 1;
/* Duplicate index entries share one on-page item; see P_INDX pairing. */
1543 if (P_INP(file_dbp, pagep)[indx] ==
1544 P_INP(file_dbp, pagep)[indx - P_INDX]) {
/* Per page type, compute the size of the item to delete. */
1548 switch (TYPE(pagep)) {
1552 bk = GET_BKEYDATA(file_dbp, pagep, indx);
1553 size = BITEM_SIZE(bk);
1557 size = BINTERNAL_SIZE(
1558 GET_BINTERNAL(file_dbp, pagep, indx)->len);
1561 size = RINTERNAL_SIZE;
/* Unknown page type: report a page-format error. */
1565 ret = __db_pgfmt(env, PGNO(pagep));
1569 __db_ditem(dbc, pagep, indx, size)) != 0)
1572 if (argp->ind.size == 0)
1573 HOFFSET(pagep) = file_dbp->pgsize;
/* Restore the before-image LSN now that the items are removed. */
1574 pagep->lsn = argp->lsn;
1577 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
/* Second leg: the page that was emptied by the merge (argp->npgno). */
1580 next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
1581 if (ret != DB_PAGE_NOTFOUND) {
/*
 * FIX: report the page that actually failed to fetch (npgno); the
 * original passed argp->pgno here, misattributing the error.
 */
1582 ret = __db_pgerr(file_dbp, argp->npgno, ret);
1588 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1589 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
1590 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
1592 if (cmp_p == 0 && DB_REDO(op)) {
1593 /* Need to truncate the page. */
1594 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1595 HOFFSET(pagep) = file_dbp->pgsize;
1598 } else if (cmp_n == 0 && !DB_REDO(op)) {
1599 /* Need to put the data back on the page. */
1600 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1601 if (TYPE(pagep) == P_OVERFLOW) {
1602 OV_REF(pagep) = OV_REF(argp->hdr.data);
1603 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1604 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1605 memcpy(bp, argp->data.data, argp->data.size);
1607 bp = (u_int8_t *)pagep +
1608 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1609 memcpy(bp, argp->data.data, argp->data.size);
1611 /* Copy index table. */
1612 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1613 ninp = argp->ind.data;
1615 i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1617 HOFFSET(pagep) -= argp->data.size;
/* Undo complete: restore the emptied page's before-image LSN. */
1620 pagep->lsn = argp->nlsn;
1623 if ((ret = __memp_fput(mpf,
1624 ip, pagep, dbc->priority)) != 0)
/* Continue recovery at this transaction's previous log record. */
1627 *lsnp = argp->prev_lsn;
1634 * __bam_merge_recover --
1635 *	Recovery function for merge (current log-record format).
1637 * PUBLIC: int __bam_merge_recover
1638 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1641 __bam_merge_recover(env, dbtp, lsnp, op, info)
1648 __bam_merge_args *argp;
1655 db_indx_t indx, *ninp, *pinp;
1658 int cmp_n, cmp_p, i, ret;
1660 ip = ((DB_TXNHEAD *)info)->thread_info;
1661 REC_PRINT(__bam_merge_print);
1662 REC_INTRO(__bam_merge_read, ip, 1);
/* First leg: the target page of the merge (argp->pgno). */
1664 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1665 if (ret != DB_PAGE_NOTFOUND) {
1666 ret = __db_pgerr(file_dbp, argp->pgno, ret);
/*
 * cmp_p == 0: page LSN matches the logged before-image LSN -> redo.
 * cmp_n == 0: page LSN matches this record's LSN -> undo.
 */
1672 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1673 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1674 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1675 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
1677 if (cmp_p == 0 && DB_REDO(op)) {
1679 * When pg_copy is set, we are copying onto a new page.
1681 DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0);
1682 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1683 if (argp->pg_copy) {
1684 P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1685 PREV_PGNO(argp->hdr.data),
1686 NEXT_PGNO(argp->hdr.data),
1687 LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
/* Overflow pages carry raw bytes after the overhead; no index table. */
1689 if (TYPE(pagep) == P_OVERFLOW) {
1690 OV_REF(pagep) = OV_REF(argp->hdr.data);
1691 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1692 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
1693 memcpy(bp, argp->data.data, argp->data.size);
1695 /* Copy the data segment. */
1696 bp = (u_int8_t *)pagep +
1697 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1698 memcpy(bp, argp->data.data, argp->data.size);
1700 /* Copy index table offset past the current entries. */
1701 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1702 ninp = P_INP(file_dbp, argp->hdr.data);
1703 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
1705 - (file_dbp->pgsize - HOFFSET(pagep));
1706 HOFFSET(pagep) -= argp->data.size;
1707 NUM_ENT(pagep) += i;
1710 } else if (cmp_n == 0 && !DB_REDO(op)) {
1711 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
/* Overflow undo is a plain truncation of the data space. */
1712 if (TYPE(pagep) == P_OVERFLOW) {
1713 HOFFSET(pagep) = file_dbp->pgsize;
1718 * Since logging is logical at the page level we cannot just
1719 * truncate the data space. Delete the proper number of items
1720 * from the logical end of the page.
1722 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) {
1723 indx = NUM_ENT(pagep) - 1;
/* Duplicate index entries share one on-page item; see P_INDX pairing. */
1724 if (P_INP(file_dbp, pagep)[indx] ==
1725 P_INP(file_dbp, pagep)[indx - P_INDX]) {
/* Per page type, compute the size of the item to delete. */
1729 switch (TYPE(pagep)) {
1733 bk = GET_BKEYDATA(file_dbp, pagep, indx);
1734 size = BITEM_SIZE(bk);
1738 size = BINTERNAL_SIZE(
1739 GET_BINTERNAL(file_dbp, pagep, indx)->len);
1742 size = RINTERNAL_SIZE;
/* Unknown page type: report a page-format error. */
1746 ret = __db_pgfmt(env, PGNO(pagep));
1749 if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0)
/* Undo complete: restore the before-image LSN. */
1752 setlsn: pagep->lsn = argp->lsn;
1755 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
/* Second leg: the page that was emptied by the merge (argp->npgno). */
1758 next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
1759 if (ret != DB_PAGE_NOTFOUND) {
/*
 * FIX: report the page that actually failed to fetch (npgno); the
 * original passed argp->pgno here, misattributing the error.
 */
1760 ret = __db_pgerr(file_dbp, argp->npgno, ret);
1766 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1767 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
1768 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
1770 if (cmp_p == 0 && DB_REDO(op)) {
1771 /* Need to truncate the page. */
1772 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1773 HOFFSET(pagep) = file_dbp->pgsize;
1776 } else if (cmp_n == 0 && !DB_REDO(op)) {
1777 /* Need to put the data back on the page. */
1778 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1779 if (TYPE(pagep) == P_OVERFLOW) {
1780 OV_REF(pagep) = OV_REF(argp->hdr.data);
1781 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1782 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
1783 memcpy(bp, argp->data.data, argp->data.size);
1785 bp = (u_int8_t *)pagep +
1786 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1787 memcpy(bp, argp->data.data, argp->data.size);
1789 /* Copy index table. */
1790 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1791 ninp = P_INP(file_dbp, argp->hdr.data);
1792 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
1794 HOFFSET(pagep) -= argp->data.size;
1795 NUM_ENT(pagep) += i;
/* Restore the emptied page's before-image LSN. */
1797 pagep->lsn = argp->nlsn;
1800 if ((ret = __memp_fput(mpf,
1801 ip, pagep, dbc->priority)) != 0)
/* Continue recovery at this transaction's previous log record. */
1804 *lsnp = argp->prev_lsn;
1811 * __bam_pgno_recover --
1812 *	Recovery function for page number replacement.
1814 * PUBLIC: int __bam_pgno_recover
1815 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1818 __bam_pgno_recover(env, dbtp, lsnp, op, info)
1826 __bam_pgno_args *argp;
1831 PAGE *pagep, *npagep;
1833 int cmp_n, cmp_p, ret;
1835 ip = ((DB_TXNHEAD *)info)->thread_info;
1836 REC_PRINT(__bam_pgno_print);
1837 REC_INTRO(__bam_pgno_read, ip, 0);
/* Fetch the page holding the item whose child/overflow pgno changed. */
1839 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
/*
 * cmp_p == 0: page LSN matches the logged before-image LSN -> redo.
 * cmp_n == 0: page LSN matches this record's LSN -> undo.
 */
1841 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1842 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1843 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1844 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
/*
 * Both directions share the same slot-location code below; only the
 * value written (npgno vs. opgno) differs.
 */
1846 if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) {
1847 switch (TYPE(pagep)) {
1850 * An internal record can have both a overflow
1851 * and child pointer. Fetch the page to see
1854 bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
1855 if (B_TYPE(bi->type) == B_OVERFLOW) {
/* Peek at the referenced page to learn which pointer to patch. */
1856 REC_FGET(mpf, ip, argp->npgno, &npagep, out);
1858 if (TYPE(npagep) == P_OVERFLOW)
1860 &((BOVERFLOW *)(bi->data))->pgno;
/* Done inspecting; release the peeked-at page before updating. */
1863 if ((ret = __memp_fput(mpf, ip,
1864 npagep, file_dbp->priority)) != 0)
/* Recno internal entry: patch its child page number. */
1872 &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno;
/* Overflow item on a leaf/dup page: patch its overflow page number. */
1876 &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno;
1881 /* Need to redo update described. */
1882 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1883 *pgnop = argp->npgno;
/* Undo: write back the old page number and the before-image LSN. */
1886 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1887 *pgnop = argp->opgno;
1888 pagep->lsn = argp->lsn;
1892 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
/* Continue recovery at this transaction's previous log record. */
1896 *lsnp = argp->prev_lsn;
1903 * __bam_relink_43_recover --
1904 * Recovery function for relink.
1906 * PUBLIC: int __bam_relink_43_recover
1907 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1910 __bam_relink_43_recover(env, dbtp, lsnp, op, info)
1917 __bam_relink_43_args *argp;
1923 int cmp_n, cmp_p, modified, ret;
1925 ip = ((DB_TXNHEAD *)info)->thread_info;
1927 REC_PRINT(__bam_relink_43_print);
1928 REC_INTRO(__bam_relink_43_read, ip, 0);
1931 * There are up to three pages we need to check -- the page, and the
1932 * previous and next pages, if they existed. For a page add operation,
1933 * the current page is the result of a split and is being recovered
1934 * elsewhere, so all we need do is recover the next page.
1936 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1937 if (ret != DB_PAGE_NOTFOUND) {
1938 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1944 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1945 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1946 if (cmp_p == 0 && DB_REDO(op)) {
1947 /* Redo the relink. */
1948 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1950 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
1951 /* Undo the relink. */
1952 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1953 pagep->next_pgno = argp->next;
1954 pagep->prev_pgno = argp->prev;
1955 pagep->lsn = argp->lsn;
1957 if ((ret = __memp_fput(mpf,
1958 ip, pagep, file_dbp->priority)) != 0)
1962 next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
1963 if (ret != DB_PAGE_NOTFOUND) {
1964 ret = __db_pgerr(file_dbp, argp->next, ret);
1971 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1972 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
1973 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
1974 if (cmp_p == 0 && DB_REDO(op)) {
1975 /* Redo the remove or undo the add. */
1976 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1977 pagep->prev_pgno = argp->prev;
1979 } else if (cmp_n == 0 && DB_UNDO(op)) {
1980 /* Undo the remove or redo the add. */
1981 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1982 pagep->prev_pgno = argp->pgno;
1987 pagep->lsn = argp->lsn_next;
1991 if ((ret = __memp_fput(mpf,
1992 ip, pagep, file_dbp->priority)) != 0)
1996 prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
1997 if (ret != DB_PAGE_NOTFOUND) {
1998 ret = __db_pgerr(file_dbp, argp->prev, ret);
2005 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2006 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2007 if (cmp_p == 0 && DB_REDO(op)) {
2008 /* Redo the relink. */
2009 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2010 pagep->next_pgno = argp->next;
2012 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2013 /* Undo the relink. */
2014 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2015 pagep->next_pgno = argp->pgno;
2020 pagep->lsn = argp->lsn_prev;
2024 if ((ret = __memp_fput(mpf,
2025 ip, pagep, file_dbp->priority)) != 0)
2029 done: *lsnp = argp->prev_lsn;
2032 out: if (pagep != NULL)
2033 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);