2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
11 static const char sccsid[] = "@(#)bt_rec.c 10.13 (Sleepycat) 9/3/97";
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
30 #include "db_dispatch.h"
31 #include "common_ext.h"
34 * __bam_pg_alloc_recover --
35 * Recovery function for pg_alloc.
37 * PUBLIC: int __bam_pg_alloc_recover
38 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a page-allocation log record.  The visible code
 * fetches the metadata page and the allocated page, compares each page's
 * LSN with the LSNs carried by the log record, and either re-applies
 * (redo) or reverses (!redo) the allocation on both pages.
 *
 * NOTE(review): this listing has gaps (parameter declarations, braces and
 * several statements are not shown); the comments below describe only the
 * lines that are visible.
 */
41 __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
48 __bam_pg_alloc_args *argp;
54 int cmp_n, cmp_p, created, modified, ret;
56 REC_PRINT(__bam_pg_alloc_print);
57 REC_INTRO(__bam_pg_alloc_read);
60 * Fix up the allocated page. If we're redoing the operation, we have
61 * to get the page (creating it if it doesn't exist), and update its
62 * LSN. If we're undoing the operation, we have to reset the page's
63 * LSN and put it on the free list.
65 * Fix up the metadata page. If we're redoing the operation, we have
66 * to get the metadata page and update its LSN and its free pointer.
67 * If we're undoing the operation and the page was ever created, we put
/* Fetch the metadata page; pgno is assigned on a line not shown here. */
71 if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
72 (void)__db_pgerr(file_dbp, pgno);
/* Fetch the allocated page, asking the pool to create it if missing. */
75 if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
76 (void)__db_pgerr(file_dbp, argp->pgno);
/* The metadata page is already held: return it unmodified on this error. */
77 (void)memp_fput(mpf, meta, 0);
81 /* Fix up the allocated page. */
/* `created` records whether the page currently carries a zero LSN. */
82 created = IS_ZERO_LSN(LSN(pagep));
/* cmp_n: record LSN vs. page LSN; cmp_p: page LSN vs. pre-update LSN. */
84 cmp_n = log_compare(lsnp, &LSN(pagep));
85 cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
86 if ((created || cmp_p == 0) && redo) {
87 /* Need to redo update described. */
/* Re-initialize as an empty page of the logged type, with no siblings. */
88 P_INIT(pagep, file_dbp->pgsize,
89 argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
93 } else if ((created || cmp_n == 0) && !redo) {
94 /* Need to undo update described. */
/* Mark the page invalid and link it in front of the current free list. */
95 P_INIT(pagep, file_dbp->pgsize,
96 argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
98 pagep->lsn = argp->page_lsn;
/* Return the page, flagged dirty only if `modified` is set. */
101 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
102 (void)__db_panic(file_dbp);
103 (void)memp_fput(mpf, meta, 0);
107 /* Fix up the metadata page. */
109 cmp_n = log_compare(lsnp, &LSN(meta));
110 cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
111 if (cmp_p == 0 && redo) {
112 /* Need to redo update described. */
/* The free-list head moves on to the logged successor page. */
114 meta->free = argp->next;
116 } else if (cmp_n == 0 && !redo) {
117 /* Need to undo update described. */
/* Restore the old metadata LSN and make the page the free-list head. */
118 meta->lsn = argp->meta_lsn;
119 meta->free = argp->pgno;
122 if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
123 (void)__db_panic(file_dbp);
/* Hand the caller the previous LSN stored in the record. */
127 *lsnp = argp->prev_lsn;
134 * __bam_pg_free_recover --
135 * Recovery function for pg_free.
137 * PUBLIC: int __bam_pg_free_recover
138 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a page-free log record.  The visible code first
 * repairs the freed page, then the metadata page, choosing between redo
 * and undo from `redo` and from LSN comparisons against the log record.
 *
 * NOTE(review): this listing has gaps (declarations, braces and several
 * statements are not shown); comments describe visible lines only.
 */
141 __bam_pg_free_recover(logp, dbtp, lsnp, redo, info)
148 __bam_pg_free_args *argp;
154 int cmp_n, cmp_p, modified, ret;
156 REC_PRINT(__bam_pg_free_print);
157 REC_INTRO(__bam_pg_free_read);
160 * Fix up the freed page. If we're redoing the operation we get the
161 * page and explicitly discard its contents, then update its LSN. If
162 * we're undoing the operation, we get the page and restore its header.
/* Fetch the freed page; no create flag is passed. */
164 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
165 (void)__db_pgerr(file_dbp, argp->pgno);
/* cmp_p compares against the LSN inside the logged header image. */
169 cmp_n = log_compare(lsnp, &LSN(pagep));
170 cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data));
171 if (cmp_p == 0 && redo) {
172 /* Need to redo update described. */
/* Re-initialize as an invalid page whose next pointer is argp->next. */
173 P_INIT(pagep, file_dbp->pgsize,
174 pagep->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
178 } else if (cmp_n == 0 && !redo) {
179 /* Need to undo update described. */
/* Copy the logged header bytes back over the start of the page. */
180 memcpy(pagep, argp->header.data, argp->header.size);
/* Return the page, flagged dirty only if `modified` is set. */
184 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
185 (void)__db_panic(file_dbp);
190 * Fix up the metadata page. If we're redoing or undoing the operation
191 * we get the page and update its LSN and free pointer.
193 pgno = PGNO_METADATA;
194 if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
195 (void)__db_pgerr(file_dbp, pgno);
200 cmp_n = log_compare(lsnp, &LSN(meta));
201 cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
202 if (cmp_p == 0 && redo) {
203 /* Need to redo update described. */
/* The freed page becomes the head of the free list. */
204 meta->free = argp->pgno;
208 } else if (cmp_n == 0 && !redo) {
209 /* Need to undo update described. */
/* Point the free list back at the logged successor, restore the LSN. */
210 meta->free = argp->next;
212 meta->lsn = argp->meta_lsn;
215 if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
216 (void)__db_panic(file_dbp);
/* Hand the caller the previous LSN stored in the record. */
220 *lsnp = argp->prev_lsn;
227 * __bam_split_recover --
228 * Recovery function for split.
230 * PUBLIC: int __bam_split_recover
231 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a page-split log record.  From the visible code:
 * the redo path rebuilds the left and right children in heap scratch
 * pages from the logged page image and copies them over whichever pool
 * pages need updating, rebuilds the root for a root split, and relinks
 * the following page; the undo path restores the logged page image,
 * resets the children's LSNs and restores the following page's
 * previous-page pointer.  Every memp_fput result is captured in `ret`
 * so a failed page release is visible in the value this function holds.
 *
 * NOTE(review): this listing has gaps (declarations, braces, goto/return
 * statements and several assignments are not shown); comments describe
 * visible lines only.
 */
234 __bam_split_recover(logp, dbtp, lsnp, redo, info)
241 __bam_split_args *argp;
244 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
246 int l_update, p_update, r_update, ret, rootsplit, t_ret;
248 REC_PRINT(__bam_split_print);
/* Start with no pages held, so the cleanup tests at `out` are well defined. */
251 _lp = lp = np = pp = _rp = rp = NULL;
253 REC_INTRO(__bam_split_read);
256 * There are two kinds of splits that we have to recover from. The
257 * first is a root-page split, where the root page is split from a
258 * leaf page into an internal page and two new leaf pages are created.
259 * The second is where a page is split into two pages, and a new key
260 * is inserted into the parent page.
264 rootsplit = pgno == PGNO_ROOT;
/* Failure to fetch a child is tested here but not reported as an error. */
265 if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
267 if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
271 l_update = r_update = p_update = 0;
273 * Decide if we need to resplit the page.
275 * If this is a root split, then the root has to exist, it's
276 * the page we're splitting and it gets modified. If this is
277 * not a root split, then the left page has to exist, for the
281 if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
282 (void)__db_pgerr(file_dbp, pgno);
287 log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0;
290 (void)__db_pgerr(file_dbp, argp->left);
/* A child is tested when it is absent or still at its pre-split LSN. */
293 if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0)
295 if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0)
297 if (!p_update && !l_update && !r_update)
300 /* Allocate and initialize new left/right child pages. */
301 if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL)
303 if ((_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
304 nomem: __set_errno(ENOMEM);
305 __db_err(file_dbp->dbenv, "%s", strerror(errno));
/* First P_INIT pair: both scratch children take the logged page numbers. */
309 P_INIT(_lp, file_dbp->pgsize, argp->left,
311 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
312 LEVEL(sp), TYPE(sp));
313 P_INIT(_rp, file_dbp->pgsize, argp->right,
314 ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
315 PGNO_INVALID, LEVEL(sp), TYPE(sp));
/* Second P_INIT pair: the left child keeps the split page's number and links. */
317 P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
318 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
319 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
320 LEVEL(sp), TYPE(sp));
321 P_INIT(_rp, file_dbp->pgsize, argp->right,
322 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
323 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
324 LEVEL(sp), TYPE(sp));
327 /* Split the page. */
/* Presumably entries below argp->indx go left and the rest go right. */
328 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
329 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
333 /* If the left child is wrong, update it. */
334 if (lp == NULL && (ret =
335 memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
336 (void)__db_pgerr(file_dbp, argp->left);
/* Overwrite the pool page with the rebuilt scratch copy. */
341 memcpy(lp, _lp, file_dbp->pgsize);
343 if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
348 /* If the right child is wrong, update it. */
349 if (rp == NULL && (ret = memp_fget(mpf,
350 &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
351 (void)__db_pgerr(file_dbp, argp->right);
356 memcpy(rp, _rp, file_dbp->pgsize);
358 if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
364 * If the parent page is wrong, update it. This is of interest
365 * only if it was a root split, since root splits create parent
366 * pages. All other splits modify a parent page, but those are
367 * separately logged and recovered.
369 if (rootsplit && p_update) {
/* The new root is an internal page one level above the children. */
370 if (file_dbp->type == DB_BTREE)
371 P_INIT(pp, file_dbp->pgsize,
372 PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
373 _lp->level + 1, P_IBTREE);
375 P_INIT(pp, file_dbp->pgsize,
376 PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
377 _lp->level + 1, P_IRECNO);
379 file_dbp->type == DB_RECNO ||
380 F_ISSET(file_dbp, DB_BT_RECNUM) ?
381 __bam_total(_lp) + __bam_total(_rp) : 0);
383 if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
389 * Finally, redo the next-page link if necessary. This is of
390 * interest only if it wasn't a root split -- inserting a new
391 * page in the tree requires that any following page have its
392 * previous-page pointer updated to our new page. The next
393 * page had better exist.
395 if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
396 if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
397 (void)__db_pgerr(file_dbp, argp->npgno);
401 if (log_compare(&LSN(np), &argp->nlsn) == 0) {
402 PREV_PGNO(np) = argp->right;
404 if ((ret = memp_fput(mpf,
405 np, DB_MPOOL_DIRTY)) != 0)
412 * If the split page is wrong, replace its contents with the
413 * logged page contents. The split page had better exist.
415 if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
416 (void)__db_pgerr(file_dbp, pgno);
/* Restore only if the page still carries this record's LSN. */
420 if (log_compare(lsnp, &LSN(pp)) == 0) {
421 memcpy(pp, argp->pg.data, argp->pg.size);
422 if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
428 * If it's a root split and the left child ever existed, put
429 * it on the free list. (If it's not a root split, we just
430 * updated the left page -- it's the same as the split page.)
431 * If the right child ever existed, root split or not, put it
434 if ((rootsplit && lp != NULL) || rp != NULL) {
435 if (rootsplit && lp != NULL &&
436 log_compare(lsnp, &LSN(lp)) == 0) {
437 lp->lsn = argp->llsn;
439 memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
444 log_compare(lsnp, &LSN(rp)) == 0) {
445 rp->lsn = argp->rlsn;
447 memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
454 * Finally, undo the next-page link if necessary. This is of
455 * interest only if it wasn't a root split -- inserting a new
456 * page in the tree requires that any following page have its
457 * previous-page pointer updated to our new page. The next
458 * page had better exist.
460 if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
461 if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
462 (void)__db_pgerr(file_dbp, argp->npgno);
466 if (log_compare(lsnp, &LSN(np)) == 0) {
467 PREV_PGNO(np) = argp->left;
468 np->lsn = argp->nlsn;
/* Capture the result in ret, as every other memp_fput here does. */
469 if ((ret = memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
477 *lsnp = argp->prev_lsn;
480 fatal: (void)__db_panic(file_dbp);
482 out: /* Free any pages that weren't dirtied. */
/* Release each page still held, unmodified; t_ret matters only while ret == 0. */
483 if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
485 if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
487 if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
489 if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
492 /* Free any allocated space. */
502 * __bam_rsplit_recover --
503 * Recovery function for a reverse split.
505 * PUBLIC: int __bam_rsplit_recover
506 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a reverse-split log record.  The visible code
 * repairs two pages in turn: the root page, and the page whose contents
 * were copied over the root.
 *
 * NOTE(review): this listing has gaps (declarations, braces and several
 * statements are not shown); comments describe visible lines only.
 */
509 __bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
516 __bam_rsplit_args *argp;
521 int cmp_n, cmp_p, modified, ret;
523 REC_PRINT(__bam_rsplit_print);
524 REC_INTRO(__bam_rsplit_read);
526 /* Fix the root page. */
/* pgno is assigned on a line not shown here. */
528 if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
529 __db_pgerr(file_dbp, pgno);
534 cmp_n = log_compare(lsnp, &LSN(pagep));
535 cmp_p = log_compare(&LSN(pagep), &argp->rootlsn);
536 if (cmp_p == 0 && redo) {
537 /* Need to redo update described. */
/* Copy the logged page image over the root, then renumber it as the root. */
538 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
539 pagep->pgno = PGNO_ROOT;
542 } else if (cmp_n == 0 && !redo) {
543 /* Need to undo update described. */
/* Re-create the root one level higher, same type, then re-add its entry. */
544 P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
545 PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
546 if ((ret = __db_pitem(file_dbp, pagep, 0,
547 argp->rootent.size, &argp->rootent, NULL)) != 0)
549 pagep->lsn = argp->rootlsn;
/* Return the root, flagged dirty only if `modified` is set. */
552 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
553 (void)__db_panic(file_dbp);
557 /* Fix the page copied over the root page. */
558 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
559 (void)__db_pgerr(file_dbp, argp->pgno);
/* cmp_p compares against the LSN inside the logged page image. */
564 cmp_n = log_compare(lsnp, &LSN(pagep));
565 cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data));
566 if (cmp_p == 0 && redo) {
567 /* Need to redo update described. */
570 } else if (cmp_n == 0 && !redo) {
571 /* Need to undo update described. */
/* Restore the page from the logged image. */
572 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
575 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
576 (void)__db_panic(file_dbp);
/* Hand the caller the previous LSN stored in the record. */
581 *lsnp = argp->prev_lsn;
587 * __bam_adj_recover --
588 * Recovery function for adj.
590 * PUBLIC: int __bam_adj_recover
591 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for an index-adjust log record.  The visible code
 * fetches the page, compares LSNs, and calls __bam_adjindx with the
 * logged direction on redo and the opposite direction on undo.
 *
 * NOTE(review): this listing has gaps (declarations, braces and several
 * statements are not shown); comments describe visible lines only.
 */
594 __bam_adj_recover(logp, dbtp, lsnp, redo, info)
601 __bam_adj_args *argp;
605 int cmp_n, cmp_p, modified, ret;
607 REC_PRINT(__bam_adj_print);
608 REC_INTRO(__bam_adj_read);
610 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
611 (void)__db_pgerr(file_dbp, argp->pgno);
/* cmp_n: record LSN vs. page LSN; cmp_p: page LSN vs. pre-update LSN. */
617 cmp_n = log_compare(lsnp, &LSN(pagep));
618 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
619 if (cmp_p == 0 && redo) {
620 /* Need to redo update described. */
621 if ((ret = __bam_adjindx(file_dbp,
622 pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
627 } else if (cmp_n == 0 && !redo) {
628 /* Need to undo update described. */
/* Same call with is_insert negated, so the adjustment is reversed. */
629 if ((ret = __bam_adjindx(file_dbp,
630 pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
633 LSN(pagep) = argp->lsn;
/* The caller's LSN is advanced only when the page is returned cleanly. */
636 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
637 *lsnp = argp->prev_lsn;
/* Error exit: return the page unmodified. */
640 err: (void)memp_fput(mpf, pagep, 0);
646 * __bam_cadjust_recover --
647 * Recovery function for the adjust of a count change in an internal
650 * PUBLIC: int __bam_cadjust_recover
651 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a record-count adjustment on an internal page.
 * The visible code fetches the page, compares LSNs, and then adds
 * argp->adjust to the entry's nrecs on redo or subtracts it on undo.
 * When argp->total is set and the page is the root, the root's record
 * count is adjusted by the same amount with the same sign.
 *
 * NOTE(review): this listing has gaps (declarations, braces and several
 * statements are not shown); comments describe visible lines only.
 */
654 __bam_cadjust_recover(logp, dbtp, lsnp, redo, info)
661 __bam_cadjust_args *argp;
665 int cmp_n, cmp_p, modified, ret;
667 REC_PRINT(__bam_cadjust_print);
668 REC_INTRO(__bam_cadjust_read);
670 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
/* NOTE(review): sibling handlers discard the __db_pgerr result instead. */
671 __set_errno(__db_pgerr(file_dbp, argp->pgno));
/* cmp_n: record LSN vs. page LSN; cmp_p: page LSN vs. pre-update LSN. */
677 cmp_n = log_compare(lsnp, &LSN(pagep));
678 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
679 if (cmp_p == 0 && redo) {
680 /* Need to redo update described. */
/* Btree pages carry counts only when DB_BT_RECNUM is set. */
681 if (file_dbp->type == DB_BTREE &&
682 F_ISSET(file_dbp, DB_BT_RECNUM)) {
683 GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
684 if (argp->total && PGNO(pagep) == PGNO_ROOT)
685 RE_NREC_ADJ(pagep, argp->adjust);
687 if (file_dbp->type == DB_RECNO) {
688 GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
689 if (argp->total && PGNO(pagep) == PGNO_ROOT)
690 RE_NREC_ADJ(pagep, argp->adjust);
695 } else if (cmp_n == 0 && !redo) {
696 /* Need to undo update described. */
697 if (file_dbp->type == DB_BTREE &&
698 F_ISSET(file_dbp, DB_BT_RECNUM)) {
699 GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
700 if (argp->total && PGNO(pagep) == PGNO_ROOT)
/* Undo removes the adjustment from the root total, as the recno branch does. */
701 RE_NREC_ADJ(pagep, -(argp->adjust));
703 if (file_dbp->type == DB_RECNO) {
704 GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
705 if (argp->total && PGNO(pagep) == PGNO_ROOT)
706 RE_NREC_ADJ(pagep, -(argp->adjust));
708 LSN(pagep) = argp->lsn;
/* The caller's LSN is advanced only when the page is returned cleanly. */
711 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
712 *lsnp = argp->prev_lsn;
718 * __bam_cdel_recover --
719 * Recovery function for the intent-to-delete of a cursor record.
721 * PUBLIC: int __bam_cdel_recover
722 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * Recovery handler for a cursor intent-to-delete log record.  The visible
 * code fetches the page, compares LSNs, and applies B_DSET on redo or
 * B_DCLR on undo to the type field of the item at argp->indx + O_INDX.
 *
 * NOTE(review): this listing has gaps (declarations, braces and several
 * statements are not shown); comments describe visible lines only.
 */
725 __bam_cdel_recover(logp, dbtp, lsnp, redo, info)
732 __bam_cdel_args *argp;
736 int cmp_n, cmp_p, modified, ret;
738 REC_PRINT(__bam_cdel_print);
739 REC_INTRO(__bam_cdel_read);
741 if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
742 (void)__db_pgerr(file_dbp, argp->pgno);
/* cmp_n: record LSN vs. page LSN; cmp_p: page LSN vs. pre-update LSN. */
748 cmp_n = log_compare(lsnp, &LSN(pagep));
749 cmp_p = log_compare(&LSN(pagep), &argp->lsn);
750 if (cmp_p == 0 && redo) {
751 /* Need to redo update described. */
/* Presumably sets the deleted flag on the item -- confirm against B_DSET. */
752 B_DSET(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
756 } else if (cmp_n == 0 && !redo) {
757 /* Need to undo update described. */
/* Presumably clears the same flag, then the old page LSN is restored. */
758 B_DCLR(GET_BKEYDATA(pagep, argp->indx + O_INDX)->type);
760 LSN(pagep) = argp->lsn;
/* The caller's LSN is advanced only when the page is returned cleanly. */
763 if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
764 *lsnp = argp->prev_lsn;