2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
8 * The President and Fellows of Harvard University. All rights reserved.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 #include "db_config.h"
40 #include "dbinc/db_page.h"
41 #include "dbinc/lock.h"
42 #include "dbinc/txn.h"
43 #include "dbinc/db_am.h"
46 * PUBLIC: int __txn_regop_recover
47 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
49 * These records are only ever written for commits. Normally, we redo any
50 * committed transaction, however if we are doing recovery to a timestamp, then
51 * we may treat transactions that committed after the timestamp as aborted.
54 __txn_regop_recover(env, dbtp, lsnp, op, info)
61 __txn_regop_args *argp;
67 (void)__txn_regop_print(env, dbtp, lsnp, op, info);
70 if ((ret = __txn_regop_read(env, dbtp->data, &argp)) != 0)
75 * We are only ever called during FORWARD_ROLL or BACKWARD_ROLL.
76 * We check for the former explicitly and the last two clauses
77 * apply to the BACKWARD_ROLL case.
80 if (op == DB_TXN_FORWARD_ROLL) {
82 * If this was a 2-phase-commit transaction, then it
83 * might already have been removed from the list, and
84 * that's OK. Ignore the return code from remove.
86 if ((ret = __db_txnlist_remove(env,
87 info, argp->txnp->txnid)) != DB_NOTFOUND && ret != 0)
89 } else if ((env->dbenv->tx_timestamp != 0 &&
90 argp->timestamp > (int32_t)env->dbenv->tx_timestamp) ||
91 (!IS_ZERO_LSN(headp->trunc_lsn) &&
92 LOG_COMPARE(&headp->trunc_lsn, lsnp) < 0)) {
94 * We failed either the timestamp check or the trunc_lsn check,
95 * so we treat this as an abort even if it was a commit record.
97 if ((ret = __db_txnlist_update(env, info,
98 argp->txnp->txnid, TXN_ABORT, NULL, &status, 1)) != 0)
100 else if (status != TXN_IGNORE && status != TXN_OK)
103 /* This is a normal commit; mark it appropriately. */
104 if ((ret = __db_txnlist_update(env,
105 info, argp->txnp->txnid, argp->opcode, lsnp,
106 &status, 0)) == DB_NOTFOUND) {
107 if ((ret = __db_txnlist_add(env,
108 info, argp->txnp->txnid,
109 argp->opcode == TXN_ABORT ?
110 TXN_IGNORE : argp->opcode, lsnp)) != 0)
112 } else if (ret != 0 ||
113 (status != TXN_IGNORE && status != TXN_OK))
118 *lsnp = argp->prev_lsn;
121 err: __db_errx(env, DB_STR_A("4514",
122 "txnid %lx commit record found, already on commit list",
123 "%lx"), (u_long)argp->txnp->txnid);
126 __os_free(env, argp);
132 * PUBLIC: int __txn_prepare_recover
133 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
135 * These records are only ever written for prepares.
138 __txn_prepare_recover(env, dbtp, lsnp, op, info)
145 __txn_prepare_args *argp;
153 (void)__txn_prepare_print(env, dbtp, lsnp, op, info);
156 if ((ret = __txn_prepare_read(env, dbtp->data, &argp)) != 0)
159 if (argp->opcode != TXN_PREPARE && argp->opcode != TXN_ABORT) {
166 * The return value here is either a DB_NOTFOUND or it is
167 * the transaction status from the list. It is not a normal
168 * error return, so we must make sure that in each of the
169 * cases below, we overwrite the ret value so we return
172 ret = __db_txnlist_find(env, info, argp->txnp->txnid, &status);
175 * If we are rolling forward, then an aborted prepare
176 * indicates that this may be the last record we'll see for
177 * this transaction ID, so we should remove it from the list.
180 if (op == DB_TXN_FORWARD_ROLL) {
181 if ((ret = __db_txnlist_remove(env,
182 info, argp->txnp->txnid)) != 0)
184 } else if (op == DB_TXN_BACKWARD_ROLL && status == TXN_PREPARE) {
186 * On the backward pass, we have four possibilities:
187 * 1. The transaction is already committed, no-op.
188 * 2. The transaction is already aborted, no-op.
189 * 3. The prepare failed and was aborted, mark as abort.
190 * 4. The transaction is neither committed nor aborted.
191 * Treat this like a commit and roll forward so that
192 * the transaction can be resurrected in the region.
193 * We handle cases 3 and 4 here; cases 1 and 2
194 * are the final clause below.
196 if (argp->opcode == TXN_ABORT) {
197 if ((ret = __db_txnlist_update(env,
198 info, argp->txnp->txnid,
199 TXN_ABORT, NULL, &status, 0)) != 0 &&
200 status != TXN_PREPARE)
205 * This is prepared, but not yet committed transaction. We
206 * need to add it to the transaction list, so that it gets
207 * rolled forward. We also have to add it to the region's
208 * internal state so it can be properly aborted or committed
209 * after recovery (see txn_recover).
211 else if ((ret = __db_txnlist_remove(env,
212 info, argp->txnp->txnid)) != 0) {
213 txn_err: __db_errx(env,
215 "transaction not in list %lx", "%lx"),
216 (u_long)argp->txnp->txnid);
218 } else if (IS_ZERO_LSN(headp->trunc_lsn) ||
219 LOG_COMPARE(&headp->trunc_lsn, lsnp) >= 0) {
220 if ((ret = __db_txnlist_add(env,
221 info, argp->txnp->txnid, TXN_COMMIT, lsnp)) == 0) {
222 /* Re-acquire the locks for this transaction. */
223 lock_dbt = &argp->locks;
224 if (LOCKING_ON(env)) {
226 if ((ret = __lock_getlocker(lt,
227 argp->txnp->txnid, 1,
228 &argp->txnp->locker)) != 0)
230 if ((ret = __lock_get_list(env,
231 argp->txnp->locker, 0,
232 DB_LOCK_WRITE, lock_dbt)) != 0)
236 ret = __txn_restore_txn(env, lsnp, argp);
243 *lsnp = argp->prev_lsn;
245 err: __os_free(env, argp);
251 * PUBLIC: int __txn_ckp_recover
252 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
255 __txn_ckp_recover(env, dbtp, lsnp, op, info)
262 __txn_ckp_args *argp;
266 __txn_ckp_print(env, dbtp, lsnp, op, info);
268 if ((ret = __txn_ckp_read(env, dbtp->data, &argp)) != 0)
271 if (op == DB_TXN_BACKWARD_ROLL)
272 __db_txnlist_ckp(env, info, lsnp);
274 *lsnp = argp->last_ckp;
275 __os_free(env, argp);
280 * __txn_child_recover
281 * Recover a commit record for a child transaction.
283 * PUBLIC: int __txn_child_recover
284 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
287 __txn_child_recover(env, dbtp, lsnp, op, info)
294 __txn_child_args *argp;
295 u_int32_t c_stat, p_stat, tmpstat;
299 (void)__txn_child_print(env, dbtp, lsnp, op, info);
301 if ((ret = __txn_child_read(env, dbtp->data, &argp)) != 0)
305 * This is a record in a PARENT's log trail indicating that a
306 * child committed. If we are aborting, return the childs last
307 * record's LSN. If we are in recovery, then if the
308 * parent is committing, we set ourselves up to commit, else
311 if (op == DB_TXN_ABORT) {
313 ret = __db_txnlist_lsnadd(env, info, &argp->prev_lsn);
315 } else if (op == DB_TXN_BACKWARD_ROLL) {
316 /* Child might exist -- look for it. */
317 ret = __db_txnlist_find(env, info, argp->child, &c_stat);
319 __db_txnlist_find(env, info, argp->txnp->txnid, &p_stat);
320 if (ret != 0 && ret != DB_NOTFOUND)
322 if (t_ret != 0 && t_ret != DB_NOTFOUND) {
327 * If the parent is in state COMMIT or IGNORE, then we apply
328 * that to the child, else we need to abort the child.
331 if (ret == DB_NOTFOUND ||
332 c_stat == TXN_OK || c_stat == TXN_COMMIT) {
333 if (t_ret == DB_NOTFOUND ||
334 (p_stat != TXN_COMMIT && p_stat != TXN_IGNORE))
339 if (ret == DB_NOTFOUND)
340 ret = __db_txnlist_add(env,
341 info, argp->child, c_stat, NULL);
343 ret = __db_txnlist_update(env, info,
344 argp->child, c_stat, NULL, &tmpstat, 0);
345 } else if (c_stat == TXN_EXPECTED) {
347 * The open after this create succeeded. If the
348 * parent succeeded, we don't want to redo; if the
349 * parent aborted, we do want to undo.
359 ret = __db_txnlist_update(env,
360 info, argp->child, c_stat, NULL, &tmpstat, 0);
361 } else if (c_stat == TXN_UNEXPECTED) {
363 * The open after this create failed. If the parent
364 * is rolling forward, we need to roll forward. If
365 * the parent failed, then we do not want to abort
366 * (because the file may not be the one in which we
369 ret = __db_txnlist_update(env, info, argp->child,
370 p_stat == TXN_COMMIT ? TXN_COMMIT : TXN_IGNORE,
373 } else if (op == DB_TXN_OPENFILES) {
375 * If we have a partial subtransaction, then the whole
376 * transaction should be ignored.
378 if ((ret = __db_txnlist_find(env,
379 info, argp->child, &c_stat)) == DB_NOTFOUND)
380 ret = __db_txnlist_update(env, info,
381 argp->txnp->txnid, TXN_IGNORE,
383 } else if (DB_REDO(op)) {
386 __db_txnlist_remove(env, info, argp->child)) != 0)
387 __db_errx(env, DB_STR_A("4516",
388 "Transaction not in list %x", "%x"), argp->child);
392 *lsnp = argp->prev_lsn;
394 out: __os_free(env, argp);
400 * __txn_restore_txn --
401 * Using only during XA recovery. If we find any transactions that are
402 * prepared, but not yet committed, then we need to restore the transaction's
403 * state into the shared region, because the TM is going to issue an abort
404 * or commit and we need to respond correctly.
406 * lsnp is the LSN of the returned LSN
407 * argp is the prepare record (in an appropriate structure)
409 * PUBLIC: int __txn_restore_txn __P((ENV *, DB_LSN *, __txn_prepare_args *));
412 __txn_restore_txn(env, lsnp, argp)
415 __txn_prepare_args *argp;
418 DB_TXNREGION *region;
422 if (argp->gid.size == 0)
425 mgr = env->tx_handle;
426 region = mgr->reginfo.primary;
427 TXN_SYSTEM_LOCK(env);
429 /* Allocate a new transaction detail structure. */
430 if ((ret = __env_alloc(&mgr->reginfo, sizeof(TXN_DETAIL), &td)) != 0) {
431 TXN_SYSTEM_UNLOCK(env);
435 /* Place transaction on active transaction list. */
436 SH_TAILQ_INSERT_HEAD(®ion->active_txn, td, links, __txn_detail);
439 td->txnid = argp->txnp->txnid;
440 __os_id(env->dbenv, &td->pid, &td->tid);
441 td->last_lsn = *lsnp;
442 td->begin_lsn = argp->begin_lsn;
443 td->parent = INVALID_ROFF;
444 td->name = INVALID_ROFF;
445 SH_TAILQ_INIT(&td->kids);
446 MAX_LSN(td->read_lsn);
447 MAX_LSN(td->visible_lsn);
449 td->mvcc_mtx = MUTEX_INVALID;
450 td->status = TXN_PREPARED;
451 td->flags = TXN_DTL_RESTORED;
452 memcpy(td->gid, argp->gid.data, argp->gid.size);
454 td->nlog_slots = TXN_NSLOTS;
455 td->log_dbs = R_OFFSET(&mgr->reginfo, td->slots);
457 region->stat.st_nrestores++;
458 #ifdef HAVE_STATISTICS
459 STAT_INC(env, txn, nactive, region->stat.st_nactive, td->txnid);
460 if (region->stat.st_nactive > region->stat.st_maxnactive)
461 STAT_SET(env, txn, maxnactive, region->stat.st_maxnactive,
462 region->stat.st_nactive, td->txnid);
464 TXN_SYSTEM_UNLOCK(env);
469 * __txn_recycle_recover --
470 * Recovery function for recycle.
472 * PUBLIC: int __txn_recycle_recover
473 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
476 __txn_recycle_recover(env, dbtp, lsnp, op, info)
483 __txn_recycle_args *argp;
487 (void)__txn_child_print(env, dbtp, lsnp, op, info);
489 if ((ret = __txn_recycle_read(env, dbtp->data, &argp)) != 0)
492 COMPQUIET(lsnp, NULL);
494 if ((ret = __db_txnlist_gen(env, info,
495 DB_UNDO(op) ? -1 : 1, argp->min, argp->max)) != 0)
498 __os_free(env, argp);
504 * PUBLIC: int __txn_regop_42_recover
505 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
507 * These records are only ever written for commits. Normally, we redo any
508 * committed transaction, however if we are doing recovery to a timestamp, then
509 * we may treat transactions that committed after the timestamp as aborted.
512 __txn_regop_42_recover(env, dbtp, lsnp, op, info)
519 __txn_regop_42_args *argp;
525 (void)__txn_regop_42_print(env, dbtp, lsnp, op, info);
528 if ((ret = __txn_regop_42_read(env, dbtp->data, &argp)) != 0)
533 * We are only ever called during FORWARD_ROLL or BACKWARD_ROLL.
534 * We check for the former explicitly and the last two clauses
535 * apply to the BACKWARD_ROLL case.
538 if (op == DB_TXN_FORWARD_ROLL) {
540 * If this was a 2-phase-commit transaction, then it
541 * might already have been removed from the list, and
542 * that's OK. Ignore the return code from remove.
544 if ((ret = __db_txnlist_remove(env,
545 info, argp->txnp->txnid)) != DB_NOTFOUND && ret != 0)
547 } else if ((env->dbenv->tx_timestamp != 0 &&
548 argp->timestamp > (int32_t)env->dbenv->tx_timestamp) ||
549 (!IS_ZERO_LSN(headp->trunc_lsn) &&
550 LOG_COMPARE(&headp->trunc_lsn, lsnp) < 0)) {
552 * We failed either the timestamp check or the trunc_lsn check,
553 * so we treat this as an abort even if it was a commit record.
555 if ((ret = __db_txnlist_update(env, info,
556 argp->txnp->txnid, TXN_ABORT, NULL, &status, 1)) != 0)
558 else if (status != TXN_IGNORE && status != TXN_OK)
561 /* This is a normal commit; mark it appropriately. */
562 if ((ret = __db_txnlist_update(env,
563 info, argp->txnp->txnid, argp->opcode, lsnp,
564 &status, 0)) == DB_NOTFOUND) {
565 if ((ret = __db_txnlist_add(env,
566 info, argp->txnp->txnid,
567 argp->opcode == TXN_ABORT ?
568 TXN_IGNORE : argp->opcode, lsnp)) != 0)
570 } else if (ret != 0 ||
571 (status != TXN_IGNORE && status != TXN_OK))
576 *lsnp = argp->prev_lsn;
579 err: __db_errx(env, DB_STR_A("4517",
580 "txnid %lx commit record found, already on commit list",
581 "%lx"), (u_long)argp->txnp->txnid);
584 __os_free(env, argp);
590 * PUBLIC: int __txn_ckp_42_recover
591 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
594 __txn_ckp_42_recover(env, dbtp, lsnp, op, info)
601 __txn_ckp_42_args *argp;
605 __txn_ckp_42_print(env, dbtp, lsnp, op, info);
607 if ((ret = __txn_ckp_42_read(env, dbtp->data, &argp)) != 0)
610 if (op == DB_TXN_BACKWARD_ROLL)
611 __db_txnlist_ckp(env, info, lsnp);
613 *lsnp = argp->last_ckp;
614 __os_free(env, argp);