2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
14 static const char copyright[] =
15 "Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved.\n";
18 int main __P((int, char *[]));
20 #ifndef HAVE_REPLICATION_THREADS
26 fprintf(stderr, DB_STR_A("5092",
27 "Cannot run %s without Replication Manager.\n", "%s\n"),
34 static int usage __P((void));
35 static int version_check __P((void));
36 static void event_callback __P((DB_ENV *, u_int32_t, void *));
37 static int db_replicate_logmsg __P((DB_ENV *, const char *));
38 static void prog_close __P((DB_ENV *, int));
40 /* Buffer for logging messages. */
42 char log_msg[MSG_SIZE];
47 char progname[MSG_SIZE];
49 #define REP_NTHREADS 3
63 u_int32_t flags, repmgr_th, seconds, start_state;
64 int ch, count, done, exitval, ret, verbose;
65 char *home, *passwd, *prog, time_buf[CTIME_BUFLEN];
69 log_msg[MSG_SIZE - 1] = '\0';
70 __os_id(NULL, &pid, NULL);
73 if ((prog = __db_rpath(argv[0])) == NULL)
78 if ((size_t)(count = snprintf(progname, sizeof(progname), "%s(%lu)",
79 prog, (u_long)pid)) >= sizeof(progname)) {
80 fprintf(stderr, DB_STR("5093", "Program name too long\n"));
83 if ((ret = version_check()) != 0)
88 * Don't use the full unsigned 32-bit maximum as the limit: some compilers
89 * get upset and require it to be specified in hexadecimal and so on.
91 #define MAX_UINT32_T 2147483647
95 * Create an environment object and initialize it for error
96 * reporting. Create it before parsing args so that we can
97 * call methods to set the values directly.
99 if ((ret = db_env_create(&dbenv, 0)) != 0) {
101 "%s: db_env_create: %s\n", progname, db_strerror(ret));
105 (void)dbenv->set_event_notify(dbenv, event_callback);
106 dbenv->set_errfile(dbenv, stderr);
107 dbenv->set_errpfx(dbenv, progname);
109 exitval = verbose = 0;
111 home = logfile = passwd = NULL;
113 start_state = DB_REP_ELECTION;
114 repmgr_th = REP_NTHREADS;
115 while ((ch = getopt(argc, argv, "h:L:MP:T:t:Vv")) != EOF)
124 start_state = DB_REP_MASTER;
127 passwd = strdup(optarg);
128 memset(optarg, 0, strlen(optarg));
129 if (passwd == NULL) {
130 fprintf(stderr, DB_STR_A("5094",
131 "%s: strdup: %s\n", "%s %s\n"),
132 progname, strerror(errno));
133 return (EXIT_FAILURE);
135 ret = dbenv->set_encrypt(dbenv, passwd, DB_ENCRYPT_AES);
138 dbenv->err(dbenv, ret, "set_passwd");
143 if (__db_getlong(NULL, progname,
144 optarg, 1, (long)MAX_UINT32_T, &argval))
145 return (EXIT_FAILURE);
146 repmgr_th = (u_int32_t)argval;
149 if (__db_getlong(NULL, progname,
150 optarg, 1, (long)MAX_UINT32_T, &argval))
151 return (EXIT_FAILURE);
152 seconds = (u_int32_t)argval;
155 printf("%s\n", db_version(NULL, NULL, NULL));
156 return (EXIT_SUCCESS);
170 /* Handle possible interruptions. */
174 * Log our process ID. This is a specialized case of
175 * __db_util_logset because we retain the logfp and keep
176 * the file open for additional logging.
178 if (logfile != NULL) {
179 if ((logfp = fopen(logfile, "w")) == NULL)
181 if ((ret = db_replicate_logmsg(dbenv, "STARTED")) != 0)
186 * If attaching to a pre-existing environment fails, treat it as an error.
188 #define ENV_FLAGS (DB_THREAD | DB_USE_ENVIRON)
189 if ((ret = dbenv->open(dbenv, home, ENV_FLAGS, 0)) != 0) {
190 dbenv->err(dbenv, ret, "DB_ENV->open");
195 * Confirm that replication is configured in the underlying
196 * environment. We need the max request value anyway and
197 * the method to get the value returns an error if replication is not configured.
200 if ((ret = dbenv->rep_get_request(dbenv, NULL, &max_req)) != 0) {
201 dbenv->err(dbenv, ret, "rep_get_request");
208 if (verbose && ((ret = dbenv->set_verbose(dbenv,
209 DB_VERB_REPLICATION, 1)) != 0)) {
210 dbenv->err(dbenv, ret, "set_verbose");
214 while (!done && count < MAX_RETRY) {
216 * Retry if we get an error that indicates that the port is
217 * in use. An old version of this program could still be
218 * running. The application restarts with recovery, and that
219 * should panic the old environment, but it may take a little
220 * bit of time for the old program to notice the panic.
222 * We wait the max_req time because at worst the rerequest
223 * thread runs every max_req time and should notice a panic. On
224 * the other hand, if we're joining the replication group for
225 * the first time and the master is not available
226 * (DB_REP_UNAVAIL), it makes sense to pause a bit longer before retrying.
229 if ((ret = dbenv->repmgr_start(dbenv,
230 repmgr_th, start_state)) == DB_REP_UNAVAIL) {
232 __os_yield(dbenv->env, 5, 0);
233 } else if (ret != 0) {
235 __os_yield(dbenv->env, 0, max_req);
240 dbenv->err(dbenv, ret, "repmgr_start");
244 /* Main loop of the program. */
245 while (!__db_util_interrupted() && !panic_exit) {
247 * The program itself does not have much to do. All the
248 * interesting replication stuff is happening underneath.
249 * Each period, we'll wake up and call rep_flush just to
250 * force a log record and cause any gaps to fill as well as
251 * check program status to see if it was interrupted.
253 __os_yield(dbenv->env, seconds, 0);
256 dbenv->errx(dbenv, DB_STR_A("5095",
257 "db_replicate begin: %s", "%s"),
258 __os_ctime(&now, time_buf));
262 * Hmm, do we really want to exit on error here? This is
263 * a non-essential piece of the program, so if it gets
264 * an error, we may just want to ignore it. Note we call
265 * rep_flush without checking if we're a master or client.
267 if ((ret = dbenv->rep_flush(dbenv)) != 0) {
268 dbenv->err(dbenv, ret, "rep_flush");
276 prog_close(dbenv, exitval);
277 return (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
281 prog_close(dbenv, exitval)
289 (void)remove(logfile);
291 /* Clean up the environment. */
292 if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) {
295 "%s: dbenv->close: %s\n", progname, db_strerror(ret));
298 /* Resend any caught signal. */
299 __db_util_sigresend();
301 exit (exitval == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
305 event_callback(dbenv, which, info)
310 COMPQUIET(info, NULL);
314 * If the app restarted with recovery, and we're an old
315 * program running against the old regions, we'll discover
316 * the panic and want to exit quickly to give a new
317 * instantiation of the program access to the port.
319 printf(DB_STR("5096", "received panic event\n"));
320 db_replicate_logmsg(dbenv, "PANIC");
324 case DB_EVENT_REP_CLIENT:
325 db_replicate_logmsg(dbenv, "CLIENT");
328 case DB_EVENT_REP_CONNECT_BROKEN:
329 db_replicate_logmsg(dbenv, "CONNECTIONBROKEN");
332 case DB_EVENT_REP_DUPMASTER:
333 db_replicate_logmsg(dbenv, "DUPMASTER");
336 case DB_EVENT_REP_ELECTED:
337 db_replicate_logmsg(dbenv, "ELECTED");
340 case DB_EVENT_REP_MASTER:
341 db_replicate_logmsg(dbenv, "MASTER");
344 case DB_EVENT_REP_NEWMASTER:
345 db_replicate_logmsg(dbenv, "NEWMASTER");
348 case DB_EVENT_REP_STARTUPDONE:
349 db_replicate_logmsg(dbenv, "STARTUPDONE");
352 case DB_EVENT_REP_CONNECT_ESTD:
353 case DB_EVENT_REP_CONNECT_TRY_FAILED:
354 case DB_EVENT_REP_INIT_DONE:
355 case DB_EVENT_REP_LOCAL_SITE_REMOVED:
356 case DB_EVENT_REP_PERM_FAILED:
357 case DB_EVENT_REP_SITE_ADDED:
358 case DB_EVENT_REP_SITE_REMOVED:
359 /* We don't care about these, for now. */
363 db_replicate_logmsg(dbenv, "IGNORED");
364 dbenv->errx(dbenv, DB_STR_A("5097", "ignoring event %d",
372 (void)fprintf(stderr, "usage: %s [-MVv]\n\t%s\n", progname,
373 "[-h home] [-P password] [-T nthreads] [-t seconds]");
374 return (EXIT_FAILURE);
380 int v_major, v_minor, v_patch;
382 /* Make sure we're loaded with the right version of the DB library. */
383 (void)db_version(&v_major, &v_minor, &v_patch);
384 if (v_major != DB_VERSION_MAJOR || v_minor != DB_VERSION_MINOR) {
385 fprintf(stderr, DB_STR_A("5098",
386 "%s: version %d.%d doesn't match library version %d.%d\n",
387 "%s %d %d %d %d\n"), progname,
388 DB_VERSION_MAJOR, DB_VERSION_MINOR,
390 return (EXIT_FAILURE);
396 db_replicate_logmsg(dbenv, msg)
402 char time_buf[CTIME_BUFLEN];
408 (void)__os_ctime(&now, time_buf);
409 if ((size_t)(cnt = snprintf(log_msg, sizeof(log_msg), "%s: %lu %s %s",
410 progname, (u_long)pid, time_buf, msg)) >= sizeof(log_msg)) {
411 dbenv->errx(dbenv, DB_STR_A("5099",
412 "%s: %lu %s %s: message too long", "%s %lu %s %s"),
413 progname, (u_long)pid, time_buf, msg);
416 fprintf(logfp, "%s\n", log_msg);