 * Some code borrowed from sg-utils.
 * Copyright (c) 2004 Christophe Varoqui
#include <sys/ioctl.h>
#include <sys/sysmacros.h>
#include <urcu/uatomic.h>
#include "../libmultipath/debug.h"
#include "../libmultipath/sg_include.h"
#include "../libmultipath/util.h"
#include "../libmultipath/time-util.h"
#define HEAVY_CHECK_COUNT 10
	MSG_TUR_RUNNING = CHECKER_FIRST_MSGID,
#define _IDX(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
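
/*
 * Message ids at or above CHECKER_FIRST_MSGID index into this table. The
 * checker framework appends the selected string to the checker name when it
 * logs the path state, so MSG_TUR_TIMEOUT is reported as something like
 * "tur checker timed out" (the exact prefix is produced outside this file).
 */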
const char *libcheck_msgtable[] = {
	[_IDX(TUR_RUNNING)] = " still running",
	[_IDX(TUR_TIMEOUT)] = " timed out",
	[_IDX(TUR_FAILED)] = " failed to initialize",
struct tur_checker_context {
	int running; /* uatomic access only */
	pthread_cond_t active;
	int holders; /* uatomic access only */
	struct checker_context ctx;
int libcheck_init (struct checker * c)
	struct tur_checker_context *ct;
	ct = malloc(sizeof(struct tur_checker_context));
	memset(ct, 0, sizeof(struct tur_checker_context));
	ct->state = PATH_UNCHECKED;
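	/*
	 * holders is a reference count on this context: the checker itself
	 * holds one reference (set below), and a running TUR thread holds
	 * another one, taken in libcheck_check() before the thread is
	 * started. Whichever side drops the count to zero frees the context.
	 */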
	uatomic_set(&ct->holders, 1);
	pthread_cond_init_mono(&ct->active);
	pthread_mutex_init(&ct->lock, NULL);
	if (fstat(c->fd, &sb) == 0)
		ct->devt = sb.st_rdev;
static void cleanup_context(struct tur_checker_context *ct)
	pthread_mutex_destroy(&ct->lock);
	pthread_cond_destroy(&ct->active);
void libcheck_free (struct checker * c)
	struct tur_checker_context *ct = c->context;
	running = uatomic_xchg(&ct->running, 0);
		pthread_cancel(ct->thread);
	holders = uatomic_sub_return(&ct->holders, 1);
tur_check(int fd, unsigned int timeout, short *msgid)
	struct sg_io_hdr io_hdr;
	unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
	unsigned char sense_buffer[32];
	memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
	memset(&sense_buffer, 0, 32);
	io_hdr.interface_id = 'S';
	io_hdr.cmd_len = sizeof (turCmdBlk);
	io_hdr.mx_sb_len = sizeof (sense_buffer);
	io_hdr.dxfer_direction = SG_DXFER_NONE;
	io_hdr.cmdp = turCmdBlk;
	io_hdr.sbp = sense_buffer;
	io_hdr.timeout = timeout * 1000;
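	/*
	 * The CDB above is a 6-byte TEST UNIT READY (opcode 0x00): no data
	 * is transferred (SG_DXFER_NONE), only the SCSI status and, on a
	 * check condition, the sense buffer are of interest. SG_IO takes
	 * its timeout in milliseconds, hence the conversion from seconds.
	 */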
	if (ioctl(fd, SG_IO, &io_hdr) < 0) {
		if (errno == ENOTTY) {
			*msgid = CHECKER_MSGID_UNSUPPORTED;
		*msgid = CHECKER_MSGID_DOWN;
	if ((io_hdr.status & 0x7e) == 0x18) {
		 * SCSI-3 arrays might return
		 * reservation conflict on TUR
		*msgid = CHECKER_MSGID_UP;
	if (io_hdr.info & SG_INFO_OK_MASK) {
		int key = 0, asc, ascq;
		switch (io_hdr.host_status) {
		case DID_TRANSPORT_FAILFAST:
			/* Driver error, retry */
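		/*
		 * Extract sense key, ASC and ASCQ from the returned sense
		 * data: byte 0 == 0x72/0x73 means descriptor-format sense
		 * (key/ASC/ASCQ in bytes 1-3), 0x70/0x71 means fixed-format
		 * sense (key in byte 2, ASC/ASCQ in bytes 12-13).
		 */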
		if (io_hdr.sb_len_wr > 3) {
			if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
				key = io_hdr.sbp[1] & 0x0f;
				asc = io_hdr.sbp[2];
				ascq = io_hdr.sbp[3];
			} else if (io_hdr.sb_len_wr > 13 &&
				   ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
				    (io_hdr.sbp[0] & 0x7f) == 0x71)) {
				key = io_hdr.sbp[2] & 0x0f;
				asc = io_hdr.sbp[12];
				ascq = io_hdr.sbp[13];
			/* Unit Attention, retry */
		else if (key == 0x2) {
			/* Note: Other ALUA states are either UP or DOWN */
			if (asc == 0x04 && ascq == 0x0b) {
				 * LOGICAL UNIT NOT ACCESSIBLE,
				 * TARGET PORT IN STANDBY STATE
				*msgid = CHECKER_MSGID_GHOST;
		*msgid = CHECKER_MSGID_DOWN;
	*msgid = CHECKER_MSGID_UP;
#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
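
/*
 * pthread_cleanup_push() and pthread_cleanup_pop() expand to a matching
 * pair of braces and therefore have to be used as a lexical pair in the
 * same function. The pop(1) form runs cleanup_func() on normal return as
 * well as on cancellation, so the TUR thread always drops its reference
 * on the context.
 */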
static void cleanup_func(void *data)
	struct tur_checker_context *ct = data;
	holders = uatomic_sub_return(&ct->holders, 1);
 * Test code for "zombie tur thread" handling.
 * Compile e.g. with CFLAGS=-DTUR_TEST_MAJOR=8
 * Additional parameters can be configured with the macros below.
 * Every nth started TUR thread will hang in a non-cancellable state
 * for a given number of seconds, for the device given by major/minor.
#ifdef TUR_TEST_MAJOR
#ifndef TUR_TEST_MINOR
#define TUR_TEST_MINOR 0
#ifndef TUR_SLEEP_INTERVAL
#define TUR_SLEEP_INTERVAL 3
#ifndef TUR_SLEEP_SECS
#define TUR_SLEEP_SECS 60
static void tur_deep_sleep(const struct tur_checker_context *ct)
	static int sleep_cnt;
	const struct timespec ts = { .tv_sec = TUR_SLEEP_SECS, .tv_nsec = 0 };
	if (ct->devt != makedev(TUR_TEST_MAJOR, TUR_TEST_MINOR) ||
	    ++sleep_cnt % TUR_SLEEP_INTERVAL != 0)
	condlog(1, "tur thread going to sleep for %ld seconds", ts.tv_sec);
	if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)
		condlog(0, "pthread_setcancelstate: %m");
	if (nanosleep(&ts, NULL) != 0)
		condlog(0, "nanosleep: %m");
	condlog(1, "tur zombie thread woke up");
	if (pthread_setcancelstate(oldstate, NULL) != 0)
		condlog(0, "pthread_setcancelstate (2): %m");
	pthread_testcancel();
#define tur_deep_sleep(x) do {} while (0)
#endif /* TUR_TEST_MAJOR */
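
/*
 * For example, assuming the build honors CFLAGS given on the make command
 * line, a test build that lets every 3rd TUR thread on device 8:16 hang for
 * two minutes could be produced with:
 *
 *   make CFLAGS="-DTUR_TEST_MAJOR=8 -DTUR_TEST_MINOR=16 -DTUR_SLEEP_SECS=120"
 */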
void *libcheck_thread(struct checker_context *ctx)
	struct tur_checker_context *ct =
		container_of(ctx, struct tur_checker_context, ctx);
	/* This thread can be canceled, so set up cleanup handling */
	tur_thread_cleanup_push(ct);
	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
	state = tur_check(ct->fd, ct->timeout, &msgid);
	pthread_testcancel();
	/* TUR checker done */
	pthread_mutex_lock(&ct->lock);
	pthread_cond_signal(&ct->active);
	pthread_mutex_unlock(&ct->lock);
	condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt),
		minor(ct->devt), checker_state_name(state));
	running = uatomic_xchg(&ct->running, 0);
	tur_thread_cleanup_pop(ct);
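
/*
 * Deadline for the short wait in libcheck_check(): after starting a new TUR
 * thread, the checker waits on ct->active for about one millisecond
 * (monotonic clock, matching pthread_cond_init_mono()) before giving up and
 * reporting PATH_PENDING.
 */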
static void tur_timeout(struct timespec *tsp)
	get_monotonic_time(tsp);
	tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */
	normalize_timespec(tsp);
static void tur_set_async_timeout(struct checker *c)
	struct tur_checker_context *ct = c->context;
	get_monotonic_time(&now);
	ct->time = now.tv_sec + c->timeout;
static int tur_check_async_timeout(struct checker *c)
	struct tur_checker_context *ct = c->context;
	get_monotonic_time(&now);
	return (now.tv_sec > ct->time);
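
/*
 * Asynchronous mode in short: libcheck_check() starts a TUR thread, waits
 * briefly for it (see tur_timeout()), and returns PATH_PENDING if it has not
 * finished. Subsequent calls pick up ct->state once the thread completes; if
 * the per-path checker timeout expires first, the thread is cancelled and
 * PATH_TIMEOUT is reported.
 */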
int libcheck_check(struct checker * c)
	struct tur_checker_context *ct = c->context;
		return PATH_UNCHECKED;
	if (checker_is_sync(c))
		return tur_check(c->fd, c->timeout, &c->msgid);
		if (tur_check_async_timeout(c)) {
			int running = uatomic_xchg(&ct->running, 0);
				pthread_cancel(ct->thread);
			condlog(3, "%d:%d : tur checker timeout",
				major(ct->devt), minor(ct->devt));
			c->msgid = MSG_TUR_TIMEOUT;
			tur_status = PATH_TIMEOUT;
			pthread_mutex_lock(&ct->lock);
			tur_status = ct->state;
			c->msgid = ct->msgid;
			pthread_mutex_unlock(&ct->lock);
	} else if (uatomic_read(&ct->running) != 0) {
		condlog(3, "%d:%d : tur checker not finished",
			major(ct->devt), minor(ct->devt));
		tur_status = PATH_PENDING;
		/* TUR checker done */
		pthread_mutex_lock(&ct->lock);
		tur_status = ct->state;
		c->msgid = ct->msgid;
		pthread_mutex_unlock(&ct->lock);
	if (uatomic_read(&ct->holders) > 1) {
		 * The thread has been cancelled but hasn't quit.
		 * We have to prevent it from interfering with the new
		 * thread. We create a new context and leave the old
		 * one with the stale thread, hoping it will clean up
		condlog(3, "%d:%d : tur thread not responding",
			major(ct->devt), minor(ct->devt));
		 * libcheck_init will replace c->context.
		 * It fails only in OOM situations. In this case, return
		 * PATH_UNCHECKED to avoid prematurely failing the path.
		if (libcheck_init(c) != 0)
			return PATH_UNCHECKED;
		if (!uatomic_sub_return(&ct->holders, 1))
			/* It did terminate, eventually */
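		/*
		 * From here on the checker uses the fresh context installed
		 * by libcheck_init(); if the stale thread is still around,
		 * its cleanup handler drops the old context's last reference
		 * whenever it finally exits.
		 */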
	/* Start new TUR checker */
	pthread_mutex_lock(&ct->lock);
	tur_status = ct->state = PATH_PENDING;
	ct->msgid = CHECKER_MSGID_NONE;
	pthread_mutex_unlock(&ct->lock);
	ct->timeout = c->timeout;
	uatomic_add(&ct->holders, 1);
	uatomic_set(&ct->running, 1);
	tur_set_async_timeout(c);
	setup_thread_attr(&attr, 32 * 1024, 1);
	r = start_checker_thread(&ct->thread, &attr, &ct->ctx);
	pthread_attr_destroy(&attr);
		uatomic_sub(&ct->holders, 1);
		uatomic_set(&ct->running, 0);
		condlog(3, "%d:%d : failed to start tur thread, using"
			" sync mode", major(ct->devt), minor(ct->devt));
		return tur_check(c->fd, c->timeout, &c->msgid);
	pthread_mutex_lock(&ct->lock);
	if (ct->state == PATH_PENDING)
		r = pthread_cond_timedwait(&ct->active, &ct->lock,
	tur_status = ct->state;
	c->msgid = ct->msgid;
	pthread_mutex_unlock(&ct->lock);
	if (tur_status == PATH_PENDING) {
		condlog(4, "%d:%d : tur checker still running",
			major(ct->devt), minor(ct->devt));
		int running = uatomic_xchg(&ct->running, 0);
			pthread_cancel(ct->thread);
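		/*
		 * As elsewhere in this checker, ct->running is cleared with
		 * an atomic xchg, so only the single party that observes the
		 * 1 -> 0 transition acts on it and pthread_cancel() is issued
		 * at most once per thread.
		 */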