a4b4a2130d5e6ccfff8ac7513db091b9b9c31b9d
[platform/upstream/multipath-tools.git] / libmultipath / checkers / tur.c
1 /*
2  * Some code borrowed from sg-utils.
3  *
4  * Copyright (c) 2004 Christophe Varoqui
5  */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <errno.h>
16 #include <sys/time.h>
17 #include <pthread.h>
18 #include <urcu/uatomic.h>
19
20 #include "checkers.h"
21
22 #include "../libmultipath/debug.h"
23 #include "../libmultipath/sg_include.h"
24 #include "../libmultipath/util.h"
25 #include "../libmultipath/time-util.h"
26 #include "../libmultipath/util.h"
27
28 #define TUR_CMD_LEN 6
29 #define HEAVY_CHECK_COUNT       10
30
31 enum {
32         MSG_TUR_RUNNING = CHECKER_FIRST_MSGID,
33         MSG_TUR_TIMEOUT,
34         MSG_TUR_FAILED,
35 };
36
37 #define _IDX(x) (MSG_ ## x - CHECKER_FIRST_MSGID)
38 const char *libcheck_msgtable[] = {
39         [_IDX(TUR_RUNNING)] = " still running",
40         [_IDX(TUR_TIMEOUT)] = " timed out",
41         [_IDX(TUR_FAILED)] = " failed to initialize",
42         NULL,
43 };
44
45 struct tur_checker_context {
46         dev_t devt;
47         int state;
48         int running; /* uatomic access only */
49         int fd;
50         unsigned int timeout;
51         time_t time;
52         pthread_t thread;
53         pthread_mutex_t lock;
54         pthread_cond_t active;
55         int holders; /* uatomic access only */
56         int msgid;
57         struct checker_context ctx;
58 };
59
60 int libcheck_init (struct checker * c)
61 {
62         struct tur_checker_context *ct;
63         struct stat sb;
64
65         ct = malloc(sizeof(struct tur_checker_context));
66         if (!ct)
67                 return 1;
68         memset(ct, 0, sizeof(struct tur_checker_context));
69
70         ct->state = PATH_UNCHECKED;
71         ct->fd = -1;
72         uatomic_set(&ct->holders, 1);
73         pthread_cond_init_mono(&ct->active);
74         pthread_mutex_init(&ct->lock, NULL);
75         if (fstat(c->fd, &sb) == 0)
76                 ct->devt = sb.st_rdev;
77         ct->ctx.cls = c->cls;
78         c->context = ct;
79
80         return 0;
81 }
82
83 static void cleanup_context(struct tur_checker_context *ct)
84 {
85         pthread_mutex_destroy(&ct->lock);
86         pthread_cond_destroy(&ct->active);
87         free(ct);
88 }
89
90 void libcheck_free (struct checker * c)
91 {
92         if (c->context) {
93                 struct tur_checker_context *ct = c->context;
94                 int holders;
95                 int running;
96
97                 running = uatomic_xchg(&ct->running, 0);
98                 if (running)
99                         pthread_cancel(ct->thread);
100                 ct->thread = 0;
101                 holders = uatomic_sub_return(&ct->holders, 1);
102                 if (!holders)
103                         cleanup_context(ct);
104                 c->context = NULL;
105         }
106         return;
107 }
108
/*
 * Issue a TEST UNIT READY (SCSI opcode 0x00) through the SG_IO ioctl
 * and map the outcome to a path state.
 *
 * fd      - open file descriptor for the path device
 * timeout - command timeout in seconds (SG_IO takes milliseconds)
 * msgid   - out: checker message id describing the result
 *
 * Returns PATH_UP, PATH_GHOST, PATH_DOWN, or PATH_WILD (device does
 * not support SG_IO). Driver errors and Unit Attention conditions are
 * retried up to 5 times before giving up.
 */
static int
tur_check(int fd, unsigned int timeout, short *msgid)
{
	struct sg_io_hdr io_hdr;
	unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
	unsigned char sense_buffer[32];
	int retry_tur = 5;	/* retry budget for transient conditions */

retry:
	memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
	memset(&sense_buffer, 0, 32);
	io_hdr.interface_id = 'S';
	io_hdr.cmd_len = sizeof (turCmdBlk);
	io_hdr.mx_sb_len = sizeof (sense_buffer);
	io_hdr.dxfer_direction = SG_DXFER_NONE;	/* TUR moves no data */
	io_hdr.cmdp = turCmdBlk;
	io_hdr.sbp = sense_buffer;
	io_hdr.timeout = timeout * 1000;	/* seconds -> milliseconds */
	io_hdr.pack_id = 0;
	if (ioctl(fd, SG_IO, &io_hdr) < 0) {
		/* ENOTTY: no SG_IO support; TUR checker can't judge this path */
		if (errno == ENOTTY) {
			*msgid = CHECKER_MSGID_UNSUPPORTED;
			return PATH_WILD;
		}
		*msgid = CHECKER_MSGID_DOWN;
		return PATH_DOWN;
	}
	/* 0x18 == RESERVATION CONFLICT SCSI status (low bits masked off) */
	if ((io_hdr.status & 0x7e) == 0x18) {
		/*
		 * SCSI-3 arrays might return
		 * reservation conflict on TUR;
		 * treat the path as usable.
		 */
		*msgid = CHECKER_MSGID_UP;
		return PATH_UP;
	}
	if (io_hdr.info & SG_INFO_OK_MASK) {
		/* Command didn't complete cleanly: inspect the sense data */
		int key = 0, asc, ascq;

		switch (io_hdr.host_status) {
		case DID_OK:
		case DID_NO_CONNECT:
		case DID_BAD_TARGET:
		case DID_ABORT:
		case DID_TRANSPORT_FAILFAST:
			break;
		default:
			/* Driver error, retry */
			if (--retry_tur)
				goto retry;
			break;
		}
		if (io_hdr.sb_len_wr > 3) {
			/* Descriptor-format sense data (response codes 0x72/0x73) */
			if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) {
				key = io_hdr.sbp[1] & 0x0f;
				asc = io_hdr.sbp[2];
				ascq = io_hdr.sbp[3];
			/* Fixed-format sense data (response codes 0x70/0x71) */
			} else if (io_hdr.sb_len_wr > 13 &&
				   ((io_hdr.sbp[0] & 0x7f) == 0x70 ||
				    (io_hdr.sbp[0] & 0x7f) == 0x71)) {
				key = io_hdr.sbp[2] & 0x0f;
				asc = io_hdr.sbp[12];
				ascq = io_hdr.sbp[13];
			}
		}
		if (key == 0x6) {
			/* Unit Attention, retry */
			if (--retry_tur)
				goto retry;
		}
		else if (key == 0x2) {
			/* Not Ready */
			/* Note: Other ALUA states are either UP or DOWN */
			if( asc == 0x04 && ascq == 0x0b){
				/*
				 * LOGICAL UNIT NOT ACCESSIBLE,
				 * TARGET PORT IN STANDBY STATE
				 */
				*msgid = CHECKER_MSGID_GHOST;
				return PATH_GHOST;
			}
		}
		/* Any other sense condition counts as a failed path */
		*msgid = CHECKER_MSGID_DOWN;
		return PATH_DOWN;
	}
	*msgid = CHECKER_MSGID_UP;
	return PATH_UP;
}
196
197 #define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
198 #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
199
200 static void cleanup_func(void *data)
201 {
202         int holders;
203         struct tur_checker_context *ct = data;
204
205         holders = uatomic_sub_return(&ct->holders, 1);
206         if (!holders)
207                 cleanup_context(ct);
208 }
209
210 /*
211  * Test code for "zombie tur thread" handling.
212  * Compile e.g. with CFLAGS=-DTUR_TEST_MAJOR=8
 * Additional parameters can be configured with the macros below.
 *
 * Every nth started TUR thread will hang in a non-cancellable state
216  * for given number of seconds, for device given by major/minor.
217  */
218 #ifdef TUR_TEST_MAJOR
219
220 #ifndef TUR_TEST_MINOR
221 #define TUR_TEST_MINOR 0
222 #endif
223 #ifndef TUR_SLEEP_INTERVAL
224 #define TUR_SLEEP_INTERVAL 3
225 #endif
226 #ifndef TUR_SLEEP_SECS
227 #define TUR_SLEEP_SECS 60
228 #endif
229
/*
 * Test-only helper: simulate a stuck ("zombie") TUR thread.
 * For the device matching TUR_TEST_MAJOR/TUR_TEST_MINOR, every
 * TUR_SLEEP_INTERVAL-th invocation disables cancellation and sleeps
 * TUR_SLEEP_SECS seconds, then re-enables cancellation and takes any
 * pending cancel via pthread_testcancel().
 */
static void tur_deep_sleep(const struct tur_checker_context *ct)
{
	static int sleep_cnt;	/* counts invocations across all threads */
	const struct timespec ts = { .tv_sec = TUR_SLEEP_SECS, .tv_nsec = 0 };
	int oldstate;

	if (ct->devt != makedev(TUR_TEST_MAJOR, TUR_TEST_MINOR) ||
	    ++sleep_cnt % TUR_SLEEP_INTERVAL != 0)
		return;

	condlog(1, "tur thread going to sleep for %ld seconds", ts.tv_sec);
	/* Block cancellation so the sleep can't be interrupted */
	if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)
		condlog(0, "pthread_setcancelstate: %m");
	if (nanosleep(&ts, NULL) != 0)
		condlog(0, "nanosleep: %m");
	condlog(1, "tur zombie thread woke up");
	if (pthread_setcancelstate(oldstate, NULL) != 0)
		condlog(0, "pthread_setcancelstate (2): %m");
	/* Act on a cancel that arrived while cancellation was disabled */
	pthread_testcancel();
}
250 #else
251 #define tur_deep_sleep(x) do {} while (0)
252 #endif /* TUR_TEST_MAJOR */
253
/*
 * Body of the asynchronous checker thread (started from
 * libcheck_check() via start_checker_thread()).
 * Runs one TUR, publishes the result under ct->lock, and signals
 * ct->active so a libcheck_check() waiting in pthread_cond_timedwait()
 * can pick it up.
 */
void *libcheck_thread(struct checker_context *ctx)
{
	struct tur_checker_context *ct =
		container_of(ctx, struct tur_checker_context, ctx);
	int state, running;
	short msgid;

	/* This thread can be canceled, so setup clean up */
	tur_thread_cleanup_push(ct);

	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
		minor(ct->devt));

	tur_deep_sleep(ct);
	state = tur_check(ct->fd, ct->timeout, &msgid);
	/* Honor a cancel that arrived during the (possibly long) TUR */
	pthread_testcancel();

	/* TUR checker done: publish result and wake any waiter */
	pthread_mutex_lock(&ct->lock);
	ct->state = state;
	ct->msgid = msgid;
	pthread_cond_signal(&ct->active);
	pthread_mutex_unlock(&ct->lock);

	condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt),
		minor(ct->devt), checker_state_name(state));

	/*
	 * Clear the running flag. If it was already 0, the main thread
	 * gave up on this check (timeout or libcheck_free) and has
	 * called / will call pthread_cancel() on us; pause() is a
	 * cancellation point, so block here until the cancel arrives
	 * and cleanup_func() drops our holder reference.
	 */
	running = uatomic_xchg(&ct->running, 0);
	if (!running)
		pause();

	tur_thread_cleanup_pop(ct);

	return ((void *)0);
}
289
290
/*
 * Compute the absolute deadline (monotonic clock, now + 1ms) used for
 * pthread_cond_timedwait() on the checker's result.
 */
static void tur_timeout(struct timespec *tsp)
{
	struct timespec deadline;

	get_monotonic_time(&deadline);
	deadline.tv_nsec += 1000 * 1000;	/* one millisecond */
	normalize_timespec(&deadline);
	*tsp = deadline;
}
297
298 static void tur_set_async_timeout(struct checker *c)
299 {
300         struct tur_checker_context *ct = c->context;
301         struct timespec now;
302
303         get_monotonic_time(&now);
304         ct->time = now.tv_sec + c->timeout;
305 }
306
307 static int tur_check_async_timeout(struct checker *c)
308 {
309         struct tur_checker_context *ct = c->context;
310         struct timespec now;
311
312         get_monotonic_time(&now);
313         return (now.tv_sec > ct->time);
314 }
315
/*
 * Check the path state using TEST UNIT READY.
 *
 * In sync mode the TUR is issued inline and its state returned
 * directly. In async mode a worker thread issues the TUR; we wait up
 * to 1ms for its result (see tur_timeout()), otherwise PATH_PENDING is
 * returned and the result is collected on a later call. A worker that
 * outlives c->timeout seconds is cancelled and PATH_TIMEOUT reported.
 */
int libcheck_check(struct checker * c)
{
	struct tur_checker_context *ct = c->context;
	struct timespec tsp;
	pthread_attr_t attr;
	int tur_status, r;

	if (!ct)
		return PATH_UNCHECKED;

	if (checker_is_sync(c))
		return tur_check(c->fd, c->timeout, &c->msgid);

	/*
	 * Async mode
	 */
	if (ct->thread) {
		/* A previous check is outstanding */
		if (tur_check_async_timeout(c)) {
			/* Claim the running flag; only one side cancels */
			int running = uatomic_xchg(&ct->running, 0);
			if (running) {
				pthread_cancel(ct->thread);
				condlog(3, "%d:%d : tur checker timeout",
					major(ct->devt), minor(ct->devt));
				c->msgid = MSG_TUR_TIMEOUT;
				tur_status = PATH_TIMEOUT;
			} else {
				/* Thread finished just in time: use its result */
				pthread_mutex_lock(&ct->lock);
				tur_status = ct->state;
				c->msgid = ct->msgid;
				pthread_mutex_unlock(&ct->lock);
			}
			ct->thread = 0;
		} else if (uatomic_read(&ct->running) != 0) {
			condlog(3, "%d:%d : tur checker not finished",
				major(ct->devt), minor(ct->devt));
			tur_status = PATH_PENDING;
		} else {
			/* TUR checker done */
			ct->thread = 0;
			pthread_mutex_lock(&ct->lock);
			tur_status = ct->state;
			c->msgid = ct->msgid;
			pthread_mutex_unlock(&ct->lock);
		}
	} else {
		if (uatomic_read(&ct->holders) > 1) {
			/*
			 * The thread has been cancelled but hasn't quit.
			 * We have to prevent it from interfering with the new
			 * thread. We create a new context and leave the old
			 * one with the stale thread, hoping it will clean up
			 * eventually.
			 */
			condlog(3, "%d:%d : tur thread not responding",
				major(ct->devt), minor(ct->devt));

			/*
			 * libcheck_init will replace c->context.
			 * It fails only in OOM situations. In this case, return
			 * PATH_UNCHECKED to avoid prematurely failing the path.
			 */
			if (libcheck_init(c) != 0)
				return PATH_UNCHECKED;

			if (!uatomic_sub_return(&ct->holders, 1))
				/* It did terminate, eventually */
				cleanup_context(ct);

			ct = c->context;
		}
		/* Start new TUR checker */
		pthread_mutex_lock(&ct->lock);
		tur_status = ct->state = PATH_PENDING;
		ct->msgid = CHECKER_MSGID_NONE;
		pthread_mutex_unlock(&ct->lock);
		ct->fd = c->fd;
		ct->timeout = c->timeout;
		/* Extra holder reference for the worker thread */
		uatomic_add(&ct->holders, 1);
		uatomic_set(&ct->running, 1);
		tur_set_async_timeout(c);
		setup_thread_attr(&attr, 32 * 1024, 1);
		r = start_checker_thread(&ct->thread, &attr, &ct->ctx);
		pthread_attr_destroy(&attr);
		if (r) {
			/* Thread creation failed: undo refcount/flags, go sync */
			uatomic_sub(&ct->holders, 1);
			uatomic_set(&ct->running, 0);
			ct->thread = 0;
			condlog(3, "%d:%d : failed to start tur thread, using"
				" sync mode", major(ct->devt), minor(ct->devt));
			return tur_check(c->fd, c->timeout, &c->msgid);
		}
		/* Give the thread up to 1ms to deliver a result */
		tur_timeout(&tsp);
		pthread_mutex_lock(&ct->lock);
		if (ct->state == PATH_PENDING)
			r = pthread_cond_timedwait(&ct->active, &ct->lock,
						   &tsp);
		/* r == 0: signalled (or state already set); take the result */
		if (!r) {
			tur_status = ct->state;
			c->msgid = ct->msgid;
		}
		pthread_mutex_unlock(&ct->lock);
		if (tur_status == PATH_PENDING) {
			condlog(4, "%d:%d : tur checker still running",
				major(ct->devt), minor(ct->devt));
		} else {
			/* Result obtained: stop the thread if still running */
			int running = uatomic_xchg(&ct->running, 0);
			if (running)
				pthread_cancel(ct->thread);
			ct->thread = 0;
		}
	}

	return tur_status;
}