import source from lvm2 2.02.79
[external/device-mapper.git] / daemons / clvmd / clvmd-openais.c
1 /*
2  * Copyright (C) 2007-2009 Red Hat, Inc. All rights reserved.
3  *
4  * This file is part of LVM2.
5  *
6  * This copyrighted material is made available to anyone wishing to use,
7  * modify, copy, or redistribute it subject to the terms and conditions
8  * of the GNU Lesser General Public License v.2.1.
9  *
10  * You should have received a copy of the GNU Lesser General Public License
11  * along with this program; if not, write to the Free Software Foundation,
12  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
13  */
14
15 /*
16  * This provides the interface between clvmd and OpenAIS as the cluster
17  * and lock manager.
18  */
19
20 #include "clvmd-common.h"
21
22 #include <pthread.h>
23 #include <fcntl.h>
24 #include <syslog.h>
25
26 #include <openais/saAis.h>
27 #include <openais/saLck.h>
28
29 #include <corosync/corotypes.h>
30 #include <corosync/cpg.h>
31
32 #include "locking.h"
33 #include "clvm.h"
34 #include "clvmd-comms.h"
35 #include "lvm-functions.h"
36 #include "clvmd.h"
37
38 /* Timeout value for several openais calls */
39 #define TIMEOUT 10
40
41 static void openais_cpg_deliver_callback (cpg_handle_t handle,
42                                   const struct cpg_name *groupName,
43                                   uint32_t nodeid,
44                                   uint32_t pid,
45                                   void *msg,
46                                   size_t msg_len);
47 static void openais_cpg_confchg_callback(cpg_handle_t handle,
48                                  const struct cpg_name *groupName,
49                                  const struct cpg_address *member_list, size_t member_list_entries,
50                                  const struct cpg_address *left_list, size_t left_list_entries,
51                                  const struct cpg_address *joined_list, size_t joined_list_entries);
52
53 static void _cluster_closedown(void);
54
55 /* Hash list of nodes in the cluster */
56 static struct dm_hash_table *node_hash;
57
58 /* For associating lock IDs & resource handles */
59 static struct dm_hash_table *lock_hash;
60
61 /* Number of active nodes */
62 static int num_nodes;
63 static unsigned int our_nodeid;
64
65 static struct local_client *cluster_client;
66
67 /* OpenAIS handles */
68 static cpg_handle_t cpg_handle;
69 static SaLckHandleT lck_handle;
70
71 static struct cpg_name cpg_group_name;
72
73 /* Openais callback structs */
74 cpg_callbacks_t openais_cpg_callbacks = {
75         .cpg_deliver_fn =            openais_cpg_deliver_callback,
76         .cpg_confchg_fn =            openais_cpg_confchg_callback,
77 };
78
/* Per-node record stored in node_hash, keyed by the bytes of the node ID. */
struct node_info {
	enum {
		NODE_UNKNOWN,
		NODE_DOWN,
		NODE_UP,
		NODE_CLVMD
	} state;	/* last state recorded for the node */
	int nodeid;	/* node ID as reported by CPG */
};
84
85 struct lock_info
86 {
87         SaLckResourceHandleT res_handle;
88         SaLckLockIdT         lock_id;
89         SaNameT              lock_name;
90 };
91
92 /* Set errno to something approximating the right value and return 0 or -1 */
93 static int ais_to_errno(SaAisErrorT err)
94 {
95         switch(err)
96         {
97         case SA_AIS_OK:
98                 return 0;
99         case SA_AIS_ERR_LIBRARY:
100                 errno = EINVAL;
101                 break;
102         case SA_AIS_ERR_VERSION:
103                 errno = EINVAL;
104                 break;
105         case SA_AIS_ERR_INIT:
106                 errno = EINVAL;
107                 break;
108         case SA_AIS_ERR_TIMEOUT:
109                 errno = ETIME;
110                 break;
111         case SA_AIS_ERR_TRY_AGAIN:
112                 errno = EAGAIN;
113                 break;
114         case SA_AIS_ERR_INVALID_PARAM:
115                 errno = EINVAL;
116                 break;
117         case SA_AIS_ERR_NO_MEMORY:
118                 errno = ENOMEM;
119                 break;
120         case SA_AIS_ERR_BAD_HANDLE:
121                 errno = EINVAL;
122                 break;
123         case SA_AIS_ERR_BUSY:
124                 errno = EBUSY;
125                 break;
126         case SA_AIS_ERR_ACCESS:
127                 errno = EPERM;
128                 break;
129         case SA_AIS_ERR_NOT_EXIST:
130                 errno = ENOENT;
131                 break;
132         case SA_AIS_ERR_NAME_TOO_LONG:
133                 errno = ENAMETOOLONG;
134                 break;
135         case SA_AIS_ERR_EXIST:
136                 errno = EEXIST;
137                 break;
138         case SA_AIS_ERR_NO_SPACE:
139                 errno = ENOSPC;
140                 break;
141         case SA_AIS_ERR_INTERRUPT:
142                 errno = EINTR;
143                 break;
144         case SA_AIS_ERR_NAME_NOT_FOUND:
145                 errno = ENOENT;
146                 break;
147         case SA_AIS_ERR_NO_RESOURCES:
148                 errno = ENOMEM;
149                 break;
150         case SA_AIS_ERR_NOT_SUPPORTED:
151                 errno = EOPNOTSUPP;
152                 break;
153         case SA_AIS_ERR_BAD_OPERATION:
154                 errno = EINVAL;
155                 break;
156         case SA_AIS_ERR_FAILED_OPERATION:
157                 errno = EIO;
158                 break;
159         case SA_AIS_ERR_MESSAGE_ERROR:
160                 errno = EIO;
161                 break;
162         case SA_AIS_ERR_QUEUE_FULL:
163                 errno = EXFULL;
164                 break;
165         case SA_AIS_ERR_QUEUE_NOT_AVAILABLE:
166                 errno = EINVAL;
167                 break;
168         case SA_AIS_ERR_BAD_FLAGS:
169                 errno = EINVAL;
170                 break;
171         case SA_AIS_ERR_TOO_BIG:
172                 errno = E2BIG;
173                 break;
174         case SA_AIS_ERR_NO_SECTIONS:
175                 errno = ENOMEM;
176                 break;
177         default:
178                 errno = EINVAL;
179                 break;
180         }
181         return -1;
182 }
183
/*
 * Format a csid (the raw bytes of an int node ID) as a decimal string.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call, so the caller must consume it before calling again.
 */
static char *print_openais_csid(const char *csid)
{
	static char buf[128];
	int id;

	/* csid has no alignment guarantee, so copy instead of casting */
	memcpy(&id, csid, sizeof(id));
	snprintf(buf, sizeof(buf), "%d", id);
	return buf;
}
193
194 static int add_internal_client(int fd, fd_callback_t callback)
195 {
196         struct local_client *client;
197
198         DEBUGLOG("Add_internal_client, fd = %d\n", fd);
199
200         client = malloc(sizeof(struct local_client));
201         if (!client)
202         {
203                 DEBUGLOG("malloc failed\n");
204                 return -1;
205         }
206
207         memset(client, 0, sizeof(struct local_client));
208         client->fd = fd;
209         client->type = CLUSTER_INTERNAL;
210         client->callback = callback;
211         add_client(client);
212
213         /* Set Close-on-exec */
214         fcntl(fd, F_SETFD, 1);
215
216         return 0;
217 }
218
219 static void openais_cpg_deliver_callback (cpg_handle_t handle,
220                                   const struct cpg_name *groupName,
221                                   uint32_t nodeid,
222                                   uint32_t pid,
223                                   void *msg,
224                                   size_t msg_len)
225 {
226         int target_nodeid;
227
228         memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN);
229
230         DEBUGLOG("%u got message from nodeid %d for %d. len %d\n",
231                  our_nodeid, nodeid, target_nodeid, msg_len-4);
232
233         if (nodeid != our_nodeid)
234                 if (target_nodeid == our_nodeid || target_nodeid == 0)
235                         process_message(cluster_client, (char *)msg+OPENAIS_CSID_LEN,
236                                         msg_len-OPENAIS_CSID_LEN, (char*)&nodeid);
237 }
238
239 static void openais_cpg_confchg_callback(cpg_handle_t handle,
240                                  const struct cpg_name *groupName,
241                                  const struct cpg_address *member_list, size_t member_list_entries,
242                                  const struct cpg_address *left_list, size_t left_list_entries,
243                                  const struct cpg_address *joined_list, size_t joined_list_entries)
244 {
245         int i;
246         struct node_info *ninfo;
247
248         DEBUGLOG("confchg callback. %d joined, %d left, %d members\n",
249                  joined_list_entries, left_list_entries, member_list_entries);
250
251         for (i=0; i<joined_list_entries; i++) {
252                 ninfo = dm_hash_lookup_binary(node_hash,
253                                               (char *)&joined_list[i].nodeid,
254                                               OPENAIS_CSID_LEN);
255                 if (!ninfo) {
256                         ninfo = malloc(sizeof(struct node_info));
257                         if (!ninfo) {
258                                 break;
259                         }
260                         else {
261                                 ninfo->nodeid = joined_list[i].nodeid;
262                                 dm_hash_insert_binary(node_hash,
263                                                       (char *)&ninfo->nodeid,
264                                                       OPENAIS_CSID_LEN, ninfo);
265                         }
266                 }
267                 ninfo->state = NODE_CLVMD;
268         }
269
270         for (i=0; i<left_list_entries; i++) {
271                 ninfo = dm_hash_lookup_binary(node_hash,
272                                               (char *)&left_list[i].nodeid,
273                                               OPENAIS_CSID_LEN);
274                 if (ninfo)
275                         ninfo->state = NODE_DOWN;
276         }
277
278         for (i=0; i<member_list_entries; i++) {
279                 if (member_list[i].nodeid == 0) continue;
280                 ninfo = dm_hash_lookup_binary(node_hash,
281                                 (char *)&member_list[i].nodeid,
282                                 OPENAIS_CSID_LEN);
283                 if (!ninfo) {
284                         ninfo = malloc(sizeof(struct node_info));
285                         if (!ninfo) {
286                                 break;
287                         }
288                         else {
289                                 ninfo->nodeid = member_list[i].nodeid;
290                                 dm_hash_insert_binary(node_hash,
291                                                 (char *)&ninfo->nodeid,
292                                                 OPENAIS_CSID_LEN, ninfo);
293                         }
294                 }
295                 ninfo->state = NODE_CLVMD;
296         }
297
298         num_nodes = member_list_entries;
299 }
300
301 static int lck_dispatch(struct local_client *client, char *buf, int len,
302                         const char *csid, struct local_client **new_client)
303 {
304         *new_client = NULL;
305         saLckDispatch(lck_handle, SA_DISPATCH_ONE);
306         return 1;
307 }
308
309 static int _init_cluster(void)
310 {
311         SaAisErrorT err;
312         SaVersionT  ver = { 'B', 1, 1 };
313         int select_fd;
314
315         node_hash = dm_hash_create(100);
316         lock_hash = dm_hash_create(10);
317
318         err = cpg_initialize(&cpg_handle,
319                              &openais_cpg_callbacks);
320         if (err != SA_AIS_OK) {
321                 syslog(LOG_ERR, "Cannot initialise OpenAIS CPG service: %d",
322                        err);
323                 DEBUGLOG("Cannot initialise OpenAIS CPG service: %d", err);
324                 return ais_to_errno(err);
325         }
326
327         err = saLckInitialize(&lck_handle,
328                                         NULL,
329                               &ver);
330         if (err != SA_AIS_OK) {
331                 cpg_initialize(&cpg_handle, &openais_cpg_callbacks);
332                 syslog(LOG_ERR, "Cannot initialise OpenAIS lock service: %d",
333                        err);
334                 DEBUGLOG("Cannot initialise OpenAIS lock service: %d\n\n", err);
335                 return ais_to_errno(err);
336         }
337
338         /* Connect to the clvmd group */
339         strcpy((char *)cpg_group_name.value, "clvmd");
340         cpg_group_name.length = strlen((char *)cpg_group_name.value);
341         err = cpg_join(cpg_handle, &cpg_group_name);
342         if (err != SA_AIS_OK) {
343                 cpg_finalize(cpg_handle);
344                 saLckFinalize(lck_handle);
345                 syslog(LOG_ERR, "Cannot join clvmd process group");
346                 DEBUGLOG("Cannot join clvmd process group: %d\n", err);
347                 return ais_to_errno(err);
348         }
349
350         err = cpg_local_get(cpg_handle,
351                             &our_nodeid);
352         if (err != SA_AIS_OK) {
353                 cpg_finalize(cpg_handle);
354                 saLckFinalize(lck_handle);
355                 syslog(LOG_ERR, "Cannot get local node id\n");
356                 return ais_to_errno(err);
357         }
358         DEBUGLOG("Our local node id is %d\n", our_nodeid);
359
360         saLckSelectionObjectGet(lck_handle, (SaSelectionObjectT *)&select_fd);
361         add_internal_client(select_fd, lck_dispatch);
362
363         DEBUGLOG("Connected to OpenAIS\n");
364
365         return 0;
366 }
367
368 static void _cluster_closedown(void)
369 {
370         DEBUGLOG("cluster_closedown\n");
371         destroy_lvhash();
372
373         saLckFinalize(lck_handle);
374         cpg_finalize(cpg_handle);
375 }
376
377 static void _get_our_csid(char *csid)
378 {
379         memcpy(csid, &our_nodeid, sizeof(int));
380 }
381
382 /* OpenAIS doesn't really have nmode names so we
383    just use the node ID in hex instead */
384 static int _csid_from_name(char *csid, const char *name)
385 {
386         int nodeid;
387         struct node_info *ninfo;
388
389         if (sscanf(name, "%x", &nodeid) == 1) {
390                 ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN);
391                 if (ninfo)
392                         return nodeid;
393         }
394         return -1;
395 }
396
397 static int _name_from_csid(const char *csid, char *name)
398 {
399         struct node_info *ninfo;
400
401         ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN);
402         if (!ninfo)
403         {
404                 sprintf(name, "UNKNOWN %s", print_openais_csid(csid));
405                 return -1;
406         }
407
408         sprintf(name, "%x", ninfo->nodeid);
409         return 0;
410 }
411
412 static int _get_num_nodes()
413 {
414         DEBUGLOG("num_nodes = %d\n", num_nodes);
415         return num_nodes;
416 }
417
418 /* Node is now known to be running a clvmd */
419 static void _add_up_node(const char *csid)
420 {
421         struct node_info *ninfo;
422
423         ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN);
424         if (!ninfo) {
425                 DEBUGLOG("openais_add_up_node no node_hash entry for csid %s\n",
426                          print_openais_csid(csid));
427                 return;
428         }
429
430         DEBUGLOG("openais_add_up_node %d\n", ninfo->nodeid);
431
432         ninfo->state = NODE_CLVMD;
433
434         return;
435 }
436
437 /* Call a callback for each node, so the caller knows whether it's up or down */
438 static int _cluster_do_node_callback(struct local_client *master_client,
439                                      void (*callback)(struct local_client *,
440                                                       const char *csid, int node_up))
441 {
442         struct dm_hash_node *hn;
443         struct node_info *ninfo;
444         int somedown = 0;
445
446         dm_hash_iterate(hn, node_hash)
447         {
448                 char csid[OPENAIS_CSID_LEN];
449
450                 ninfo = dm_hash_get_data(node_hash, hn);
451                 memcpy(csid, dm_hash_get_key(node_hash, hn), OPENAIS_CSID_LEN);
452
453                 DEBUGLOG("down_callback. node %d, state = %d\n", ninfo->nodeid,
454                          ninfo->state);
455
456                 if (ninfo->state != NODE_DOWN)
457                         callback(master_client, csid, ninfo->state == NODE_CLVMD);
458                 if (ninfo->state != NODE_CLVMD)
459                         somedown = -1;
460         }
461         return somedown;
462 }
463
464 /* Real locking */
465 static int _lock_resource(char *resource, int mode, int flags, int *lockid)
466 {
467         struct lock_info *linfo;
468         SaLckResourceHandleT res_handle;
469         SaAisErrorT err;
470         SaLckLockIdT lock_id;
471         SaLckLockStatusT lockStatus;
472
473         /* This needs to be converted from DLM/LVM2 value for OpenAIS LCK */
474         if (flags & LCK_NONBLOCK) flags = SA_LCK_LOCK_NO_QUEUE;
475
476         linfo = malloc(sizeof(struct lock_info));
477         if (!linfo)
478                 return -1;
479
480         DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);
481
482         linfo->lock_name.length = strlen(resource)+1;
483         strcpy((char *)linfo->lock_name.value, resource);
484
485         err = saLckResourceOpen(lck_handle, &linfo->lock_name,
486                                 SA_LCK_RESOURCE_CREATE, TIMEOUT, &res_handle);
487         if (err != SA_AIS_OK)
488         {
489                 DEBUGLOG("ResourceOpen returned %d\n", err);
490                 free(linfo);
491                 return ais_to_errno(err);
492         }
493
494         err = saLckResourceLock(
495                         res_handle,
496                         &lock_id,
497                         mode,
498                         flags,
499                         0,
500                         SA_TIME_END,
501                         &lockStatus);
502         if (err != SA_AIS_OK && lockStatus != SA_LCK_LOCK_GRANTED)
503         {
504                 free(linfo);
505                 saLckResourceClose(res_handle);
506                 return ais_to_errno(err);
507         }
508                         
509         /* Wait for it to complete */
510
511         DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err,
512                  lock_id);
513
514         linfo->lock_id = lock_id;
515         linfo->res_handle = res_handle;
516
517         dm_hash_insert(lock_hash, resource, linfo);
518
519         return ais_to_errno(err);
520 }
521
522
523 static int _unlock_resource(char *resource, int lockid)
524 {
525         SaAisErrorT err;
526         struct lock_info *linfo;
527
528         DEBUGLOG("unlock_resource %s\n", resource);
529         linfo = dm_hash_lookup(lock_hash, resource);
530         if (!linfo)
531                 return 0;
532
533         DEBUGLOG("unlock_resource: lockid: %llx\n", linfo->lock_id);
534         err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END);
535         if (err != SA_AIS_OK)
536         {
537                 DEBUGLOG("Unlock returned %d\n", err);
538                 return ais_to_errno(err);
539         }
540
541         /* Release the resource */
542         dm_hash_remove(lock_hash, resource);
543         saLckResourceClose(linfo->res_handle);
544         free(linfo);
545
546         return ais_to_errno(err);
547 }
548
549 static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
550 {
551         int status;
552         char lock1[strlen(resource)+3];
553         char lock2[strlen(resource)+3];
554
555         snprintf(lock1, sizeof(lock1), "%s-1", resource);
556         snprintf(lock2, sizeof(lock2), "%s-2", resource);
557
558         switch (mode)
559         {
560         case LCK_EXCL:
561                 status = _lock_resource(lock1, SA_LCK_EX_LOCK_MODE, flags, lockid);
562                 if (status)
563                         goto out;
564
565                 /* If we can't get this lock too then bail out */
566                 status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, LCK_NONBLOCK,
567                                         lockid);
568                 if (status == SA_LCK_LOCK_NOT_QUEUED)
569                 {
570                         _unlock_resource(lock1, *lockid);
571                         status = -1;
572                         errno = EAGAIN;
573                 }
574                 break;
575
576         case LCK_PREAD:
577         case LCK_READ:
578                 status = _lock_resource(lock1, SA_LCK_PR_LOCK_MODE, flags, lockid);
579                 if (status)
580                         goto out;
581                 _unlock_resource(lock2, *lockid);
582                 break;
583
584         case LCK_WRITE:
585                 status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, flags, lockid);
586                 if (status)
587                         goto out;
588                 _unlock_resource(lock1, *lockid);
589                 break;
590
591         default:
592                 status = -1;
593                 errno = EINVAL;
594                 break;
595         }
596 out:
597         *lockid = mode;
598         return status;
599 }
600
/*
 * Release both lock-service resources ("<resource>-1" and
 * "<resource>-2") behind a clvmd lock.  Results of the individual
 * unlocks are not reported; always returns 0.
 */
static int _sync_unlock(const char *resource, int lockid)
{
	const size_t name_size = strlen(resource) + 3;	/* "-N" plus NUL */
	char first[name_size];
	char second[name_size];

	snprintf(first, sizeof(first), "%s-1", resource);
	snprintf(second, sizeof(second), "%s-2", resource);

	_unlock_resource(first, lockid);
	_unlock_resource(second, lockid);

	return 0;
}
615
/* We are always quorate ! Returns 1 unconditionally. */
static int _is_quorate(void)
{
	return 1;
}
621
622 static int _get_main_cluster_fd(void)
623 {
624         int select_fd;
625
626         cpg_fd_get(cpg_handle, &select_fd);
627         return select_fd;
628 }
629
630 static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
631                                 const char *csid,
632                                 struct local_client **new_client)
633 {
634         cluster_client = fd;
635         *new_client = NULL;
636         cpg_dispatch(cpg_handle, SA_DISPATCH_ONE);
637         return 1;
638 }
639
640 static int _cluster_send_message(const void *buf, int msglen, const char *csid,
641                                  const char *errtext)
642 {
643         struct iovec iov[2];
644         SaAisErrorT err;
645         int target_node;
646
647         if (csid)
648                 memcpy(&target_node, csid, OPENAIS_CSID_LEN);
649         else
650                 target_node = 0;
651
652         iov[0].iov_base = &target_node;
653         iov[0].iov_len = sizeof(int);
654         iov[1].iov_base = (char *)buf;
655         iov[1].iov_len = msglen;
656
657         err = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2);
658         return ais_to_errno(err);
659 }
660
/*
 * We don't have a cluster name to report here, so the fixed string
 * "OpenAIS" is copied into buf.
 *
 * The result is truncated if necessary and is always NUL-terminated
 * when buflen is positive; nothing is written for a NULL buf or a
 * non-positive buflen.  Always returns 0.
 */
static int _get_cluster_name(char *buf, int buflen)
{
	if (buf && buflen > 0) {
		strncpy(buf, "OpenAIS", (size_t) buflen);
		/* strncpy does not terminate when the source fills the buffer */
		buf[buflen - 1] = '\0';
	}

	return 0;
}
667
668 static struct cluster_ops _cluster_openais_ops = {
669         .cluster_init_completed   = NULL,
670         .cluster_send_message     = _cluster_send_message,
671         .name_from_csid           = _name_from_csid,
672         .csid_from_name           = _csid_from_name,
673         .get_num_nodes            = _get_num_nodes,
674         .cluster_fd_callback      = _cluster_fd_callback,
675         .get_main_cluster_fd      = _get_main_cluster_fd,
676         .cluster_do_node_callback = _cluster_do_node_callback,
677         .is_quorate               = _is_quorate,
678         .get_our_csid             = _get_our_csid,
679         .add_up_node              = _add_up_node,
680         .reread_config            = NULL,
681         .cluster_closedown        = _cluster_closedown,
682         .get_cluster_name         = _get_cluster_name,
683         .sync_lock                = _sync_lock,
684         .sync_unlock              = _sync_unlock,
685 };
686
687 struct cluster_ops *init_openais_cluster(void)
688 {
689         if (!_init_cluster())
690                 return &_cluster_openais_ops;
691         else
692                 return NULL;
693 }