1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */
35 #include <arpa/inet.h>
37 # include <sys/epoll.h>
40 #include <sys/param.h>
42 #include <sys/socket.h>
51 /* Number of bytes of data we initially reserve for each hash table bucket. */
52 #define DEFAULT_DATASIZE_PER_BUCKET 1024
55 /* Wrapper functions with error checking for standard functions. */
56 extern void *xmalloc (size_t n);
57 extern void *xcalloc (size_t n, size_t s);
58 extern void *xrealloc (void *o, size_t n);
60 /* Support to run nscd as an unprivileged user */
61 const char *server_user;
62 static uid_t server_uid;
63 static gid_t server_gid;
64 const char *stat_user;
66 static gid_t *server_groups;
70 static int server_ngroups;
72 static pthread_attr_t attr;
74 static void begin_drop_privileges (void);
75 static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .filename = "/etc/passwd",
108 .db_filename = _PATH_NSCD_PASSWD_DB,
109 .disabled_iov = &pwd_iov_disabled,
117 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
122 .filename = "/etc/group",
123 .db_filename = _PATH_NSCD_GROUP_DB,
124 .disabled_iov = &grp_iov_disabled,
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
137 .filename = "/etc/hosts",
138 .db_filename = _PATH_NSCD_HOSTS_DB,
139 .disabled_iov = &hst_iov_disabled,
149 /* Mapping of request type to database. */
150 static struct database_dyn *const serv2db[LASTREQ] =
152 [GETPWBYNAME] = &dbs[pwddb],
153 [GETPWBYUID] = &dbs[pwddb],
154 [GETGRBYNAME] = &dbs[grpdb],
155 [GETGRBYGID] = &dbs[grpdb],
156 [GETHOSTBYNAME] = &dbs[hstdb],
157 [GETHOSTBYNAMEv6] = &dbs[hstdb],
158 [GETHOSTBYADDR] = &dbs[hstdb],
159 [GETHOSTBYADDRv6] = &dbs[hstdb],
160 [GETFDPW] = &dbs[pwddb],
161 [GETFDGR] = &dbs[grpdb],
162 [GETFDHST] = &dbs[hstdb],
163 [GETAI] = &dbs[hstdb],
164 [INITGROUPS] = &dbs[grpdb]
168 /* Number of seconds between two cache pruning runs. */
169 #define CACHE_PRUNE_INTERVAL 15
172 /* Initial number of threads to use. */
174 /* Maximum number of threads to use. */
175 int max_nthreads = 32;
177 /* Socket for incoming connections. */
180 /* Number of times clients had to wait. */
181 unsigned long int client_queued;
/* Write data from BUF to FD.  The write() call is wrapped in
   TEMP_FAILURE_RETRY so an interrupted call is reissued.  Returns the
   negative write() result on error, otherwise LEN - N.
   NOTE(review): the declarations and the loop that maintain N and RET
   are not visible in this excerpt; N presumably counts the bytes still
   to be written, making LEN - N the number actually written -- confirm. */
185 writeall (int fd, const void *buf, size_t len)
/* Try to write N bytes starting at BUF. */
191 ret = TEMP_FAILURE_RETRY (write (fd, buf, n));
/* Step BUF past the RET bytes that were just written. */
194 buf = (const char *) buf + ret;
/* A negative RET is passed through unchanged; otherwise report LEN - N. */
198 return ret < 0 ? ret : len - n;
205 /* The following three are not really used, they are symbolic constants. */
211 use_he_begin = use_he | use_begin,
212 use_he_end = use_he | use_end,
215 use_key_begin = use_key | use_begin,
216 use_key_end = use_key | use_end,
217 use_key_first = use_key_begin | use_first,
220 use_data_begin = use_data | use_begin,
221 use_data_end = use_data | use_end,
222 use_data_first = use_data_begin | use_first
/* Record in USEMAP that the LEN bytes starting at offset START are used
   for the purpose USE, or check them against an earlier record of the
   same kind.  FIRST_FREE is the upper limit for valid offsets.
   NOTE(review): the return statements and loop headers are not visible
   in this excerpt; callers test the negated result, so a nonzero return
   presumably means success -- confirm. */
227 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
228 enum usekey use, ref_t start, size_t len)
/* Sanity test: the range must not extend beyond FIRST_FREE and START
   must have none of the BLOCK_ALIGN_M1 bits set. */
232 if (start > first_free || start + len > first_free
233 || (start & BLOCK_ALIGN_M1))
/* Case 1: the first byte carries no marker yet. */
236 if (usemap[start] == use_not)
238 /* Add the start marker. */
239 usemap[start] = use | use_begin;
/* The following bytes are tested for still being unmarked. */
243 if (usemap[++start] != use_not)
248 /* Add the end marker. */
249 usemap[start] = use | use_end;
/* Case 2: the first byte already holds a begin marker of the same kind
   (the use_first bit is ignored in the comparison). */
251 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
253 /* Hash entries can't be shared. */
/* Merge the use_first bit of this reference into the existing marker. */
257 usemap[start] |= (use & use_first);
/* The remaining bytes are compared against the plain USE value and the
   USE | use_end marker respectively. */
261 if (usemap[++start] != use)
264 if (usemap[++start] != (use | use_end))
268 /* Points to a wrong object or somewhere in the middle. */
275 /* Verify data in persistent database. */
277 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
279 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
281 time_t now = time (NULL);
283 struct database_pers_head *head = mem;
284 struct database_pers_head head_copy = *head;
286 /* Check that the header that was read matches the head in the database. */
287 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
290 /* First some easy tests: make sure the database header is sane. */
291 if (head->version != DB_VERSION
292 || head->header_size != sizeof (*head)
293 /* We allow a timestamp to be one hour ahead of the current time.
294 This should cover daylight saving time changes. */
295 || head->timestamp > now + 60 * 60 + 60
296 || (head->gc_cycle & 1)
297 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
298 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
299 || head->first_free < 0
300 || head->first_free > head->data_size
301 || (head->first_free & BLOCK_ALIGN_M1) != 0
302 || head->maxnentries < 0
303 || head->maxnsearched < 0)
306 uint8_t *usemap = calloc (head->first_free, 1);
310 const char *data = (char *) &head->array[roundup (head->module,
311 ALIGN / sizeof (ref_t))];
313 nscd_ssize_t he_cnt = 0;
314 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
316 ref_t work = head->array[cnt];
318 while (work != ENDREF)
320 if (! check_use (data, head->first_free, usemap, use_he, work,
321 sizeof (struct hashentry)))
324 /* Now we know we can dereference the record. */
325 struct hashentry *here = (struct hashentry *) (data + work);
329 /* Make sure the record is for this type of service. */
330 if (here->type >= LASTREQ
331 || serv2db[here->type] != &dbs[dbnr])
334 /* Validate boolean field value. */
335 if (here->first != false && here->first != true)
343 || here->packet > head->first_free
344 || here->packet + sizeof (struct datahead) > head->first_free)
347 struct datahead *dh = (struct datahead *) (data + here->packet);
349 if (! check_use (data, head->first_free, usemap,
350 use_data | (here->first ? use_first : 0),
351 here->packet, dh->allocsize))
354 if (dh->allocsize < sizeof (struct datahead)
355 || dh->recsize > dh->allocsize
356 || (dh->notfound != false && dh->notfound != true)
357 || (dh->usable != false && dh->usable != true))
360 if (here->key < here->packet + sizeof (struct datahead)
361 || here->key > here->packet + dh->allocsize
362 || here->key + here->len > here->packet + dh->allocsize)
365 /* If keys can appear outside of data, this should be done
366 instead. But gc doesn't mark the data in that case. */
367 if (! check_use (data, head->first_free, usemap,
368 use_key | (here->first ? use_first : 0),
369 here->key, here->len))
378 if (he_cnt != head->nentries)
381 /* See if all data and keys had at least one reference from
382 he->first == true hashentry. */
383 for (ref_t idx = 0; idx < head->first_free; ++idx)
386 if (usemap[idx] == use_key_begin)
389 if (usemap[idx] == use_data_begin)
393 /* Finally, make sure the database hasn't changed since the first test. */
394 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
406 /* Initialize database information structures. */
410 /* Secure mode and unprivileged mode are incompatible */
411 if (server_user != NULL && secure_in_use)
413 dbg_log (_("Cannot run nscd in secure mode as unprivileged user"));
417 /* Look up unprivileged uid/gid/groups before we start listening on the socket. */
419 if (server_user != NULL)
420 begin_drop_privileges ();
423 /* No configuration for this value, assume a default. */
424 nthreads = 2 * lastdb;
426 for (size_t cnt = 0; cnt < lastdb; ++cnt)
427 if (dbs[cnt].enabled)
429 pthread_rwlock_init (&dbs[cnt].lock, NULL);
430 pthread_mutex_init (&dbs[cnt].memlock, NULL);
432 if (dbs[cnt].persistent)
434 /* Try to open the appropriate file on disk. */
435 int fd = open (dbs[cnt].db_filename, O_RDWR);
441 struct database_pers_head head;
442 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
444 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
447 dbg_log (_("invalid persistent database file \"%s\": %s"),
448 dbs[cnt].db_filename, strerror (errno));
449 unlink (dbs[cnt].db_filename);
451 else if (head.module == 0 && head.data_size == 0)
453 /* The file has been created, but the head has not been
454 initialized yet. Remove the old file. */
455 unlink (dbs[cnt].db_filename);
457 else if (head.header_size != (int) sizeof (head))
459 dbg_log (_("invalid persistent database file \"%s\": %s"),
460 dbs[cnt].db_filename,
461 _("header size does not match"));
462 unlink (dbs[cnt].db_filename);
464 else if ((total = (sizeof (head)
465 + roundup (head.module * sizeof (ref_t),
469 || total < sizeof (head))
471 dbg_log (_("invalid persistent database file \"%s\": %s"),
472 dbs[cnt].db_filename,
473 _("file size does not match"));
474 unlink (dbs[cnt].db_filename);
476 else if ((mem = mmap (NULL, total, PROT_READ | PROT_WRITE,
477 MAP_SHARED, fd, 0)) == MAP_FAILED)
479 else if (!verify_persistent_db (mem, &head, cnt))
482 dbg_log (_("invalid persistent database file \"%s\": %s"),
483 dbs[cnt].db_filename,
484 _("verification failed"));
485 unlink (dbs[cnt].db_filename);
489 /* Success. We have the database. */
491 dbs[cnt].memsize = total;
492 dbs[cnt].data = (char *)
493 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
494 ALIGN / sizeof (ref_t))];
495 dbs[cnt].mmap_used = true;
497 if (dbs[cnt].suggested_module > head.module)
498 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
503 /* We also need a read-only descriptor. */
506 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
507 if (dbs[cnt].ro_fd == -1)
509 cannot create read-only descriptor for \"%s\"; no mmap"),
510 dbs[cnt].db_filename);
513 // XXX Shall we test whether the descriptors actually
514 // XXX point to the same file?
517 /* Close the file descriptors in case something went
518 wrong, in which case the variables have not been assigned -1. */
525 if (dbs[cnt].head == NULL)
527 /* No database loaded. Allocate the data structure, possibly on disk. */
529 struct database_pers_head head;
530 size_t total = (sizeof (head)
531 + roundup (dbs[cnt].suggested_module
532 * sizeof (ref_t), ALIGN)
533 + (dbs[cnt].suggested_module
534 * DEFAULT_DATASIZE_PER_BUCKET));
536 /* Try to create the database. If we do not need a
537 persistent database create a temporary file. */
540 if (dbs[cnt].persistent)
542 fd = open (dbs[cnt].db_filename,
543 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
545 if (fd != -1 && dbs[cnt].shared)
546 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
550 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
551 fd = mkstemp (fname);
553 /* We do not need the file name anymore after we
554 opened another file descriptor in read-only mode. */
558 ro_fd = open (fname, O_RDONLY);
568 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
569 dbnames[cnt], dbs[cnt].db_filename);
570 // XXX Correct way to terminate?
574 if (dbs[cnt].persistent)
575 dbg_log (_("cannot create %s; no persistent database used"),
576 dbs[cnt].db_filename);
578 dbg_log (_("cannot create %s; no sharing possible"),
579 dbs[cnt].db_filename);
581 dbs[cnt].persistent = 0;
582 // XXX remember: no mmap
586 /* Tell the user if we could not create the read-only descriptor. */
588 if (ro_fd == -1 && dbs[cnt].shared)
590 cannot create read-only descriptor for \"%s\"; no mmap"),
591 dbs[cnt].db_filename);
593 /* Before we create the header, initialize the hash
594 table so that, if we get interrupted while writing
595 the header, we can recognize a partially initialized database. */
597 size_t ps = sysconf (_SC_PAGESIZE);
599 assert (~ENDREF == 0);
600 memset (tmpbuf, '\xff', ps);
602 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
603 off_t offset = sizeof (head);
606 if (offset % ps != 0)
608 towrite = MIN (remaining, ps - (offset % ps));
609 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
612 remaining -= towrite;
615 while (remaining > ps)
617 if (pwrite (fd, tmpbuf, ps, offset) == -1)
624 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
627 /* Create the header of the file. */
628 struct database_pers_head head =
630 .version = DB_VERSION,
631 .header_size = sizeof (head),
632 .module = dbs[cnt].suggested_module,
633 .data_size = (dbs[cnt].suggested_module
634 * DEFAULT_DATASIZE_PER_BUCKET),
639 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
641 || ftruncate (fd, total) != 0
642 || (mem = mmap (NULL, total, PROT_READ | PROT_WRITE,
643 MAP_SHARED, fd, 0)) == MAP_FAILED)
646 unlink (dbs[cnt].db_filename);
647 dbg_log (_("cannot write to database file %s: %s"),
648 dbs[cnt].db_filename, strerror (errno));
649 dbs[cnt].persistent = 0;
655 dbs[cnt].data = (char *)
656 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
657 ALIGN / sizeof (ref_t))];
658 dbs[cnt].memsize = total;
659 dbs[cnt].mmap_used = true;
661 /* Remember the descriptors. */
663 dbs[cnt].ro_fd = ro_fd;
676 && ((dbs[cnt].wr_fd != -1
677 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
678 || (dbs[cnt].ro_fd != -1
679 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
682 cannot set socket to close on exec: %s; disabling paranoia mode"),
687 if (dbs[cnt].head == NULL)
689 /* We do not use the persistent database. Just
690 create an in-memory data structure. */
691 assert (! dbs[cnt].persistent);
693 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
694 + (dbs[cnt].suggested_module
696 memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
697 assert (~ENDREF == 0);
698 memset (dbs[cnt].head->array, '\xff',
699 dbs[cnt].suggested_module * sizeof (ref_t));
700 dbs[cnt].head->module = dbs[cnt].suggested_module;
701 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
702 * dbs[cnt].head->module);
703 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
704 dbs[cnt].head->first_free = 0;
707 assert (dbs[cnt].ro_fd == -1);
710 if (dbs[cnt].check_file)
712 /* We need the modification date of the file. */
715 if (stat (dbs[cnt].filename, &st) < 0)
717 /* We cannot stat() the file, disable file checking. */
718 dbg_log (_("cannot stat() file `%s': %s"),
719 dbs[cnt].filename, strerror (errno));
720 dbs[cnt].check_file = 0;
723 dbs[cnt].file_mtime = st.st_mtime;
727 /* Create the socket. */
728 sock = socket (AF_UNIX, SOCK_STREAM, 0);
731 dbg_log (_("cannot open socket: %s"), strerror (errno));
732 exit (errno == EACCES ? 4 : 1);
734 /* Bind a name to the socket. */
735 struct sockaddr_un sock_addr;
736 sock_addr.sun_family = AF_UNIX;
737 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
738 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
740 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
741 exit (errno == EACCES ? 4 : 1);
744 /* We don't want to get stuck on accept. */
745 int fl = fcntl (sock, F_GETFL);
746 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
748 dbg_log (_("cannot change socket to nonblocking mode: %s"),
753 /* The descriptor needs to be closed on exec. */
754 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
756 dbg_log (_("cannot set socket to close on exec: %s"),
761 /* Set permissions for the socket. */
762 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
764 /* Set the socket up to accept connections. */
765 if (listen (sock, SOMAXCONN) < 0)
767 dbg_log (_("cannot enable socket to accept connections: %s"),
772 /* Change to unprivileged uid/gid/groups if specified in config file */
773 if (server_user != NULL)
774 finish_drop_privileges ();
778 /* Close the connections. */
/* Invalidate the cache of the database named by KEY.  KEY is compared
   against "passwd", "group" and "hosts"; when the selected database
   dbs[number] is enabled it is pruned via prune_cache with a time
   argument of LONG_MAX.
   NOTE(review): the statements that assign NUMBER and that handle an
   unrecognized KEY are not visible in this excerpt. */
787 invalidate_cache (char *key)
791 if (strcmp (key, "passwd") == 0)
793 else if (strcmp (key, "group") == 0)
/* __builtin_expect tells the compiler that strcmp is expected to yield 0. */
795 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
799 /* Re-initialize the resolver. resolv.conf might have changed. */
/* Only an enabled database is pruned. */
805 if (dbs[number].enabled)
806 prune_cache (&dbs[number], LONG_MAX);
812 send_ro_fd (struct database_dyn *db, char *key, int fd)
814 /* If we do not have a read-only file descriptor do nothing. */
818 /* We need to send some data along with the descriptor. */
820 iov[0].iov_base = key;
821 iov[0].iov_len = strlen (key) + 1;
823 /* Prepare the control message to transfer the descriptor. */
827 char bytes[CMSG_SPACE (sizeof (int))];
829 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
830 .msg_control = buf.bytes,
831 .msg_controllen = sizeof (buf) };
832 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
834 cmsg->cmsg_level = SOL_SOCKET;
835 cmsg->cmsg_type = SCM_RIGHTS;
836 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
838 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
840 msg.msg_controllen = cmsg->cmsg_len;
842 /* Send the control message. We repeat when we are interrupted but
843 everything else is ignored. */
845 # define MSG_NOSIGNAL 0
847 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
849 if (__builtin_expect (debug_level > 0, 0))
850 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
852 #endif /* SCM_RIGHTS */
855 /* Handle new request. */
857 handle_request (int fd, request_header *req, void *key, uid_t uid)
859 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
863 cannot handle old request version %d; current version is %d"),
864 req->version, NSCD_VERSION);
868 /* Make the SELinux check before we go on to the standard checks. We
869 need to verify that the request type is valid, since it has not
870 yet been checked at this point. */
872 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
873 && __builtin_expect (req->type, LASTREQ) < LASTREQ
874 && nscd_request_avc_has_perm (fd, req->type) != 0)
877 struct database_dyn *db = serv2db[req->type];
879 // XXX Clean up so that each new command need not introduce a
880 // XXX new conditional.
881 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
882 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
883 || req->type == GETAI || req->type == INITGROUPS)
885 if (__builtin_expect (debug_level, 0) > 0)
887 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
889 char buf[INET6_ADDRSTRLEN];
891 dbg_log ("\t%s (%s)", serv2str[req->type],
892 inet_ntop (req->type == GETHOSTBYADDR
893 ? AF_INET : AF_INET6,
894 key, buf, sizeof (buf)));
897 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
900 /* Is this service enabled? */
903 /* No, send the prepared record. */
904 if (TEMP_FAILURE_RETRY (write (fd, db->disabled_iov->iov_base,
905 db->disabled_iov->iov_len))
906 != (ssize_t) db->disabled_iov->iov_len
907 && __builtin_expect (debug_level, 0) > 0)
909 /* We have problems sending the result. */
911 dbg_log (_("cannot write result: %s"),
912 strerror_r (errno, buf, sizeof (buf)));
918 /* Be sure we can read the data. */
919 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
921 ++db->head->rdlockdelayed;
922 pthread_rwlock_rdlock (&db->lock);
925 /* See whether we can handle it from the cache. */
926 struct datahead *cached;
927 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
931 /* Hurray it's in the cache. */
932 if (writeall (fd, cached->data, cached->recsize)
934 && __builtin_expect (debug_level, 0) > 0)
936 /* We have problems sending the result. */
938 dbg_log (_("cannot write result: %s"),
939 strerror_r (errno, buf, sizeof (buf)));
942 pthread_rwlock_unlock (&db->lock);
947 pthread_rwlock_unlock (&db->lock);
949 else if (__builtin_expect (debug_level, 0) > 0)
951 if (req->type == INVALIDATE)
952 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
954 dbg_log ("\t%s", serv2str[req->type]);
957 /* Handle the request. */
961 addpwbyname (db, fd, req, key, uid);
965 addpwbyuid (db, fd, req, key, uid);
969 addgrbyname (db, fd, req, key, uid);
973 addgrbygid (db, fd, req, key, uid);
977 addhstbyname (db, fd, req, key, uid);
980 case GETHOSTBYNAMEv6:
981 addhstbynamev6 (db, fd, req, key, uid);
985 addhstbyaddr (db, fd, req, key, uid);
988 case GETHOSTBYADDRv6:
989 addhstbyaddrv6 (db, fd, req, key, uid);
993 addhstai (db, fd, req, key, uid);
997 addinitgroups (db, fd, req, key, uid);
1003 if (! secure_in_use)
1005 /* Get the caller's credentials. */
1007 struct ucred caller;
1008 socklen_t optlen = sizeof (caller);
1010 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1014 dbg_log (_("error getting callers id: %s"),
1015 strerror_r (errno, buf, sizeof (buf)));
1021 /* Some systems have no SO_PEERCRED implementation. They don't
1022 care about security, so we don't either. */
1027 /* Accept shutdown, getstat and invalidate only from root. For
1028 the stat call also allow the user specified in the config file. */
1029 if (req->type == GETSTAT)
1031 if (uid == 0 || uid == stat_uid)
1032 send_stats (fd, dbs);
1036 if (req->type == INVALIDATE)
1037 invalidate_cache (key);
1039 termination_handler (0);
1047 send_ro_fd (serv2db[req->type], key, fd);
1052 /* Ignore the command, it's nothing we know. */
1058 /* Restart the process. */
1062 /* First determine the parameters. We do not use the parameters
1063 passed to main() since in case nscd is started by running the
1064 dynamic linker this will not work. Yes, this is not the usual
1065 case but nscd is part of glibc and we occasionally do this. */
1066 size_t buflen = 1024;
1067 char *buf = alloca (buflen);
1069 int fd = open ("/proc/self/cmdline", O_RDONLY);
1073 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1082 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1087 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1097 if (readlen < buflen)
1100 /* We might have to extend the buffer. */
1101 size_t old_buflen = buflen;
1102 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1103 buf = memmove (newp, buf, old_buflen);
1108 /* Parse the command line. Worst case scenario: every two
1109 characters form one parameter (one character plus NUL). */
1110 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1114 while (cp < buf + readlen)
1117 cp = (char *) rawmemchr (cp, '\0') + 1;
1121 /* Second, change back to the old user if we changed it. */
1122 if (server_user != NULL)
1124 if (setuid (old_uid) != 0)
1127 cannot change to old UID: %s; disabling paranoia mode"),
1134 if (setgid (old_gid) != 0)
1137 cannot change to old GID: %s; disabling paranoia mode"),
1140 setuid (server_uid);
1146 /* Next change back to the old working directory. */
1147 if (chdir (oldcwd) == -1)
1150 cannot change to old working directory: %s; disabling paranoia mode"),
1153 if (server_user != NULL)
1155 setuid (server_uid);
1156 setgid (server_gid);
1162 /* Synchronize memory. */
1163 for (int cnt = 0; cnt < lastdb; ++cnt)
1165 /* Make sure nobody keeps using the database. */
1166 dbs[cnt].head->timestamp = 0;
1168 if (dbs[cnt].persistent)
1170 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1173 /* The preparations are done. */
1174 execv ("/proc/self/exe", argv);
1176 /* If we come here, we will never be able to re-exec. */
1177 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1180 if (server_user != NULL)
1182 setuid (server_uid);
1183 setgid (server_gid);
1185 if (chdir ("/") != 0)
1186 dbg_log (_("cannot change current working directory to \"/\": %s"),
1192 /* List of file descriptors. */
1196 struct fdlist *next;
1198 /* Memory allocated for the list. */
1199 static struct fdlist *fdlist;
1200 /* List of currently ready-to-read file descriptors. */
1201 static struct fdlist *readylist;
1203 /* Condition variable and mutex to signal availability of entries in
1204 READYLIST. The condvar is initialized dynamically since we might
1205 use a different clock depending on availability. */
1206 static pthread_cond_t readylist_cond;
1207 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1209 /* The clock to use with the condvar. */
1210 static clockid_t timeout_clock = CLOCK_REALTIME;
1212 /* Number of threads ready to handle the READYLIST. */
1213 static unsigned long int nready;
1216 /* This is the main loop. It is replicated in different threads but the
1217 `poll' call makes sure only one thread handles an incoming connection. */
1219 __attribute__ ((__noreturn__))
1222 const long int my_number = (long int) p;
1223 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1224 struct timespec prune_ts;
1230 setup_thread (&dbs[my_number]);
1232 /* We are running. */
1233 dbs[my_number].head->timestamp = time (NULL);
1235 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1236 /* Should never happen. */
1239 /* Compute timeout time. */
1240 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1243 /* Initial locking. */
1244 pthread_mutex_lock (&readylist_lock);
1246 /* One more thread available. */
1251 while (readylist == NULL)
1255 /* Wait, but not forever. */
1256 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1259 /* If we were woken and there is no work to be done,
1260 just start pruning. */
1261 if (readylist == NULL && to == ETIMEDOUT)
1264 pthread_mutex_unlock (&readylist_lock);
1269 /* No need to timeout. */
1270 pthread_cond_wait (&readylist_cond, &readylist_lock);
1273 struct fdlist *it = readylist->next;
1274 if (readylist->next == readylist)
1275 /* Just one entry on the list. */
1278 readylist->next = it->next;
1280 /* Extract the information and mark the record ready to be used again. */
1285 /* One more thread available. */
1288 /* We are done with the list. */
1289 pthread_mutex_unlock (&readylist_lock);
1291 /* We do not want to block on a short read or so. */
1292 int fl = fcntl (fd, F_GETFL);
1293 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1296 /* Now read the request. */
1298 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1299 != sizeof (req), 0))
1301 /* We failed to read data. Note that this also might mean we
1302 failed because we would have blocked. */
1303 if (debug_level > 0)
1304 dbg_log (_("short read while reading request: %s"),
1305 strerror_r (errno, buf, sizeof (buf)));
1309 /* Check whether this is a valid request type. */
1310 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1313 /* Some systems have no SO_PEERCRED implementation. They don't
1314 care about security, so we don't either. */
1321 struct ucred caller;
1322 socklen_t optlen = sizeof (caller);
1324 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1326 dbg_log (_("error getting callers id: %s"),
1327 strerror_r (errno, buf, sizeof (buf)));
1331 if (req.type < GETPWBYNAME || req.type > LASTDBREQ
1332 || serv2db[req.type]->secure)
1337 else if (__builtin_expect (debug_level > 0, 0))
1339 struct ucred caller;
1340 socklen_t optlen = sizeof (caller);
1342 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1347 /* It should not be possible to crash the nscd with a silly
1348 request (i.e., a terribly large key). We limit the size to 1kb. */
1349 #define MAXKEYLEN 1024
1350 if (__builtin_expect (req.key_len, 1) < 0
1351 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1353 if (debug_level > 0)
1354 dbg_log (_("key length in request too long: %d"), req.key_len);
1359 char keybuf[MAXKEYLEN];
1361 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1365 /* Again, this can also mean we would have blocked. */
1366 if (debug_level > 0)
1367 dbg_log (_("short read while reading request key: %s"),
1368 strerror_r (errno, buf, sizeof (buf)));
1372 if (__builtin_expect (debug_level, 0) > 0)
1377 handle_request: request received (Version = %d) from PID %ld"),
1378 req.version, (long int) pid);
1382 handle_request: request received (Version = %d)"), req.version);
1385 /* Phew, we got all the data, now process it. */
1386 handle_request (fd, &req, keybuf, uid);
1393 /* Check whether we should be pruning the cache. */
1394 assert (run_prune || to == 0);
1395 if (to == ETIMEDOUT)
1398 /* The pthread_cond_timedwait() call timed out. It is time
1399 to clean up the cache. */
1400 assert (my_number < lastdb);
1401 prune_cache (&dbs[my_number], time (NULL));
1403 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1404 /* Should never happen. */
1407 /* Compute next timeout time. */
1408 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1410 /* In case the list is empty we do not want to run the prune
1411 code right away again. */
1416 pthread_mutex_lock (&readylist_lock);
1418 /* One more thread available. */
1424 static unsigned int nconns;
1429 pthread_mutex_lock (&readylist_lock);
1431 /* Find an empty entry in FDLIST. */
1433 for (inner = 0; inner < nconns; ++inner)
1434 if (fdlist[inner].next == NULL)
1436 assert (inner < nconns);
1438 fdlist[inner].fd = fd;
1440 if (readylist == NULL)
1441 readylist = fdlist[inner].next = &fdlist[inner];
1444 fdlist[inner].next = readylist->next;
1445 readylist = readylist->next = &fdlist[inner];
1448 bool do_signal = true;
1449 if (__builtin_expect (nready == 0, 0))
1454 /* Try to start another thread to help out. */
1456 if (nthreads < max_nthreads
1457 && pthread_create (&th, &attr, nscd_run,
1458 (void *) (long int) nthreads) == 0)
1460 /* We got another thread. */
1462 /* The new thread might need a kick. */
1468 pthread_mutex_unlock (&readylist_lock);
1470 /* Tell one of the worker threads there is work to do. */
1472 pthread_cond_signal (&readylist_cond);
1476 /* Check whether restarting should happen.  All of the following must
   hold: paranoia is set, READYLIST is empty, NREADY equals NTHREADS,
   and NOW has reached RESTART_TIME. */
1478 restart_p (time_t now)
1480 return (paranoia && readylist == NULL && nready == nthreads
1481 && now >= restart_time);
1485 /* Array for times a connection was accepted. */
1486 static time_t *starttime;
1490 __attribute__ ((__noreturn__))
1491 main_loop_poll (void)
1493 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1494 * sizeof (conns[0]));
1497 conns[0].events = POLLRDNORM;
1499 size_t firstfree = 1;
1503 /* Wait for any event. We wait at most a couple of seconds so
1504 that we can check whether we should close any of the accepted
1505 connections since we have not received a request. */
1506 #define MAX_ACCEPT_TIMEOUT 30
1507 #define MIN_ACCEPT_TIMEOUT 5
1508 #define MAIN_THREAD_TIMEOUT \
1509 (MAX_ACCEPT_TIMEOUT * 1000 \
1510 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1512 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1514 time_t now = time (NULL);
1516 /* If there is a descriptor ready for reading or there is a new
1517 connection, process this now. */
1520 if (conns[0].revents != 0)
1522 /* We have a new incoming connection. Accept the connection. */
1523 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1526 /* Use the descriptor if we have not reached the limit. */
1526 if (fd >= 0 && firstfree < nconns)
1528 conns[firstfree].fd = fd;
1529 conns[firstfree].events = POLLRDNORM;
1530 starttime[firstfree] = now;
1531 if (firstfree >= nused)
1532 nused = firstfree + 1;
1536 while (firstfree < nused && conns[firstfree].fd != -1);
1542 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1543 if (conns[cnt].revents != 0)
1545 fd_ready (conns[cnt].fd);
1547 /* Clean up the CONNS array. */
1549 if (cnt < firstfree)
1551 if (cnt == nused - 1)
1554 while (conns[nused - 1].fd == -1);
1560 /* Now find entries which have timed out. */
1563 /* We make the timeout length depend on the number of file
1564 descriptors currently used. */
1565 #define ACCEPT_TIMEOUT \
1566 (MAX_ACCEPT_TIMEOUT \
1567 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1568 time_t laststart = now - ACCEPT_TIMEOUT;
1570 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1572 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1574 /* Remove the entry, it timed out. */
1575 (void) close (conns[cnt].fd);
1578 if (cnt < firstfree)
1580 if (cnt == nused - 1)
1583 while (conns[nused - 1].fd == -1);
1587 if (restart_p (now))
/* Main loop based on epoll, using the epoll descriptor EFD.  SOCK is
   registered for EPOLLRDNORM first.  Each iteration then waits with
   epoll_wait for at most MAIN_THREAD_TIMEOUT milliseconds, accepts new
   connections reported for SOCK, removes descriptors of connections
   with pending events from EFD and passes them to fd_ready, and
   finally deregisters descriptors whose recorded accept time is older
   than ACCEPT_TIMEOUT seconds.  MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT
   are the macros defined inside main_loop_poll; they expand to
   expressions using `nused' and `nconns'.  */
1595 main_loop_epoll (int efd)
1597 struct epoll_event ev = { 0, };
1601 /* Add the socket. */
1602 ev.events = EPOLLRDNORM;
1604 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
1605 /* We cannot use epoll. */
1610 struct epoll_event revs[100];
/* Number of elements in REVS; passed to epoll_wait as the maximum
   number of events to report in one call.  */
1611 # define nrevs (sizeof (revs) / sizeof (revs[0]))
1613 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
1615 time_t now = time (NULL);
/* Process the N reported events.  If epoll_wait returned 0 or a
   negative value the loop body is not entered.  */
1617 for (int cnt = 0; cnt < n; ++cnt)
1618 if (revs[cnt].data.fd == sock)
1620 /* A new connection. */
1621 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1625 /* Try to add the new descriptor. */
1628 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
1629 /* The descriptor is too large or something went
1630 wrong. Close the descriptor. */
1634 /* Remember when we accepted the connection.  In this loop STARTTIME
   is indexed by the descriptor number itself. */
1635 starttime[fd] = now;
1646 /* Remove the descriptor from the epoll descriptor. */
/* This EV is a separate, zeroed object which shadows the outer
   variable of the same name.  NOTE(review): epoll_ctl(2) describes the
   event argument as ignored for EPOLL_CTL_DEL -- confirm.  */
1647 struct epoll_event ev = { 0, };
1648 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, &ev);
1650 /* Get a worker to handle the request. */
1651 fd_ready (revs[cnt].data.fd);
1653 /* Reset the time.  The timeout scan below only considers entries
   which are not zero. */
1654 starttime[revs[cnt].data.fd] = 0;
1655 if (revs[cnt].data.fd == highest)
/* Loop condition: continue while HIGHEST is positive and its
   STARTTIME entry is zero.  */
1658 while (highest > 0 && starttime[highest] == 0);
1663 /* Now look for descriptors for accepted connections which have
1664 been waiting for too long a time. */
1665 time_t laststart = now - ACCEPT_TIMEOUT;
/* Scan from HIGHEST downwards, stopping before STDERR_FILENO is
   reached; SOCK itself is never treated as a connection.  */
1666 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
1667 if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
1669 /* We are waiting for this one for too long. Close it. */
1670 struct epoll_event ev = {0, };
1671 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, &ev);
/* Separate case: the entry at HIGHEST has no recorded accept time.  */
1679 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
/* Give restart_p the time stamp taken right after epoll_wait
   returned.  */
1682 if (restart_p (now))
1689 /* Start all the threads we want. The initial process is thread no. 1.
   Before the threads are created readylist_cond is initialized, using
   CLOCK_MONOTONIC for it when that clock can be selected.  After the
   threads are created the fdlist and starttime arrays are allocated
   with nconns entries each, and main_loop_epoll is called with a
   descriptor obtained from epoll_create.  */
1691 start_threads (void)
1693 /* Initialize the condition variable we will use. The only
1694 non-standard attribute we might use is the clock selection. */
1695 pthread_condattr_t condattr;
1696 pthread_condattr_init (&condattr);
1698 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1699 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1700 /* Determine whether the monotonic clock is available. */
1701 struct timespec dummy;
/* When one of the option macros is defined as 0 the corresponding
   feature is additionally queried at run time through sysconf.  */
1702 # if _POSIX_MONOTONIC_CLOCK == 0
1703 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1705 # if _POSIX_CLOCK_SELECTION == 0
1706 if (sysconf (_SC_CLOCK_SELECTION) > 0)
/* timeout_clock is changed only if the clock can be queried and the
   condition variable attribute accepts it.  */
1708 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1709 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1710 timeout_clock = CLOCK_MONOTONIC;
1713 pthread_cond_init (&readylist_cond, &condattr);
1714 pthread_condattr_destroy (&condattr);
1717 /* Create the attribute for the threads. They are all created
1719 pthread_attr_init (&attr);
1720 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1721 /* Use 1MB stacks, twice as much for 64-bit architectures. */
/* sizeof (void *) / 4 is 1 for 4-byte pointers and 2 for 8-byte
   pointers.  */
1722 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1724 /* We allow less than LASTDB threads only for debugging. */
1725 if (debug_level == 0)
1726 nthreads = MAX (nthreads, lastdb);
/* The argument passed to each new thread is the loop index minus the
   current value of NFAILED.  */
1729 for (long int i = 0; i < nthreads; ++i)
1732 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
/* At least LASTDB threads must have been started successfully.  */
1735 if (nthreads - nfailed < lastdb)
1737 /* We could not start enough threads. */
1738 dbg_log (_("could only start %d threads; terminating"),
1739 nthreads - nfailed);
1743 /* Determine how much room for descriptors we should initially
1744 allocate. This might need to change later if we cap the number
1746 const long int nfds = sysconf (_SC_OPEN_MAX);
1748 #define MAXCONN 16384
1749 if (nfds == -1 || nfds > MAXCONN)
1751 else if (nfds < MINCONN)
1756 /* We need memory to pass descriptors on to the worker threads. */
1757 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1758 /* Array to keep track when connection was accepted. */
1759 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1761 /* In the main thread we execute the loop which handles incoming
1764 int efd = epoll_create (100);
1767 main_loop_epoll (efd);
1776 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1777 this function is called, we are not listening on the nscd socket yet so
1778 we can just use the ordinary lookup functions without causing a lockup.
   The results are stored in server_uid, server_gid, server_groups and
   server_ngroups; a failing lookup is logged and reported through
   error () with EXIT_FAILURE. */
1780 begin_drop_privileges (void)
/* Look up the password entry of the user nscd is supposed to run as.  */
1782 struct passwd *pwd = getpwnam (server_user);
1786 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1787 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1791 server_uid = pwd->pw_uid;
1792 server_gid = pwd->pw_gid;
1794 /* Save the old UID/GID if we have to change back. */
1797 old_uid = getuid ();
1798 old_gid = getgid ();
/* First getgrouplist call, made with a NULL buffer.  The group count
   is expected to come back through server_ngroups; a return value of
   0 is treated as a fatal error.  NOTE(review): this relies on
   server_ngroups being too small for the user's group list on entry --
   confirm its initial value.  */
1801 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1803 /* This really must never happen. */
1804 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1805 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
/* Room for the number of groups reported by the first call.  */
1808 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
/* Second call: fetch the group IDs themselves into server_groups.  */
1810 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1813 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1814 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1819 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1820 run nscd as the user specified in the configuration file. */
1822 finish_drop_privileges (void)
1824 if (setgroups (server_ngroups, server_groups) == -1)
1826 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1827 error (EXIT_FAILURE, errno, _("setgroups failed"));
1830 if (setgid (server_gid) == -1)
1832 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1837 if (setuid (server_uid) == -1)
1839 dbg_log (_("Failed to run nscd as user '%s'"), server_user);