1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
50 # include <sys/sendfile.h>
52 #include <sys/socket.h>
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
64 /* Support to run nscd as an unprivileged user */
65 const char *server_user;
66 static uid_t server_uid;
67 static gid_t server_gid;
68 const char *stat_user;
70 static gid_t *server_groups;
74 static int server_ngroups;
76 static pthread_attr_t attr;
78 static void begin_drop_privileges (void);
79 static void finish_drop_privileges (void);
81 /* Map request type to a string.  The table is indexed by request type
   and is what handle_request passes to dbg_log to label a request.
   Request types without a designator here are left as null pointers.  */
82 const char *const serv2str[LASTREQ] =
84 [GETPWBYNAME] = "GETPWBYNAME",
85 [GETPWBYUID] = "GETPWBYUID",
86 [GETGRBYNAME] = "GETGRBYNAME",
87 [GETGRBYGID] = "GETGRBYGID",
88 [GETHOSTBYNAME] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
92 [SHUTDOWN] = "SHUTDOWN",
93 [GETSTAT] = "GETSTAT",
94 [INVALIDATE] = "INVALIDATE",
95 [GETFDPW] = "GETFDPW",
96 [GETFDGR] = "GETFDGR",
97 [GETFDHST] = "GETFDHST",
99 [INITGROUPS] = "INITGROUPS",
100 [GETSERVBYNAME] = "GETSERVBYNAME",
101 [GETSERVBYPORT] = "GETSERVBYPORT",
102 [GETFDSERV] = "GETFDSERV",
103 [GETNETGRENT] = "GETNETGRENT",
104 [INNETGR] = "INNETGR",
105 [GETFDNETGR] = "GETFDNETGR"
108 /* The control data structures for the services.  There is one entry
   per database; the entries below can be told apart by their
   db_filename and disabled_iov initializers.  Every entry starts with
   the same default maximum size and suggested hash table size.  */
109 struct database_dyn dbs[lastdb] =
/* Entry backed by _PATH_NSCD_PASSWD_DB.  */
112 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
113 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
114 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
120 .max_db_size = DEFAULT_MAX_DB_SIZE,
121 .suggested_module = DEFAULT_SUGGESTED_MODULE,
122 .db_filename = _PATH_NSCD_PASSWD_DB,
123 .disabled_iov = &pwd_iov_disabled,
/* Entry backed by _PATH_NSCD_GROUP_DB.  */
131 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
132 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
133 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .suggested_module = DEFAULT_SUGGESTED_MODULE,
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
/* Entry backed by _PATH_NSCD_HOSTS_DB.  */
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
156 .propagate = 0, /* Not used. */
158 .max_db_size = DEFAULT_MAX_DB_SIZE,
159 .suggested_module = DEFAULT_SUGGESTED_MODULE,
160 .db_filename = _PATH_NSCD_HOSTS_DB,
161 .disabled_iov = &hst_iov_disabled,
/* Entry backed by _PATH_NSCD_SERVICES_DB.  */
169 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
170 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
171 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
175 .propagate = 0, /* Not used. */
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .db_filename = _PATH_NSCD_SERVICES_DB,
180 .disabled_iov = &serv_iov_disabled,
/* Entry backed by _PATH_NSCD_NETGROUP_DB.  */
188 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
189 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
190 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
194 .propagate = 0, /* Not used. */
196 .max_db_size = DEFAULT_MAX_DB_SIZE,
197 .suggested_module = DEFAULT_SUGGESTED_MODULE,
198 .db_filename = _PATH_NSCD_NETGROUP_DB,
199 .disabled_iov = &netgroup_iov_disabled,
209 /* Mapping of request type to database.  For every request type the
   first initializer is the flag handle_request tests as data_request
   (true when the request can be answered from a cache) and the second
   is the database the request belongs to, or NULL when the request is
   not tied to a database.  Each request type must appear at most once;
   the order of the administrative entries mirrors serv2str.  */
213 struct database_dyn *db;
214 } const reqinfo[LASTREQ] =
216 [GETPWBYNAME] = { true, &dbs[pwddb] },
217 [GETPWBYUID] = { true, &dbs[pwddb] },
218 [GETGRBYNAME] = { true, &dbs[grpdb] },
219 [GETGRBYGID] = { true, &dbs[grpdb] },
220 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
221 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
/* Administrative requests: no cache lookup and no database.  */
224 [SHUTDOWN] = { false, NULL },
225 [GETSTAT] = { false, NULL },
226 [INVALIDATE] = { false, NULL },
/* Descriptor-passing requests name a database but are not data requests.  */
227 [GETFDPW] = { false, &dbs[pwddb] },
228 [GETFDGR] = { false, &dbs[grpdb] },
229 [GETFDHST] = { false, &dbs[hstdb] },
230 [GETAI] = { true, &dbs[hstdb] },
231 [INITGROUPS] = { true, &dbs[grpdb] },
232 [GETSERVBYNAME] = { true, &dbs[servdb] },
233 [GETSERVBYPORT] = { true, &dbs[servdb] },
234 [GETFDSERV] = { false, &dbs[servdb] },
235 [GETNETGRENT] = { true, &dbs[netgrdb] },
236 [INNETGR] = { true, &dbs[netgrdb] },
237 [GETFDNETGR] = { false, &dbs[netgrdb] }
241 /* Initial number of threads to use. */
243 /* Maximum number of threads to use. */
244 int max_nthreads = 32;
246 /* Socket for incoming connections. */
250 /* Inotify descriptor. */
255 /* Descriptor for netlink status updates. */
256 static int nl_status_fd = -1;
259 #ifndef __ASSUME_SOCK_CLOEXEC
260 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
261 before we know the result. */
262 static int have_sock_cloexec;
264 #ifndef __ASSUME_ACCEPT4
265 static int have_accept4;
268 /* Number of times clients had to wait. */
269 unsigned long int client_queued;
/* Send LEN bytes starting at BUF on socket FD.  Each send call uses
   MSG_NOSIGNAL and is wrapped in TEMP_FAILURE_RETRY so that it is
   repeated when interrupted.  Returns the send result when that is
   negative, otherwise LEN - N.
   NOTE(review): N looks like the number of bytes still unsent, but the
   lines that declare and update it are not visible here -- confirm.  */
273 writeall (int fd, const void *buf, size_t len)
279 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
/* Step past the bytes that were just accepted by send.  */
282 buf = (const char *) buf + ret;
286 return ret < 0 ? ret : len - n;
/* Counterpart of writeall that uses sendfile: transfer LEN bytes from
   descriptor FROMFD, starting at file offset OFF, to TOFD.  The sendfile
   call is wrapped in TEMP_FAILURE_RETRY and is handed the address of
   OFF, so sendfile itself advances the offset.  Returns the sendfile
   result when that is negative, otherwise LEN - N.
   NOTE(review): N is presumably the byte count still outstanding; its
   declaration and updates are not visible here -- confirm.  */
292 sendfileall (int tofd, int fromfd, off_t off, size_t len)
299 ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
305 return ret < 0 ? ret : len - n;
313 /* The following three are not really used, they are symbolic constants. */
319 use_he_begin = use_he | use_begin,
320 use_he_end = use_he | use_end,
322 use_data_begin = use_data | use_begin,
323 use_data_end = use_data | use_end,
324 use_data_first = use_data_begin | use_first
/* Record in USEMAP that the LEN bytes of the data area starting at
   offset START are used for USE (use_he or use_data, optionally
   combined with use_first).  The range is rejected when it reaches
   beyond FIRST_FREE or when START has any BLOCK_ALIGN_M1 bit set.  A
   region seen for the first time receives a start marker and an end
   marker; a region that already carries the matching start marker is
   checked against the expected interior and end markers instead, and
   the use_first bit of USE is ORed into its start marker.
   NOTE(review): the return statements are not visible in this excerpt;
   the callers negate the result, so it presumably reports success as
   nonzero -- confirm.  */
329 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
330 enum usekey use, ref_t start, size_t len)
/* Range and alignment check.  */
334 if (start > first_free || start + len > first_free
335 || (start & BLOCK_ALIGN_M1))
/* First reference to this region.  */
338 if (usemap[start] == use_not)
340 /* Add the start marker. */
341 usemap[start] = use | use_begin;
/* The bytes after the start must not have been claimed yet.  */
345 if (usemap[++start] != use_not)
350 /* Add the end marker. */
351 usemap[start] = use | use_end;
/* Region already starts an object of the same kind; use_first is
   ignored for this comparison.  */
353 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
355 /* Hash entries can't be shared. */
359 usemap[start] |= (use & use_first);
/* The existing markers must describe a region of the same shape.  */
363 if (usemap[++start] != use)
366 if (usemap[++start] != (use | use_end))
370 /* Points to a wrong object or somewhere in the middle. */
377 /* Verify data in persistent database.  MEM is the mapped database
   file, READHEAD the header that the caller read from the file, and
   DBNR the index of the database in DBS.  The header is sanity-checked,
   every hash chain is walked while check_use records which bytes of
   the data area are referenced, and at the end the header is compared
   with the copy taken on entry to detect a concurrent change.
   NOTE(review): the return statements are elided in this excerpt; the
   caller treats a zero result as a failed verification.  */
379 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
381 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
384 time_t now = time (NULL);
386 struct database_pers_head *head = mem;
/* Snapshot of the header for the final consistency check.  */
387 struct database_pers_head head_copy = *head;
389 /* Check that the header that was read matches the head in the database. */
390 if (memcmp (head, readhead, sizeof (*head)) != 0)
393 /* First some easy tests: make sure the database header is sane. */
394 if (head->version != DB_VERSION
395 || head->header_size != sizeof (*head)
396 /* We allow a timestamp to be one hour (plus one minute of slack)
397 ahead of the current time. This should cover daylight saving time changes. */
398 || head->timestamp > now + 60 * 60 + 60
399 || (head->gc_cycle & 1)
401 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
402 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
403 || head->first_free < 0
404 || head->first_free > head->data_size
405 || (head->first_free & BLOCK_ALIGN_M1) != 0
406 || head->maxnentries < 0
407 || head->maxnsearched < 0)
/* One bookkeeping byte per byte of the used data area; check_use
   fills it in.  */
410 uint8_t *usemap = calloc (head->first_free, 1);
/* The data area follows the hash table, whose length is rounded up
   to a multiple of ALIGN bytes.  */
414 const char *data = (char *) &head->array[roundup (head->module,
415 ALIGN / sizeof (ref_t))];
417 nscd_ssize_t he_cnt = 0;
/* Walk the chain hanging off every hash table slot.  */
418 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
420 ref_t trail = head->array[cnt];
424 while (work != ENDREF)
426 if (! check_use (data, head->first_free, usemap, use_he, work,
427 sizeof (struct hashentry)))
430 /* Now we know we can dereference the record. */
431 struct hashentry *here = (struct hashentry *) (data + work);
435 /* Make sure the record is for this type of service. */
436 if (here->type >= LASTREQ
437 || reqinfo[here->type].db != &dbs[dbnr])
440 /* Validate boolean field value. */
441 if (here->first != false && here->first != true)
449 || here->packet > head->first_free
450 || here->packet + sizeof (struct datahead) > head->first_free)
453 struct datahead *dh = (struct datahead *) (data + here->packet);
455 if (! check_use (data, head->first_free, usemap,
456 use_data | (here->first ? use_first : 0),
457 here->packet, dh->allocsize))
460 if (dh->allocsize < sizeof (struct datahead)
461 || dh->recsize > dh->allocsize
462 || (dh->notfound != false && dh->notfound != true)
463 || (dh->usable != false && dh->usable != true))
466 if (here->key < here->packet + sizeof (struct datahead)
467 || here->key > here->packet + dh->allocsize
468 || here->key + here->len > here->packet + dh->allocsize)
474 /* A circular list, this must not happen. */
477 trail = ((struct hashentry *) (data + trail))->next;
/* The count gathered during the walk must agree with the header.  */
482 if (he_cnt != head->nentries)
485 /* See if all data and keys had at least one reference from
486 a he->first == true hashentry. */
487 for (ref_t idx = 0; idx < head->first_free; ++idx)
489 if (usemap[idx] == use_data_begin)
493 /* Finally, make sure the database hasn't changed since the first test. */
494 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
507 # define EXTRA_O_FLAGS O_CLOEXEC
509 # define EXTRA_O_FLAGS 0
513 /* Initialize database information structures. */
517 /* Look up unprivileged uid/gid/groups before we start listening on the
519 if (server_user != NULL)
520 begin_drop_privileges ();
523 /* No configuration for this value, assume a default. */
526 for (size_t cnt = 0; cnt < lastdb; ++cnt)
527 if (dbs[cnt].enabled)
529 pthread_rwlock_init (&dbs[cnt].lock, NULL);
530 pthread_mutex_init (&dbs[cnt].memlock, NULL);
532 if (dbs[cnt].persistent)
534 /* Try to open the appropriate file on disk. */
535 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
542 struct database_pers_head head;
543 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
545 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
548 /* The code is single-threaded at this point so
549 using strerror is just fine. */
550 msg = strerror (errno);
552 dbg_log (_("invalid persistent database file \"%s\": %s"),
553 dbs[cnt].db_filename, msg);
554 unlink (dbs[cnt].db_filename);
556 else if (head.module == 0 && head.data_size == 0)
558 /* The file has been created, but the head has not
559 been initialized yet. */
560 msg = _("uninitialized header");
563 else if (head.header_size != (int) sizeof (head))
565 msg = _("header size does not match");
568 else if ((total = (sizeof (head)
569 + roundup (head.module * sizeof (ref_t),
573 || total < sizeof (head))
575 msg = _("file size does not match");
578 /* Note we map with the maximum size allowed for the
579 database. This is likely much larger than the
580 actual file size. This is OK on most OSes since
581 extensions of the underlying file will
582 automatically translate more pages available for
584 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
585 PROT_READ | PROT_WRITE,
589 else if (!verify_persistent_db (mem, &head, cnt))
592 msg = _("verification failed");
597 /* Success. We have the database. */
599 dbs[cnt].memsize = total;
600 dbs[cnt].data = (char *)
601 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
602 ALIGN / sizeof (ref_t))];
603 dbs[cnt].mmap_used = true;
605 if (dbs[cnt].suggested_module > head.module)
606 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
611 /* We also need a read-only descriptor. */
614 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
615 O_RDONLY | EXTRA_O_FLAGS);
616 if (dbs[cnt].ro_fd == -1)
618 cannot create read-only descriptor for \"%s\"; no mmap"),
619 dbs[cnt].db_filename);
622 // XXX Shall we test whether the descriptors actually
623 // XXX point to the same file?
626 /* Close the file descriptors in case something went
627 wrong in which case the variables have not been
632 else if (errno == EACCES)
633 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
634 dbs[cnt].db_filename);
637 if (dbs[cnt].head == NULL)
639 /* No database loaded. Allocate the data structure,
641 struct database_pers_head head;
642 size_t total = (sizeof (head)
643 + roundup (dbs[cnt].suggested_module
644 * sizeof (ref_t), ALIGN)
645 + (dbs[cnt].suggested_module
646 * DEFAULT_DATASIZE_PER_BUCKET));
648 /* Try to create the database. If we do not need a
649 persistent database create a temporary file. */
652 if (dbs[cnt].persistent)
654 fd = open (dbs[cnt].db_filename,
655 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
657 if (fd != -1 && dbs[cnt].shared)
658 ro_fd = open (dbs[cnt].db_filename,
659 O_RDONLY | EXTRA_O_FLAGS);
663 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
664 fd = mkostemp (fname, EXTRA_O_FLAGS);
666 /* We do not need the file name anymore after we
667 opened another file descriptor in read-only mode. */
671 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
681 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
682 dbnames[cnt], dbs[cnt].db_filename);
683 do_exit (1, 0, NULL);
686 if (dbs[cnt].persistent)
687 dbg_log (_("cannot create %s; no persistent database used"),
688 dbs[cnt].db_filename);
690 dbg_log (_("cannot create %s; no sharing possible"),
691 dbs[cnt].db_filename);
693 dbs[cnt].persistent = 0;
694 // XXX remember: no mmap
698 /* Tell the user if we could not create the read-only
700 if (ro_fd == -1 && dbs[cnt].shared)
702 cannot create read-only descriptor for \"%s\"; no mmap"),
703 dbs[cnt].db_filename);
705 /* Before we create the header, initialize the hash
706 table. That way if we get interrupted while writing
707 the header we can recognize a partially initialized
709 size_t ps = sysconf (_SC_PAGESIZE);
711 assert (~ENDREF == 0);
712 memset (tmpbuf, '\xff', ps);
714 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
715 off_t offset = sizeof (head);
718 if (offset % ps != 0)
720 towrite = MIN (remaining, ps - (offset % ps));
721 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
724 remaining -= towrite;
727 while (remaining > ps)
729 if (pwrite (fd, tmpbuf, ps, offset) == -1)
736 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
739 /* Create the header of the file. */
740 struct database_pers_head head =
742 .version = DB_VERSION,
743 .header_size = sizeof (head),
744 .module = dbs[cnt].suggested_module,
745 .data_size = (dbs[cnt].suggested_module
746 * DEFAULT_DATASIZE_PER_BUCKET),
751 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
753 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
755 || (mem = mmap (NULL, dbs[cnt].max_db_size,
756 PROT_READ | PROT_WRITE,
757 MAP_SHARED, fd, 0)) == MAP_FAILED)
760 unlink (dbs[cnt].db_filename);
761 dbg_log (_("cannot write to database file %s: %s"),
762 dbs[cnt].db_filename, strerror (errno));
763 dbs[cnt].persistent = 0;
769 dbs[cnt].data = (char *)
770 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
771 ALIGN / sizeof (ref_t))];
772 dbs[cnt].memsize = total;
773 dbs[cnt].mmap_used = true;
775 /* Remember the descriptors. */
777 dbs[cnt].ro_fd = ro_fd;
789 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
790 /* We do not check here whether the O_CLOEXEC provided to the
791 open call was successful or not. The two fcntl calls are
792 only performed once each per process start-up and therefore
793 is not noticeable at all. */
795 && ((dbs[cnt].wr_fd != -1
796 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
797 || (dbs[cnt].ro_fd != -1
798 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
801 cannot set socket to close on exec: %s; disabling paranoia mode"),
807 if (dbs[cnt].head == NULL)
809 /* We do not use the persistent database. Just
810 create an in-memory data structure. */
811 assert (! dbs[cnt].persistent);
813 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
814 + (dbs[cnt].suggested_module
816 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
817 assert (~ENDREF == 0);
818 memset (dbs[cnt].head->array, '\xff',
819 dbs[cnt].suggested_module * sizeof (ref_t));
820 dbs[cnt].head->module = dbs[cnt].suggested_module;
821 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
822 * dbs[cnt].head->module);
823 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
824 dbs[cnt].head->first_free = 0;
827 assert (dbs[cnt].ro_fd == -1);
831 /* Create the socket. */
832 #ifndef __ASSUME_SOCK_CLOEXEC
834 if (have_sock_cloexec >= 0)
837 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
838 #ifndef __ASSUME_SOCK_CLOEXEC
839 if (have_sock_cloexec == 0)
840 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
843 #ifndef __ASSUME_SOCK_CLOEXEC
844 if (have_sock_cloexec < 0)
845 sock = socket (AF_UNIX, SOCK_STREAM, 0);
849 dbg_log (_("cannot open socket: %s"), strerror (errno));
850 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
852 /* Bind a name to the socket. */
853 struct sockaddr_un sock_addr;
854 sock_addr.sun_family = AF_UNIX;
855 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
856 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
858 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
859 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec < 0)
865 /* We don't want to get stuck on accept. */
866 int fl = fcntl (sock, F_GETFL);
867 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
869 dbg_log (_("cannot change socket to nonblocking mode: %s"),
871 do_exit (1, 0, NULL);
874 /* The descriptor needs to be closed on exec. */
875 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
877 dbg_log (_("cannot set socket to close on exec: %s"),
879 do_exit (1, 0, NULL);
884 /* Set permissions for the socket. */
885 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
887 /* Set the socket up to accept connections. */
888 if (listen (sock, SOMAXCONN) < 0)
890 dbg_log (_("cannot enable socket to accept connections: %s"),
892 do_exit (1, 0, NULL);
896 if (dbs[hstdb].enabled)
898 /* Try to open netlink socket to monitor network setting changes. */
899 nl_status_fd = socket (AF_NETLINK,
900 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
902 if (nl_status_fd != -1)
904 struct sockaddr_nl snl;
905 memset (&snl, '\0', sizeof (snl));
906 snl.nl_family = AF_NETLINK;
907 /* XXX Is this the best set to use? */
908 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
909 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
910 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
911 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
912 | RTMGRP_IPV6_PREFIX);
914 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
916 close (nl_status_fd);
921 /* Start the timestamp process. */
922 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
923 = __bump_nl_timestamp ();
925 # ifndef __ASSUME_SOCK_CLOEXEC
926 if (have_sock_cloexec < 0)
928 /* We don't want to get stuck on accept. */
929 int fl = fcntl (nl_status_fd, F_GETFL);
931 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
934 cannot change socket to nonblocking mode: %s"),
936 do_exit (1, 0, NULL);
939 /* The descriptor needs to be closed on exec. */
941 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
943 dbg_log (_("cannot set socket to close on exec: %s"),
945 do_exit (1, 0, NULL);
954 /* Change to unprivileged uid/gid/groups if specified in config file */
955 if (server_user != NULL)
956 finish_drop_privileges ();
960 /* Register the file in FINFO as a traced file for the database DBS[DBIDX].
962 We support registering multiple files per database. Each call to
963 register_traced_file adds to the list of registered files.
965 When we prune the database, either through timeout or a request to
966 invalidate, we will check to see if any of the registered files has changed.
967 When we accept new connections to handle a cache request we will also
968 check to see if any of the registered files has changed.
970 If we have inotify support then we install an inotify fd to notify us of
971 file deletion or modification, both of which will require we invalidate
972 the cache for the database. Without inotify support we stat the file and
973 store st_mtime to determine if the file has been modified. */
975 register_traced_file (size_t dbidx, struct traced_file *finfo)
977 /* If the database is disabled or file checking is disabled
978 then ignore the registration. */
979 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
982 if (__glibc_unlikely (debug_level > 0))
983 dbg_log (_("register trace file %s for database %s"),
984 finfo->fname, dbnames[dbidx]);
988 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
993 /* We need the modification date of the file. */
996 if (stat64 (finfo->fname, &st) < 0)
998 /* We cannot stat() the file, disable file checking. */
999 dbg_log (_("cannot stat() file `%s': %s"),
1000 finfo->fname, strerror (errno));
/* No inotify watch for this file: remember its modification time so
   that a later change can be detected by comparison.  */
1004 finfo->inotify_descr = -1;
1005 finfo->mtime = st.st_mtime;
1008 /* Queue up the file name.  FINFO becomes the new head of the
   traced_files list of the database.  */
1009 finfo->next = dbs[dbidx].traced_files;
1010 dbs[dbidx].traced_files = finfo;
1014 /* Close the connections. */
1016 close_sockets (void)
/* Invalidate the cache of the database named by KEY and report the
   outcome to the client on FD.  KEY is compared against the entries of
   dbnames; when it matches no database a response is written and
   nothing is pruned.  An enabled database is pruned with a time value
   of LONG_MAX while prune_run_lock is held.
   NOTE(review): the declaration and values of RESP, and the action
   taken for traced files with call_res_init set, are not visible in
   this excerpt.  */
1023 invalidate_cache (char *key, int fd)
/* Look for the database whose name equals KEY.  */
1028 for (number = pwddb; number < lastdb; ++number)
1029 if (strcmp (key, dbnames[number]) == 0)
1031 if (number == hstdb)
1033 struct traced_file *runp = dbs[hstdb].traced_files;
1034 while (runp != NULL)
1035 if (runp->call_res_init)
/* The loop ran to completion: KEY names no known database.  */
1046 if (number == lastdb)
1049 writeall (fd, &resp, sizeof (resp));
/* prune_cache is only called for an enabled database and with
   prune_run_lock held.  */
1053 if (dbs[number].enabled)
1055 pthread_mutex_lock (&dbs[number].prune_run_lock);
1056 prune_cache (&dbs[number], LONG_MAX, fd);
1057 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1062 writeall (fd, &resp, sizeof (resp));
/* Pass the read-only descriptor of DB to the client connected on FD.
   The message payload consists of KEY (including its terminating NUL)
   followed by the 64-bit size of the mapping; the descriptor travels
   in an SCM_RIGHTS control message.  Nothing is sent when DB has no
   read-only descriptor, and a failing sendmsg is ignored.
   NOTE(review): the store of db->ro_fd through IP is on a line that is
   not visible in this excerpt.  */
1069 send_ro_fd (struct database_dyn *db, char *key, int fd)
1071 /* If we do not have a read-only file descriptor do nothing. */
1072 if (db->ro_fd == -1)
1075 /* We need to send some data along with the descriptor: the key and
   the total size of header, aligned hash table and data area. */
1076 uint64_t mapsize = (db->head->data_size
1077 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1078 + sizeof (struct database_pers_head));
1079 struct iovec iov[2];
1080 iov[0].iov_base = key;
1081 iov[0].iov_len = strlen (key) + 1;
1082 iov[1].iov_base = &mapsize;
1083 iov[1].iov_len = sizeof (mapsize);
1085 /* Prepare the control message to transfer the descriptor. */
1089 char bytes[CMSG_SPACE (sizeof (int))];
1091 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1092 .msg_control = buf.bytes,
1093 .msg_controllen = sizeof (buf) };
1094 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1096 cmsg->cmsg_level = SOL_SOCKET;
1097 cmsg->cmsg_type = SCM_RIGHTS;
1098 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1100 int *ip = (int *) CMSG_DATA (cmsg);
/* Shrink the control length to the single message that was built.  */
1103 msg.msg_controllen = cmsg->cmsg_len;
1105 /* Send the control message. We repeat when we are interrupted but
1106 everything else is ignored. */
1107 #ifndef MSG_NOSIGNAL
1108 # define MSG_NOSIGNAL 0
1110 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1112 if (__glibc_unlikely (debug_level > 0))
1113 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1115 #endif /* SCM_RIGHTS */
1118 /* Handle new request.  FD is the client connection, REQ the request
   header and KEY the key that came with it.  UID is handed on to the
   add* functions and is checked for the GETSTAT request; PID is used
   here only to build the /proc/PID/exe path for a log message.
   The request version and, if enabled, the SELinux permission are
   checked first.  A data request (see reqinfo) is then answered with
   the prepared reply when its database is disabled, or from the cache
   when cache_search finds it; otherwise the request is dispatched by
   type to the add* functions or to the administrative commands.
   NOTE(review): req->type indexes reqinfo and serv2str without a range
   check that is visible in this excerpt -- confirm that the caller
   validates it.  */
1120 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1122 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1124 if (debug_level > 0)
1126 cannot handle old request version %d; current version is %d"),
1127 req->version, NSCD_VERSION);
1131 /* Perform the SELinux check before we go on to the standard checks. */
1132 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1134 if (debug_level > 0)
1143 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1144 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1148 request from %ld not handled due to missing permission"), (long int) pid);
1153 request from '%s' [%ld] not handled due to missing permission"),
1154 buf, (long int) pid);
1157 dbg_log (_("request not handled due to missing permission"));
1163 struct database_dyn *db = reqinfo[req->type].db;
1165 /* See whether we can service the request from the cache. */
1166 if (__builtin_expect (reqinfo[req->type].data_request, true))
1168 if (__builtin_expect (debug_level, 0) > 0)
1170 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1172 char buf[INET6_ADDRSTRLEN];
1174 dbg_log ("\t%s (%s)", serv2str[req->type],
1175 inet_ntop (req->type == GETHOSTBYADDR
1176 ? AF_INET : AF_INET6,
1177 key, buf, sizeof (buf)));
1180 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1183 /* Is this service enabled? */
1184 if (__glibc_unlikely (!db->enabled))
1186 /* No, send the prepared record. */
1187 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1188 db->disabled_iov->iov_len,
1190 != (ssize_t) db->disabled_iov->iov_len
1191 && __builtin_expect (debug_level, 0) > 0)
1193 /* We have problems sending the result. */
1195 dbg_log (_("cannot write result: %s"),
1196 strerror_r (errno, buf, sizeof (buf)));
1202 /* Be sure we can read the data.  A failed try-lock is counted in
   rdlockdelayed before we block for the read lock. */
1203 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1205 ++db->head->rdlockdelayed;
1206 pthread_rwlock_rdlock (&db->lock);
1209 /* See whether we can handle it from the cache. */
1210 struct datahead *cached;
1211 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1215 /* Hurray it's in the cache. */
1218 #ifdef HAVE_SENDFILE
1219 if (__glibc_likely (db->mmap_used))
1221 assert (db->wr_fd != -1);
1222 assert ((char *) cached->data > (char *) db->data);
1223 assert ((char *) cached->data - (char *) db->head
1225 <= (sizeof (struct database_pers_head)
1226 + db->head->module * sizeof (ref_t)
1227 + db->head->data_size));
1228 nwritten = sendfileall (fd, db->wr_fd,
1229 (char *) cached->data
1230 - (char *) db->head, cached->recsize);
1231 # ifndef __ASSUME_SENDFILE
1232 if (nwritten == -1 && errno == ENOSYS)
1237 # ifndef __ASSUME_SENDFILE
1241 nwritten = writeall (fd, cached->data, cached->recsize);
1243 if (nwritten != cached->recsize
1244 && __builtin_expect (debug_level, 0) > 0)
1246 /* We have problems sending the result. */
1248 dbg_log (_("cannot write result: %s"),
1249 strerror_r (errno, buf, sizeof (buf)));
1252 pthread_rwlock_unlock (&db->lock);
1257 pthread_rwlock_unlock (&db->lock);
1259 else if (__builtin_expect (debug_level, 0) > 0)
1261 if (req->type == INVALIDATE)
1262 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1264 dbg_log ("\t%s", serv2str[req->type]);
1267 /* Handle the request. */
1271 addpwbyname (db, fd, req, key, uid);
1275 addpwbyuid (db, fd, req, key, uid);
1279 addgrbyname (db, fd, req, key, uid);
1283 addgrbygid (db, fd, req, key, uid);
1287 addhstbyname (db, fd, req, key, uid);
1290 case GETHOSTBYNAMEv6:
1291 addhstbynamev6 (db, fd, req, key, uid);
1295 addhstbyaddr (db, fd, req, key, uid);
1298 case GETHOSTBYADDRv6:
1299 addhstbyaddrv6 (db, fd, req, key, uid);
1303 addhstai (db, fd, req, key, uid);
1307 addinitgroups (db, fd, req, key, uid);
1311 addservbyname (db, fd, req, key, uid);
1315 addservbyport (db, fd, req, key, uid);
1319 addgetnetgrent (db, fd, req, key, uid);
1323 addinnetgr (db, fd, req, key, uid);
1330 /* Get the caller's credentials. */
1332 struct ucred caller;
1333 socklen_t optlen = sizeof (caller);
1335 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1339 dbg_log (_("error getting caller's id: %s"),
1340 strerror_r (errno, buf, sizeof (buf)));
1346 /* Some systems have no SO_PEERCRED implementation. They don't
1347 care about security so we don't either. */
1352 /* Accept shutdown, getstat and invalidate only from root. For
1353 the stat call also allow the user specified in the config file. */
1354 if (req->type == GETSTAT)
1356 if (uid == 0 || uid == stat_uid)
1357 send_stats (fd, dbs);
1361 if (req->type == INVALIDATE)
1362 invalidate_cache (key, fd);
1364 termination_handler (0);
1374 send_ro_fd (reqinfo[req->type].db, key, fd);
1379 /* Ignore the command, it's nothing we know. */
1385 /* Restart the process. */
1389 /* First determine the parameters. We do not use the parameters
1390 passed to main() since in case nscd is started by running the
1391 dynamic linker this will not work. Yes, this is not the usual
1392 case but nscd is part of glibc and we occasionally do this. */
1393 size_t buflen = 1024;
1394 char *buf = alloca (buflen);
1396 int fd = open ("/proc/self/cmdline", O_RDONLY);
1400 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1409 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1414 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1424 if (readlen < buflen)
1427 /* We might have to extend the buffer. */
1428 size_t old_buflen = buflen;
1429 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1430 buf = memmove (newp, buf, old_buflen);
1435 /* Parse the command line. Worst case scenario: every two
1436 characters form one parameter (one character plus NUL). */
1437 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1441 while (cp < buf + readlen)
1444 cp = (char *) rawmemchr (cp, '\0') + 1;
1448 /* Second, change back to the old user if we changed it. */
1449 if (server_user != NULL)
1451 if (setresuid (old_uid, old_uid, old_uid) != 0)
1454 cannot change to old UID: %s; disabling paranoia mode"),
1461 if (setresgid (old_gid, old_gid, old_gid) != 0)
1464 cannot change to old GID: %s; disabling paranoia mode"),
1467 ignore_value (setuid (server_uid));
1473 /* Next change back to the old working directory. */
1474 if (chdir (oldcwd) == -1)
1477 cannot change to old working directory: %s; disabling paranoia mode"),
1480 if (server_user != NULL)
1482 ignore_value (setuid (server_uid));
1483 ignore_value (setgid (server_gid));
1489 /* Synchronize memory. */
1490 int32_t certainly[lastdb];
1491 for (int cnt = 0; cnt < lastdb; ++cnt)
1492 if (dbs[cnt].enabled)
1494 /* Make sure nobody keeps using the database. */
1495 dbs[cnt].head->timestamp = 0;
1496 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1497 dbs[cnt].head->nscd_certainly_running = 0;
1499 if (dbs[cnt].persistent)
1501 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1504 /* The preparations are done. */
1506 char pathbuf[PATH_MAX];
1510 /* Try to exec the real nscd program so the process name (as reported
1511 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1512 if readlink or the exec with the result of the readlink call fails. */
1513 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1517 execv (pathbuf, argv);
1519 execv ("/proc/self/exe", argv);
1521 /* If we come here, we will never be able to re-exec. */
1522 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1525 if (server_user != NULL)
1527 ignore_value (setuid (server_uid));
1528 ignore_value (setgid (server_gid));
1530 if (chdir ("/") != 0)
1531 dbg_log (_("cannot change current working directory to \"/\": %s"),
1535 /* Reenable the databases. */
1536 time_t now = time (NULL);
1537 for (int cnt = 0; cnt < lastdb; ++cnt)
1538 if (dbs[cnt].enabled)
1540 dbs[cnt].head->timestamp = now;
1541 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1546 /* List of file descriptors. */
/* Each element holds one descriptor (the FD member filled in by the
   enqueue code) and a NEXT link.  A NULL link is what the enqueue code
   tests for when it searches for an empty entry.  */
1550 struct fdlist *next;
1552 /* Memory allocated for the list. */
/* start_threads allocates this as an array of NCONNS zeroed elements.  */
1553 static struct fdlist *fdlist;
1554 /* List of currently ready-to-read file descriptors. */
/* The list is circular: READYLIST points at the most recently queued
   element and READYLIST->next is the element a worker takes next.  NULL
   means there is nothing queued.  */
1555 static struct fdlist *readylist;
1557 /* Condition variable and mutex to signal availability of entries in
1558 READYLIST. The condvar is initialized dynamically since we might
1559 use a different clock depending on availability. */
/* NOTE(review): READYLIST_COND is initialized statically right here and
   the visible code only waits on it without a timeout.  The remark about
   dynamic initialization seems to fit the per-database prune_cond that
   start_threads sets up with pthread_cond_init -- confirm.  */
1560 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1561 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1563 /* The clock to use with the condvar. */
/* start_threads switches this to CLOCK_MONOTONIC when that clock can be
   selected for condition variables; nscd_run_prune reads it with
   clock_gettime to compute its wait deadlines.  */
1564 static clockid_t timeout_clock = CLOCK_REALTIME;
1566 /* Number of threads ready to handle the READYLIST. */
/* restart_p compares this with NTHREADS, and the enqueue code tries to
   start another worker when it is zero.  */
1567 static unsigned long int nready;
1570 /* Function for the clean-up threads. */
/* P is not dereferenced: it carries the index into DBS of the database
   this thread looks after, and that database must be enabled.  The
   function is declared noreturn.  */
1572 __attribute__ ((__noreturn__))
1573 nscd_run_prune (void *p)
1575 const long int my_number = (long int) p;
1576 assert (dbs[my_number].enabled);
/* The result decides further down whether the mapping timestamp has to
   be refreshed periodically.  */
1578 int dont_need_update = setup_thread (&dbs[my_number]);
1580 time_t now = time (NULL);
1582 /* We are running. */
1583 dbs[my_number].head->timestamp = now;
/* PRUNE_TS is the absolute deadline handed to pthread_cond_timedwait.  It
   is read from TIMEOUT_CLOCK, whereas NOW and WAKEUP_TIME are based on
   time ().  */
1585 struct timespec prune_ts;
1586 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1587 /* Should never happen. */
1590 /* Compute the initial timeout time. Prevent all the timers from going
1591 off at the same time by adding a db-based value. */
1592 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1593 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1595 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1596 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1597 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
/* PRUNE_LOCK protects CLEAR_CACHE and WAKEUP_TIME.  It is held from here
   on except while prune_cache runs.  */
1599 pthread_mutex_lock (prune_lock);
1602 /* Wait, but not forever. */
/* Do not wait at all when a cache clear has already been requested.  */
1604 if (! dbs[my_number].clear_cache)
1605 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1606 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
/* Prune when the wait timed out, when the published wake-up time has
   been reached, or when a clear was requested.  */
1610 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1611 || dbs[my_number].clear_cache)
1613 /* We will determine the new timeout values based on the
1614 cache content. Should there be concurrent additions to
1615 the cache which are not accounted for in the cache
1616 pruning we want to know about it. Therefore set the
1617 timeout to the maximum. It will be decreased when adding
1618 new entries to the cache, if necessary. */
1619 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1621 /* Unconditionally reset the flag. */
/* A requested clear passes LONG_MAX as the reference time; presumably
   this makes prune_cache treat every entry as expired -- confirm against
   prune_cache.  */
1622 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1623 dbs[my_number].clear_cache = 0;
1625 pthread_mutex_unlock (prune_lock);
1627 /* We use a separate lock for running the prune function (instead
1628 of keeping prune_lock locked) because this enables concurrent
1629 invocations of cache_add which might modify the timeout value. */
1630 pthread_mutex_lock (prune_run_lock);
1631 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1632 pthread_mutex_unlock (prune_run_lock);
/* Never wait for less than CACHE_PRUNE_INTERVAL.  */
1634 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1635 /* If clients cannot determine for sure whether nscd is running
1636 we need to wake up occasionally to update the timestamp.
1637 Wait 90% of the update period. */
1638 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1639 if (__glibc_unlikely (! dont_need_update))
1641 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1642 dbs[my_number].head->timestamp = now;
1645 pthread_mutex_lock (prune_lock);
1647 /* Make it known when we will wake up again. */
/* WAKEUP_TIME may have been lowered while PRUNE_LOCK was released; the
   earlier of the two wake-up times is the one that gets used.  */
1648 if (now + next_wait < dbs[my_number].wakeup_time)
1649 dbs[my_number].wakeup_time = now + next_wait;
1651 next_wait = dbs[my_number].wakeup_time - now;
1654 /* The cache was just pruned. Do not do it again now. Just
1655 use the new timeout value. */
1656 next_wait = dbs[my_number].wakeup_time - now;
1658 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1659 /* Should never happen. */
1662 /* Compute next timeout time. */
1663 prune_ts.tv_sec += next_wait;
1668 /* This is the main loop. It is replicated in different threads but
1669 the use of the ready list makes sure only one thread handles an
1670 incoming connection. */
/* Each round takes one descriptor off READYLIST, reads a request header
   and key from it, and hands both to handle_request.  P is not used in
   the part of the body visible here.  The function is declared
   noreturn.  */
1672 __attribute__ ((__noreturn__))
1673 nscd_run_worker (void *p)
1677 /* Initial locking. */
1678 pthread_mutex_lock (&readylist_lock);
1680 /* One more thread available. */
/* pthread_cond_wait releases READYLIST_LOCK while sleeping; the condition
   is re-tested after every wakeup.  */
1685 while (readylist == NULL)
1686 pthread_cond_wait (&readylist_cond, &readylist_lock);
/* READYLIST is circular, so the element following READYLIST is the one
   taken here.  */
1688 struct fdlist *it = readylist->next;
1689 if (readylist->next == readylist)
1690 /* Just one entry on the list. */
1693 readylist->next = it->next;
1695 /* Extract the information and mark the record ready to be used
1700 /* One more thread available. */
1703 /* We are done with the list. */
1704 pthread_mutex_unlock (&readylist_lock);
1706 #ifndef __ASSUME_ACCEPT4
1707 if (have_accept4 < 0)
1709 /* We do not want to block on a short read or so. */
1710 int fl = fcntl (fd, F_GETFL);
1711 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1716 /* Now read the request. */
/* Anything but a complete header counts as a failure.  */
1718 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1719 != sizeof (req), 0))
1721 /* We failed to read data. Note that this also might mean we
1722 failed because we would have blocked. */
1723 if (debug_level > 0)
1724 dbg_log (_("short read while reading request: %s"),
1725 strerror_r (errno, buf, sizeof (buf)));
1729 /* Check whether this is a valid request type. */
1730 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1733 /* Some systems have no SO_PEERCRED implementation. They don't
1734 care about security so we don't as well. */
/* The peer credentials are only queried when debugging output is on.  */
1739 if (__glibc_unlikely (debug_level > 0))
1741 struct ucred caller;
1742 socklen_t optlen = sizeof (caller);
1744 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1748 const pid_t pid = 0;
1751 /* It should not be possible to crash the nscd with a silly
1752 request (i.e., a terribly large key). We limit the size to 1kb. */
1753 if (__builtin_expect (req.key_len, 1) < 0
1754 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1756 if (debug_level > 0)
1757 dbg_log (_("key length in request too long: %d"), req.key_len);
/* One byte more than the largest accepted key so that the terminating
   NUL stored below always fits.  */
1762 char keybuf[MAXKEYLEN + 1];
1764 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1768 /* Again, this can also mean we would have blocked. */
1769 if (debug_level > 0)
1770 dbg_log (_("short read while reading request key: %s"),
1771 strerror_r (errno, buf, sizeof (buf)));
1774 keybuf[req.key_len] = '\0';
1776 if (__builtin_expect (debug_level, 0) > 0)
1781 handle_request: request received (Version = %d) from PID %ld"),
1782 req.version, (long int) pid);
1786 handle_request: request received (Version = %d)"), req.version);
1789 /* Phew, we got all the data, now process it. */
1790 handle_request (fd, &req, keybuf, uid, pid);
/* Take the list lock again before the next round.  */
1798 pthread_mutex_lock (&readylist_lock);
1800 /* One more thread available. */
/* Number of elements in the FDLIST and STARTTIME arrays allocated by
   start_threads, and in the CONNS array of main_loop_poll.  */
1807 static unsigned int nconns;
/* NOTE(review): the signature of the routine below is not visible in this
   extract.  The body queues a descriptor FD on READYLIST and matches the
   fd_ready (fd) calls made by the main loops -- confirm.  */
1812 pthread_mutex_lock (&readylist_lock);
1814 /* Find an empty entry in FDLIST. */
/* An entry whose NEXT link is NULL is not on any list.  */
1816 for (inner = 0; inner < nconns; ++inner)
1817 if (fdlist[inner].next == NULL)
1819 assert (inner < nconns);
1821 fdlist[inner].fd = fd;
/* Empty list: the new entry becomes a one-element ring pointing at
   itself.  */
1823 if (readylist == NULL)
1824 readylist = fdlist[inner].next = &fdlist[inner];
/* Otherwise link the new entry in behind the current READYLIST element
   and make it the new READYLIST.  */
1827 fdlist[inner].next = readylist->next;
1828 readylist = readylist->next = &fdlist[inner];
1831 bool do_signal = true;
/* No worker is currently waiting for work.  */
1832 if (__glibc_unlikely (nready == 0))
1837 /* Try to start another thread to help out. */
/* The current thread count is passed to the new worker as its
   argument.  */
1839 if (nthreads < max_nthreads
1840 && pthread_create (&th, &attr, nscd_run_worker,
1841 (void *) (long int) nthreads) == 0)
1843 /* We got another thread. */
1845 /* The new thread might need a kick. */
1851 pthread_mutex_unlock (&readylist_lock);
1853 /* Tell one of the worker threads there is work to do. */
/* The signal is sent after READYLIST_LOCK has been released.  */
1855 pthread_cond_signal (&readylist_cond);
1859 /* Check whether restarting should happen. */
/* The result is true only when paranoia mode is on, no descriptor is
   queued on READYLIST, the number of ready workers equals NTHREADS, and
   NOW has reached RESTART_TIME.  */
1861 restart_p (time_t now)
1863 return (paranoia && readylist == NULL && nready == nthreads
1864 && now >= restart_time);
1868 /* Array for times a connection was accepted. */
/* Allocated zeroed with NCONNS elements by start_threads.  main_loop_poll
   indexes it by slot in its CONNS array; main_loop_epoll indexes it by
   descriptor number and uses 0 to mean no pending connection.  */
1869 static time_t *starttime;
1872 /* Inotify event for changed file. */
/* The main loops read from the inotify descriptor into an object of this
   type and pass it to inotify_check_files, which looks at I.wd.  */
1875 struct inotify_event i;
/* NOTE(review): presumably a fallback for systems that do not define
   PATH_MAX; the guarding conditional is not visible here -- confirm.  */
1877 # define PATH_MAX 1024
/* Sized for the fixed event header plus PATH_MAX further bytes.  */
1879 char buf[sizeof (struct inotify_event) + PATH_MAX];
1882 /* Process the inotify event in INEV. If the event matches any of the files
1883 registered with a database then mark that database as requiring its cache
1884 to be cleared. We indicate the cache needs clearing by setting
1885 TO_CLEAR[DBCNT] to true for the matching database. */
/* TO_CLEAR is indexed by database number and needs LASTDB entries.
   Entries are only ever set to true here, never reset, so results from
   several events accumulate in the same array.  */
1887 inotify_check_files (bool *to_clear, union __inev *inev)
1889 /* Check which of the files changed. */
1890 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
/* Walk the list of files traced for this database.  */
1892 struct traced_file *finfo = dbs[dbcnt].traced_files;
1894 while (finfo != NULL)
1896 /* Inotify event watch descriptor matches. */
1897 if (finfo->inotify_descr == inev->i.wd)
1899 /* Mark cache as needing to be cleared and reinitialize. */
1900 to_clear[dbcnt] = true;
/* NOTE(review): the statement guarded by this test is not visible in
   this extract; the flag name suggests a resolver re-initialization --
   confirm.  */
1901 if (finfo->call_res_init)
1906 finfo = finfo->next;
1911 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1912 for the associated database, otherwise do nothing. The TO_CLEAR array must
1913 have LASTDB entries. */
/* The cache is not emptied here.  The function sets the database's
   CLEAR_CACHE flag and wakes the thread waiting on PRUNE_COND;
   nscd_run_prune is the code that tests the flag.  */
1915 clear_db_cache (bool *to_clear)
1917 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1918 if (to_clear[dbcnt])
/* The flag is changed under PRUNE_LOCK, the same lock the prune thread
   holds while it tests the flag; the signal is sent after unlocking.  */
1920 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1921 dbs[dbcnt].clear_cache = 1;
1922 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1923 pthread_cond_signal (&dbs[dbcnt].prune_cond);
/* Main loop of the main thread, poll () variant.  It accepts connections
   on SOCK, watches the optional inotify and netlink descriptors, hands
   readable client descriptors to the workers through fd_ready, and closes
   connections that have been idle for too long.  The function is declared
   noreturn.  */
1930 __attribute__ ((__noreturn__))
1931 main_loop_poll (void)
/* One pollfd per possible connection.  */
1933 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1934 * sizeof (conns[0]));
/* Slot 0 is the one checked for new incoming connections below.  */
1937 conns[0].events = POLLRDNORM;
1939 size_t firstfree = 1;
/* Slot 1 is used for the inotify descriptor when there is one.  */
1942 if (inotify_fd != -1)
1944 conns[1].fd = inotify_fd;
1945 conns[1].events = POLLRDNORM;
/* An index of 0 means that no netlink descriptor is being polled.  */
1952 size_t idx_nl_status_fd = 0;
1953 if (nl_status_fd != -1)
1955 idx_nl_status_fd = nused;
1956 conns[nused].fd = nl_status_fd;
1957 conns[nused].events = POLLRDNORM;
1965 /* Wait for any event. We wait at most a couple of seconds so
1966 that we can check whether we should close any of the accepted
1967 connections since we have not received a request. */
/* MAIN_THREAD_TIMEOUT is in milliseconds.  It starts near
   MAX_ACCEPT_TIMEOUT seconds and drops to the midpoint between
   MAX_ACCEPT_TIMEOUT and MIN_ACCEPT_TIMEOUT as NUSED approaches
   NCONNS.  */
1968 #define MAX_ACCEPT_TIMEOUT 30
1969 #define MIN_ACCEPT_TIMEOUT 5
1970 #define MAIN_THREAD_TIMEOUT \
1971 (MAX_ACCEPT_TIMEOUT * 1000 \
1972 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1974 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1976 time_t now = time (NULL);
1978 /* If there is a descriptor ready for reading or there is a new
1979 connection, process this now. */
1982 if (conns[0].revents != 0)
1984 /* We have a new incoming connection. Accept the connection. */
1987 #ifndef __ASSUME_ACCEPT4
1989 if (have_accept4 >= 0)
1992 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1994 #ifndef __ASSUME_ACCEPT4
/* First use: remember whether accept4 is available.  Only ENOSYS is
   taken to mean it is missing.  */
1995 if (have_accept4 == 0)
1996 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1999 #ifndef __ASSUME_ACCEPT4
2000 if (have_accept4 < 0)
2001 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2004 /* Use the descriptor if we have not reached the limit. */
2007 if (firstfree < nconns)
2009 conns[firstfree].fd = fd;
2010 conns[firstfree].events = POLLRDNORM;
/* Remember the accept time for the idle check at the end of the
   round.  */
2011 starttime[firstfree] = now;
2012 if (firstfree >= nused)
2013 nused = firstfree + 1;
2017 while (firstfree < nused && conns[firstfree].fd != -1);
2020 /* We cannot use the connection so close it. */
2029 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2031 if (conns[1].revents != 0)
/* Collects the databases touched by all events read in this round.  */
2033 bool to_clear[lastdb] = { false, };
2036 /* Read all inotify events for files registered via
2037 register_traced_file(). */
2040 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2042 if (nb < (ssize_t) sizeof (struct inotify_event))
2044 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2047 /* Something went wrong when reading the inotify
2048 data. Better disable inotify. */
2050 disabled inotify after read error %d"),
2062 /* Check which of the files changed. */
2063 inotify_check_files (to_clear, &inev);
2066 /* Actually perform the cache clearing. */
2067 clear_db_cache (to_clear);
2077 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2080 /* Read all the data. We do not interpret it here. */
/* The loop only ends once read reports an error.  */
2081 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2082 sizeof (buf))) != -1)
2085 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2086 = __bump_nl_timestamp ();
/* Hand every remaining ready descriptor to the worker threads.  */
2090 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2091 if (conns[cnt].revents != 0)
2093 fd_ready (conns[cnt].fd);
2095 /* Clean up the CONNS array. */
2097 if (cnt < firstfree)
2099 if (cnt == nused - 1)
2102 while (conns[nused - 1].fd == -1);
2108 /* Now find entries which have timed out. */
2111 /* We make the timeout length depend on the number of file
2112 descriptors currently used. */
/* ACCEPT_TIMEOUT is in seconds.  It falls from MAX_ACCEPT_TIMEOUT
   towards MIN_ACCEPT_TIMEOUT as NUSED approaches NCONNS.  */
2113 #define ACCEPT_TIMEOUT \
2114 (MAX_ACCEPT_TIMEOUT \
2115 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2116 time_t laststart = now - ACCEPT_TIMEOUT;
/* NOTE(review): this scan covers every slot above 0, while STARTTIME is
   only assigned for accepted connections in the visible code.  Verify
   that the inotify and netlink slots cannot be closed here because
   their start time is still zero.  */
2118 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2120 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2122 /* Remove the entry, it timed out. */
2123 (void) close (conns[cnt].fd);
2126 if (cnt < firstfree)
2128 if (cnt == nused - 1)
2131 while (conns[nused - 1].fd == -1);
/* See restart_p for the conditions under which this is true.  */
2135 if (restart_p (now))
/* Main loop of the main thread, epoll variant.  EFD is the epoll
   descriptor created by the caller.  The listening socket and the
   optional inotify and netlink descriptors are registered first; each
   failure there is marked "We cannot use epoll".
   NOTE(review): presumably the function returns at those points so the
   caller can fall back to another loop; the statements are not visible
   in this extract -- confirm.  */
2143 main_loop_epoll (int efd)
2145 struct epoll_event ev = { 0, };
2149 /* Add the socket. */
2150 ev.events = EPOLLRDNORM;
2152 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2153 /* We cannot use epoll. */
2156 # ifdef HAVE_INOTIFY
2157 if (inotify_fd != -1)
2159 ev.events = EPOLLRDNORM;
2160 ev.data.fd = inotify_fd;
2161 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2162 /* We cannot use epoll. */
2168 # ifdef HAVE_NETLINK
2169 if (nl_status_fd != -1)
2171 ev.events = EPOLLRDNORM;
2172 ev.data.fd = nl_status_fd;
2173 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2174 /* We cannot use epoll. */
/* At most 100 events are fetched per epoll_wait call.  */
2181 struct epoll_event revs[100];
2182 # define nrevs (sizeof (revs) / sizeof (revs[0]))
/* MAIN_THREAD_TIMEOUT is the macro defined in main_loop_poll.  */
2184 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2186 time_t now = time (NULL);
/* The descriptor stored in each event's data tells which case
   applies.  */
2188 for (int cnt = 0; cnt < n; ++cnt)
2189 if (revs[cnt].data.fd == sock)
2191 /* A new connection. */
2194 # ifndef __ASSUME_ACCEPT4
2196 if (have_accept4 >= 0)
2199 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2201 # ifndef __ASSUME_ACCEPT4
/* First use: remember whether accept4 is available.  Only ENOSYS is
   taken to mean it is missing.  */
2202 if (have_accept4 == 0)
2203 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2206 # ifndef __ASSUME_ACCEPT4
2207 if (have_accept4 < 0)
2208 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2211 /* Use the descriptor if we have not reached the limit. */
2214 /* Try to add the new descriptor. */
2217 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2218 /* The descriptor is too large or something went
2219 wrong. Close the descriptor. */
2223 /* Remember when we accepted the connection. */
/* In this variant STARTTIME is indexed by the descriptor number.  */
2224 starttime[fd] = now;
2233 # ifdef HAVE_INOTIFY
2234 else if (revs[cnt].data.fd == inotify_fd)
/* Collects the databases touched by all events read in this round.  */
2236 bool to_clear[lastdb] = { false, };
2239 /* Read all inotify events for files registered via
2240 register_traced_file(). */
2243 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2245 if (nb < (ssize_t) sizeof (struct inotify_event))
2247 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2249 /* Something went wrong when reading the inotify
2250 data. Better disable inotify. */
2251 dbg_log (_("disabled inotify after read error %d"),
2253 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
2261 /* Check which of the files changed. */
2262 inotify_check_files(to_clear, &inev);
2265 /* Actually perform the cache clearing. */
2266 clear_db_cache (to_clear);
2269 # ifdef HAVE_NETLINK
2270 else if (revs[cnt].data.fd == nl_status_fd)
2273 /* Read all the data. We do not interpret it here. */
/* The loop only ends once read reports an error.  */
2274 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2275 sizeof (buf))) != -1)
2278 __bump_nl_timestamp ();
2283 /* Remove the descriptor from the epoll descriptor. */
2284 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2286 /* Get a worker to handle the request. */
2287 fd_ready (revs[cnt].data.fd);
2289 /* Reset the time. */
/* A start time of 0 marks the descriptor as no longer waiting.  */
2290 starttime[revs[cnt].data.fd] = 0;
2291 if (revs[cnt].data.fd == highest)
2294 while (highest > 0 && starttime[highest] == 0);
2299 /* Now look for descriptors for accepted connections which have
2300 no reply in too long of a time. */
/* ACCEPT_TIMEOUT is the macro defined in main_loop_poll.  */
2301 time_t laststart = now - ACCEPT_TIMEOUT;
/* The listening socket and the inotify and netlink descriptors must
   never carry a start time, so the scan below cannot close them.  */
2302 assert (starttime[sock] == 0);
2303 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2304 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
/* Scan downwards from HIGHEST and stop above the standard descriptors.  */
2305 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2306 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2308 /* We are waiting for this one for too long. Close it. */
2309 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2317 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
/* See restart_p for the conditions under which this is true.  */
2320 if (restart_p (now))
2327 /* Start all the threads we want. The initial process is thread no. 1. */
/* In order: choose the clock for the prune condition variables, set up
   the shared thread attributes, start the prune and worker threads,
   size and allocate the descriptor tables, and enter a main loop.  */
2329 start_threads (void)
2331 /* Initialize the condition variable we will use. The only
2332 non-standard attribute we might use is the clock selection. */
2333 pthread_condattr_t condattr;
2334 pthread_condattr_init (&condattr);
/* TIMEOUT_CLOCK changes only if the monotonic clock exists and the
   condition attribute accepts it; otherwise it keeps its initial
   value.  */
2336 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2337 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2338 /* Determine whether the monotonic clock is available. */
2339 struct timespec dummy;
2340 # if _POSIX_MONOTONIC_CLOCK == 0
2341 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2343 # if _POSIX_CLOCK_SELECTION == 0
2344 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2346 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2347 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2348 timeout_clock = CLOCK_MONOTONIC;
2351 /* Create the attribute for the threads. They are all created
2353 pthread_attr_init (&attr);
2354 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2355 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2356 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2358 /* We allow less than LASTDB threads only for debugging. */
2359 if (debug_level == 0)
2360 nthreads = MAX (nthreads, lastdb);
2362 /* Create the threads which prune the databases. */
2363 // XXX Ideally this work would be done by some of the worker threads.
2364 // XXX But this is problematic since we would need to be able to wake
2365 // XXX them up explicitly as well as part of the group handling the
2366 // XXX ready-list. This requires an operation where we can wait on
2367 // XXX two conditional variables at the same time. This operation
2368 // XXX does not exist (yet).
2369 for (long int i = 0; i < lastdb; ++i)
2371 /* Initialize the condition variable. */
/* Each database's PRUNE_COND gets the clock chosen above.  */
2372 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2374 dbg_log (_("could not initialize conditional variable"));
2375 do_exit (1, 0, NULL);
2380 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2382 dbg_log (_("could not start clean-up thread; terminating"));
2383 do_exit (1, 0, NULL);
/* The attribute object is not used after the loop above.  */
2387 pthread_condattr_destroy (&condattr);
/* Start the worker threads; they receive no argument.  */
2389 for (long int i = 0; i < nthreads; ++i)
2392 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2396 dbg_log (_("could not start any worker thread; terminating"));
2397 do_exit (1, 0, NULL);
2404 /* Now it is safe to let the parent know that we're doing fine and it can
2408 /* Determine how much room for descriptors we should initially
2409 allocate. This might need to change later if we cap the number
2411 const long int nfds = sysconf (_SC_OPEN_MAX);
2413 #define MAXCONN 16384
2414 if (nfds == -1 || nfds > MAXCONN)
2416 else if (nfds < MINCONN)
2421 /* We need memory to pass descriptors on to the worker threads. */
2422 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2423 /* Array to keep track when connection was accepted. */
2424 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2426 /* In the main thread we execute the loop which handles incoming
2429 int efd = epoll_create (100);
2432 main_loop_epoll (efd);
2441 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2442 this function is called, we are not listening on the nscd socket yet so
2443 we can just use the ordinary lookup functions without causing a lockup. */
/* The results are stored in SERVER_UID, SERVER_GID, SERVER_NGROUPS and
   SERVER_GROUPS.  Every failure is logged and ends the program through
   do_exit.  */
2445 begin_drop_privileges (void)
2447 struct passwd *pwd = getpwnam (server_user);
2451 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2452 do_exit (EXIT_FAILURE, 0,
2453 _("Failed to run nscd as user '%s'"), server_user);
2456 server_uid = pwd->pw_uid;
2457 server_gid = pwd->pw_gid;
2459 /* Save the old UID/GID if we have to change back. */
2462 old_uid = getuid ();
2463 old_gid = getgid ();
/* Sizing call: no buffer is supplied, so getgrouplist is expected to
   report the required number of groups in SERVER_NGROUPS.  A return
   value of 0 is treated as a fatal error.  */
2466 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2468 /* This really must never happen. */
2469 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2470 do_exit (EXIT_FAILURE, errno,
2471 _("initial getgrouplist failed"));
/* Room for exactly the number of groups reported above.  */
2474 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2476 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2479 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2480 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2485 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2486 run nscd as the user specified in the configuration file. */
/* The IDs and group list are the ones stored by begin_drop_privileges.
   The steps run in this order: supplementary groups, group ID, user ID.
   Every failure is logged and ends the program through do_exit.  */
2488 finish_drop_privileges (void)
2490 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2491 /* We need to preserve the capabilities to connect to the audit daemon. */
2492 cap_t new_caps = preserve_capabilities ();
2495 if (setgroups (server_ngroups, server_groups) == -1)
2497 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2498 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
/* Two alternatives follow.  The setresgid form keeps OLD_GID as the saved
   set-group-ID; the setgid form does not name the old ID at all.
   NOTE(review): the condition that selects between them is not visible
   in this extract -- confirm.  */
2503 res = setresgid (server_gid, server_gid, old_gid);
2505 res = setgid (server_gid);
2508 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2509 do_exit (4, errno, "setgid");
/* Same pattern for the user ID, with OLD_UID kept as the saved
   set-user-ID in the setresuid form.  */
2513 res = setresuid (server_uid, server_uid, old_uid);
2515 res = setuid (server_uid);
2518 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2519 do_exit (4, errno, "setuid");
2522 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2523 /* Remove the temporary capabilities. */
2524 install_real_capabilities (new_caps);