/* Copyright 1998 by the Massachusetts Institute of Technology.
 * Copyright (C) 2004-2016 by Daniel Stenberg
 *
 * Permission to use, copy, modify, and distribute this
 * software and its documentation for any purpose and without
 * fee is hereby granted, provided that the above copyright
 * notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting
 * documentation, and that the name of M.I.T. not be used in
 * advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.
 * M.I.T. makes no representations about the suitability of
 * this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 */
18 #include "ares_setup.h"
23 #ifdef HAVE_NETINET_IN_H
24 # include <netinet/in.h>
26 #ifdef HAVE_NETINET_TCP_H
27 # include <netinet/tcp.h>
32 #ifdef HAVE_ARPA_NAMESER_H
33 # include <arpa/nameser.h>
37 #ifdef HAVE_ARPA_NAMESER_COMPAT_H
38 # include <arpa/nameser_compat.h>
44 #ifdef HAVE_SYS_IOCTL_H
45 # include <sys/ioctl.h>
48 # include <sys/filio.h>
56 #include "ares_nowarn.h"
57 #include "ares_private.h"
60 static int try_again(int errnum);
61 static void write_tcp_data(ares_channel channel, fd_set *write_fds,
62 ares_socket_t write_fd, struct timeval *now);
63 static void read_tcp_data(ares_channel channel, fd_set *read_fds,
64 ares_socket_t read_fd, struct timeval *now);
65 static void read_udp_packets(ares_channel channel, fd_set *read_fds,
66 ares_socket_t read_fd, struct timeval *now);
67 static void advance_tcp_send_queue(ares_channel channel, int whichserver,
69 static void process_timeouts(ares_channel channel, struct timeval *now);
70 static void process_broken_connections(ares_channel channel,
72 static void process_answer(ares_channel channel, unsigned char *abuf,
73 int alen, int whichserver, int tcp,
75 static void handle_error(ares_channel channel, int whichserver,
77 static void skip_server(ares_channel channel, struct query *query,
79 static void next_server(ares_channel channel, struct query *query,
81 static int open_tcp_socket(ares_channel channel, struct server_state *server);
82 static int open_udp_socket(ares_channel channel, struct server_state *server);
83 static int same_questions(const unsigned char *qbuf, int qlen,
84 const unsigned char *abuf, int alen);
85 static int same_address(struct sockaddr *sa, struct ares_addr *aa);
86 static void end_query(ares_channel channel, struct query *query, int status,
87 unsigned char *abuf, int alen);
89 /* return true if now is exactly check time or later */
90 int ares__timedout(struct timeval *now,
91 struct timeval *check)
93 long secs = (now->tv_sec - check->tv_sec);
96 return 1; /* yes, timed out */
98 return 0; /* nope, not timed out */
100 /* if the full seconds were identical, check the sub second parts */
101 return (now->tv_usec - check->tv_usec >= 0);
104 /* add the specific number of milliseconds to the time in the first argument */
105 static void timeadd(struct timeval *now, int millisecs)
107 now->tv_sec += millisecs/1000;
108 now->tv_usec += (millisecs%1000)*1000;
110 if(now->tv_usec >= 1000000) {
112 now->tv_usec -= 1000000;
117 * generic process function
119 static void processfds(ares_channel channel,
120 fd_set *read_fds, ares_socket_t read_fd,
121 fd_set *write_fds, ares_socket_t write_fd)
123 struct timeval now = ares__tvnow();
125 write_tcp_data(channel, write_fds, write_fd, &now);
126 read_tcp_data(channel, read_fds, read_fd, &now);
127 read_udp_packets(channel, read_fds, read_fd, &now);
128 process_timeouts(channel, &now);
129 process_broken_connections(channel, &now);
132 /* Something interesting happened on the wire, or there was a timeout.
133 * See what's up and respond accordingly.
135 void ares_process(ares_channel channel, fd_set *read_fds, fd_set *write_fds)
137 processfds(channel, read_fds, ARES_SOCKET_BAD, write_fds, ARES_SOCKET_BAD);
140 /* Something interesting happened on the wire, or there was a timeout.
141 * See what's up and respond accordingly.
143 void ares_process_fd(ares_channel channel,
144 ares_socket_t read_fd, /* use ARES_SOCKET_BAD or valid
146 ares_socket_t write_fd)
148 processfds(channel, NULL, read_fd, NULL, write_fd);
152 /* Return 1 if the specified error number describes a readiness error, or 0
153 * otherwise. This is mostly for HP-UX, which could return EAGAIN or
154 * EWOULDBLOCK. See this man page
156 * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?
157 * manpage=/usr/share/man/man2.Z/send.2
159 static int try_again(int errnum)
161 #if !defined EWOULDBLOCK && !defined EAGAIN
162 #error "Neither EWOULDBLOCK nor EAGAIN defined"
170 #if defined EAGAIN && EAGAIN != EWOULDBLOCK
178 /* If any TCP sockets select true for writing, write out queued data
181 static void write_tcp_data(ares_channel channel,
183 ares_socket_t write_fd,
186 struct server_state *server;
187 struct send_request *sendreq;
194 if(!write_fds && (write_fd == ARES_SOCKET_BAD))
195 /* no possible action */
198 for (i = 0; i < channel->nservers; i++)
200 /* Make sure server has data to send and is selected in write_fds or
202 server = &channel->servers[i];
203 if (!server->qhead || server->tcp_socket == ARES_SOCKET_BAD ||
208 if(!FD_ISSET(server->tcp_socket, write_fds))
212 if(server->tcp_socket != write_fd)
217 /* If there's an error and we close this socket, then open
218 * another with the same fd to talk to another server, then we
219 * don't want to think that it was the new socket that was
220 * ready. This is not disastrous, but is likely to result in
221 * extra system calls and confusion. */
222 FD_CLR(server->tcp_socket, write_fds);
224 /* Count the number of send queue items. */
226 for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
229 /* Allocate iovecs so we can send all our data at once. */
230 vec = ares_malloc(n * sizeof(struct iovec));
233 /* Fill in the iovecs and send. */
235 for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
237 vec[n].iov_base = (char *) sendreq->data;
238 vec[n].iov_len = sendreq->len;
241 wcount = (ssize_t)writev(server->tcp_socket, vec, (int)n);
245 if (!try_again(SOCKERRNO))
246 handle_error(channel, i, now);
250 /* Advance the send queue by as many bytes as we sent. */
251 advance_tcp_send_queue(channel, i, wcount);
255 /* Can't allocate iovecs; just send the first request. */
256 sendreq = server->qhead;
258 scount = swrite(server->tcp_socket, sendreq->data, sendreq->len);
261 if (!try_again(SOCKERRNO))
262 handle_error(channel, i, now);
266 /* Advance the send queue by as many bytes as we sent. */
267 advance_tcp_send_queue(channel, i, scount);
272 /* Consume the given number of bytes from the head of the TCP send queue. */
273 static void advance_tcp_send_queue(ares_channel channel, int whichserver,
276 struct send_request *sendreq;
277 struct server_state *server = &channel->servers[whichserver];
278 while (num_bytes > 0) {
279 sendreq = server->qhead;
280 if ((size_t)num_bytes >= sendreq->len) {
281 num_bytes -= sendreq->len;
282 server->qhead = sendreq->next;
283 if (sendreq->data_storage)
284 ares_free(sendreq->data_storage);
286 if (server->qhead == NULL) {
287 SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 0);
288 server->qtail = NULL;
290 /* qhead is NULL so we cannot continue this loop */
295 sendreq->data += num_bytes;
296 sendreq->len -= num_bytes;
302 /* If any TCP socket selects true for reading, read some data,
303 * allocate a buffer if we finish reading the length word, and process
304 * a packet if we finish reading one.
306 static void read_tcp_data(ares_channel channel, fd_set *read_fds,
307 ares_socket_t read_fd, struct timeval *now)
309 struct server_state *server;
313 if(!read_fds && (read_fd == ARES_SOCKET_BAD))
314 /* no possible action */
317 for (i = 0; i < channel->nservers; i++)
319 /* Make sure the server has a socket and is selected in read_fds. */
320 server = &channel->servers[i];
321 if (server->tcp_socket == ARES_SOCKET_BAD || server->is_broken)
325 if(!FD_ISSET(server->tcp_socket, read_fds))
329 if(server->tcp_socket != read_fd)
334 /* If there's an error and we close this socket, then open another
335 * with the same fd to talk to another server, then we don't want to
336 * think that it was the new socket that was ready. This is not
337 * disastrous, but is likely to result in extra system calls and
339 FD_CLR(server->tcp_socket, read_fds);
341 if (server->tcp_lenbuf_pos != 2)
343 /* We haven't yet read a length word, so read that (or
344 * what's left to read of it).
346 count = sread(server->tcp_socket,
347 server->tcp_lenbuf + server->tcp_lenbuf_pos,
348 2 - server->tcp_lenbuf_pos);
351 if (!(count == -1 && try_again(SOCKERRNO)))
352 handle_error(channel, i, now);
356 server->tcp_lenbuf_pos += (int)count;
357 if (server->tcp_lenbuf_pos == 2)
359 /* We finished reading the length word. Decode the
360 * length and allocate a buffer for the data.
362 server->tcp_length = server->tcp_lenbuf[0] << 8
363 | server->tcp_lenbuf[1];
364 server->tcp_buffer = ares_malloc(server->tcp_length);
365 if (!server->tcp_buffer) {
366 handle_error(channel, i, now);
367 return; /* bail out on malloc failure. TODO: make this
368 function return error codes */
370 server->tcp_buffer_pos = 0;
375 /* Read data into the allocated buffer. */
376 count = sread(server->tcp_socket,
377 server->tcp_buffer + server->tcp_buffer_pos,
378 server->tcp_length - server->tcp_buffer_pos);
381 if (!(count == -1 && try_again(SOCKERRNO)))
382 handle_error(channel, i, now);
386 server->tcp_buffer_pos += (int)count;
387 if (server->tcp_buffer_pos == server->tcp_length)
389 /* We finished reading this answer; process it and
390 * prepare to read another length word.
392 process_answer(channel, server->tcp_buffer, server->tcp_length,
394 ares_free(server->tcp_buffer);
395 server->tcp_buffer = NULL;
396 server->tcp_lenbuf_pos = 0;
397 server->tcp_buffer_pos = 0;
403 /* If any UDP sockets select true for reading, process them. */
404 static void read_udp_packets(ares_channel channel, fd_set *read_fds,
405 ares_socket_t read_fd, struct timeval *now)
407 struct server_state *server;
410 unsigned char buf[MAXENDSSZ + 1];
412 ares_socklen_t fromlen;
415 struct sockaddr_in sa4;
416 struct sockaddr_in6 sa6;
420 if(!read_fds && (read_fd == ARES_SOCKET_BAD))
421 /* no possible action */
424 for (i = 0; i < channel->nservers; i++)
426 /* Make sure the server has a socket and is selected in read_fds. */
427 server = &channel->servers[i];
429 if (server->udp_socket == ARES_SOCKET_BAD || server->is_broken)
433 if(!FD_ISSET(server->udp_socket, read_fds))
437 if(server->udp_socket != read_fd)
442 /* If there's an error and we close this socket, then open
443 * another with the same fd to talk to another server, then we
444 * don't want to think that it was the new socket that was
445 * ready. This is not disastrous, but is likely to result in
446 * extra system calls and confusion. */
447 FD_CLR(server->udp_socket, read_fds);
449 /* To reduce event loop overhead, read and process as many
450 * packets as we can. */
452 if (server->udp_socket == ARES_SOCKET_BAD)
457 if (server->addr.family == AF_INET)
458 fromlen = sizeof(from.sa4);
460 fromlen = sizeof(from.sa6);
461 count = (ssize_t)recvfrom(server->udp_socket, (void *)buf,
462 sizeof(buf), 0, &from.sa, &fromlen);
464 count = sread(server->udp_socket, buf, sizeof(buf));
468 if (count == -1 && try_again(SOCKERRNO))
471 handle_error(channel, i, now);
473 else if (!same_address(&from.sa, &server->addr))
474 /* The address the response comes from does not match the address we
475 * sent the request to. Someone may be attempting to perform a cache
476 * poisoning attack. */
480 process_answer(channel, buf, (int)count, i, 0, now);
485 /* If any queries have timed out, note the timeout and move them on. */
486 static void process_timeouts(ares_channel channel, struct timeval *now)
488 time_t t; /* the time of the timeouts we're processing */
490 struct list_node* list_head;
491 struct list_node* list_node;
493 /* Process all the timeouts that have fired since the last time we processed
494 * timeouts. If things are going well, then we'll have hundreds/thousands of
495 * queries that fall into future buckets, and only a handful of requests
496 * that fall into the "now" bucket, so this should be quite quick.
498 for (t = channel->last_timeout_processed; t <= now->tv_sec; t++)
500 list_head = &(channel->queries_by_timeout[t % ARES_TIMEOUT_TABLE_SIZE]);
501 for (list_node = list_head->next; list_node != list_head; )
503 query = list_node->data;
504 list_node = list_node->next; /* in case the query gets deleted */
505 if (query->timeout.tv_sec && ares__timedout(now, &query->timeout))
507 query->error_status = ARES_ETIMEOUT;
509 next_server(channel, query, now);
513 channel->last_timeout_processed = now->tv_sec;
516 /* Handle an answer from a server. */
517 static void process_answer(ares_channel channel, unsigned char *abuf,
518 int alen, int whichserver, int tcp,
521 int tc, rcode, packetsz;
524 struct list_node* list_head;
525 struct list_node* list_node;
527 /* If there's no room in the answer for a header, we can't do much
532 /* Grab the query ID, truncate bit, and response code from the packet. */
533 id = DNS_HEADER_QID(abuf);
534 tc = DNS_HEADER_TC(abuf);
535 rcode = DNS_HEADER_RCODE(abuf);
537 /* Find the query corresponding to this packet. The queries are
538 * hashed/bucketed by query id, so this lookup should be quick. Note that
539 * both the query id and the questions must be the same; when the query id
540 * wraps around we can have multiple outstanding queries with the same query
541 * id, so we need to check both the id and question.
544 list_head = &(channel->queries_by_qid[id % ARES_QID_TABLE_SIZE]);
545 for (list_node = list_head->next; list_node != list_head;
546 list_node = list_node->next)
548 struct query *q = list_node->data;
549 if ((q->qid == id) && same_questions(q->qbuf, q->qlen, abuf, alen))
559 /* If we use EDNS and server answers with one of these RCODES, the protocol
560 * extension is not understood by the responder. We must retry the query
561 * without EDNS enabled.
563 if (channel->flags & ARES_FLAG_EDNS)
565 packetsz = channel->ednspsz;
566 if (rcode == NOTIMP || rcode == FORMERR || rcode == SERVFAIL)
568 int qlen = (query->tcplen - 2) - EDNSFIXEDSZ;
569 channel->flags ^= ARES_FLAG_EDNS;
570 query->tcplen -= EDNSFIXEDSZ;
571 query->qlen -= EDNSFIXEDSZ;
572 query->tcpbuf[0] = (unsigned char)((qlen >> 8) & 0xff);
573 query->tcpbuf[1] = (unsigned char)(qlen & 0xff);
574 DNS_HEADER_SET_ARCOUNT(query->tcpbuf + 2, 0);
575 query->tcpbuf = ares_realloc(query->tcpbuf, query->tcplen);
576 query->qbuf = query->tcpbuf + 2;
577 ares__send_query(channel, query, now);
582 /* If we got a truncated UDP packet and are not ignoring truncation,
583 * don't accept the packet, and switch the query to TCP if we hadn't
586 if ((tc || alen > packetsz) && !tcp && !(channel->flags & ARES_FLAG_IGNTC))
588 if (!query->using_tcp)
590 query->using_tcp = 1;
591 ares__send_query(channel, query, now);
596 /* Limit alen to PACKETSZ if we aren't using TCP (only relevant if we
597 * are ignoring truncation.
599 if (alen > packetsz && !tcp)
602 /* If we aren't passing through all error packets, discard packets
603 * with SERVFAIL, NOTIMP, or REFUSED response codes.
605 if (!(channel->flags & ARES_FLAG_NOCHECKRESP))
607 if (rcode == SERVFAIL || rcode == NOTIMP || rcode == REFUSED)
609 skip_server(channel, query, whichserver);
610 if (query->server == whichserver)
611 next_server(channel, query, now);
616 end_query(channel, query, ARES_SUCCESS, abuf, alen);
619 /* Close all the connections that are no longer usable. */
620 static void process_broken_connections(ares_channel channel,
624 for (i = 0; i < channel->nservers; i++)
626 struct server_state *server = &channel->servers[i];
627 if (server->is_broken)
629 handle_error(channel, i, now);
634 /* Swap the contents of two lists */
635 static void swap_lists(struct list_node* head_a,
636 struct list_node* head_b)
638 int is_a_empty = ares__is_list_empty(head_a);
639 int is_b_empty = ares__is_list_empty(head_b);
640 struct list_node old_a = *head_a;
641 struct list_node old_b = *head_b;
644 ares__init_list_head(head_b);
647 old_a.next->prev = head_b;
648 old_a.prev->next = head_b;
651 ares__init_list_head(head_a);
654 old_b.next->prev = head_a;
655 old_b.prev->next = head_a;
659 static void handle_error(ares_channel channel, int whichserver,
662 struct server_state *server;
664 struct list_node list_head;
665 struct list_node* list_node;
667 server = &channel->servers[whichserver];
669 /* Reset communications with this server. */
670 ares__close_sockets(channel, server);
672 /* Tell all queries talking to this server to move on and not try this
673 * server again. We steal the current list of queries that were in-flight to
674 * this server, since when we call next_server this can cause the queries to
675 * be re-sent to this server, which will re-insert these queries in that
676 * same server->queries_to_server list.
678 ares__init_list_head(&list_head);
679 swap_lists(&list_head, &(server->queries_to_server));
680 for (list_node = list_head.next; list_node != &list_head; )
682 query = list_node->data;
683 list_node = list_node->next; /* in case the query gets deleted */
684 assert(query->server == whichserver);
685 skip_server(channel, query, whichserver);
686 next_server(channel, query, now);
688 /* Each query should have removed itself from our temporary list as
689 * it re-sent itself or finished up...
691 assert(ares__is_list_empty(&list_head));
694 static void skip_server(ares_channel channel, struct query *query,
697 /* The given server gave us problems with this query, so if we have the
698 * luxury of using other servers, then let's skip the potentially broken
699 * server and just use the others. If we only have one server and we need to
700 * retry then we should just go ahead and re-use that server, since it's our
701 * only hope; perhaps we just got unlucky, and retrying will work (eg, the
702 * server timed out our TCP connection just as we were sending another
705 if (channel->nservers > 1)
707 query->server_info[whichserver].skip_server = 1;
711 static void next_server(ares_channel channel, struct query *query,
714 /* We need to try each server channel->tries times. We have channel->nservers
715 * servers to try. In total, we need to do channel->nservers * channel->tries
716 * attempts. Use query->try to remember how many times we already attempted
717 * this query. Use modular arithmetic to find the next server to try. */
718 while (++(query->try_count) < (channel->nservers * channel->tries))
720 struct server_state *server;
722 /* Move on to the next server. */
723 query->server = (query->server + 1) % channel->nservers;
724 server = &channel->servers[query->server];
726 /* We don't want to use this server if (1) we decided this connection is
727 * broken, and thus about to be closed, (2) we've decided to skip this
728 * server because of earlier errors we encountered, or (3) we already
729 * sent this query over this exact connection.
731 if (!server->is_broken &&
732 !query->server_info[query->server].skip_server &&
733 !(query->using_tcp &&
734 (query->server_info[query->server].tcp_connection_generation ==
735 server->tcp_connection_generation)))
737 ares__send_query(channel, query, now);
741 /* You might think that with TCP we only need one try. However, even
742 * when using TCP, servers can time-out our connection just as we're
743 * sending a request, or close our connection because they die, or never
744 * send us a reply because they get wedged or tickle a bug that drops
749 /* If we are here, all attempts to perform query failed. */
750 end_query(channel, query, query->error_status, NULL, 0);
753 void ares__send_query(ares_channel channel, struct query *query,
756 struct send_request *sendreq;
757 struct server_state *server;
760 server = &channel->servers[query->server];
761 if (query->using_tcp)
763 /* Make sure the TCP socket for this server is set up and queue
766 if (server->tcp_socket == ARES_SOCKET_BAD)
768 if (open_tcp_socket(channel, server) == -1)
770 skip_server(channel, query, query->server);
771 next_server(channel, query, now);
775 sendreq = ares_malloc(sizeof(struct send_request));
778 end_query(channel, query, ARES_ENOMEM, NULL, 0);
781 memset(sendreq, 0, sizeof(struct send_request));
782 /* To make the common case fast, we avoid copies by using the query's
783 * tcpbuf for as long as the query is alive. In the rare case where the
784 * query ends while it's queued for transmission, then we give the
785 * sendreq its own copy of the request packet and put it in
786 * sendreq->data_storage.
788 sendreq->data_storage = NULL;
789 sendreq->data = query->tcpbuf;
790 sendreq->len = query->tcplen;
791 sendreq->owner_query = query;
792 sendreq->next = NULL;
794 server->qtail->next = sendreq;
797 SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 1);
798 server->qhead = sendreq;
800 server->qtail = sendreq;
801 query->server_info[query->server].tcp_connection_generation =
802 server->tcp_connection_generation;
806 if (server->udp_socket == ARES_SOCKET_BAD)
808 if (open_udp_socket(channel, server) == -1)
810 skip_server(channel, query, query->server);
811 next_server(channel, query, now);
815 if (swrite(server->udp_socket, query->qbuf, query->qlen) == -1)
817 /* FIXME: Handle EAGAIN here since it likely can happen. */
818 skip_server(channel, query, query->server);
819 next_server(channel, query, now);
823 timeplus = channel->timeout << (query->try_count / channel->nservers);
824 timeplus = (timeplus * (9 + (rand () & 7))) / 16;
825 query->timeout = *now;
826 timeadd(&query->timeout, timeplus);
827 /* Keep track of queries bucketed by timeout, so we can process
828 * timeout events quickly.
830 ares__remove_from_list(&(query->queries_by_timeout));
831 ares__insert_in_list(
832 &(query->queries_by_timeout),
833 &(channel->queries_by_timeout[query->timeout.tv_sec %
834 ARES_TIMEOUT_TABLE_SIZE]));
836 /* Keep track of queries bucketed by server, so we can process server
839 ares__remove_from_list(&(query->queries_to_server));
840 ares__insert_in_list(&(query->queries_to_server),
841 &(server->queries_to_server));
845 * setsocknonblock sets the given socket to either blocking or non-blocking
846 * mode based on the 'nonblock' boolean argument. This function is highly
849 static int setsocknonblock(ares_socket_t sockfd, /* operate on this */
850 int nonblock /* TRUE or FALSE */)
852 #if defined(USE_BLOCKING_SOCKETS)
854 return 0; /* returns success */
856 #elif defined(HAVE_FCNTL_O_NONBLOCK)
858 /* most recent unix versions */
860 flags = fcntl(sockfd, F_GETFL, 0);
861 if (FALSE != nonblock)
862 return fcntl(sockfd, F_SETFL, flags | O_NONBLOCK);
864 return fcntl(sockfd, F_SETFL, flags & (~O_NONBLOCK)); /* LCOV_EXCL_LINE */
866 #elif defined(HAVE_IOCTL_FIONBIO)
868 /* older unix versions */
869 int flags = nonblock ? 1 : 0;
870 return ioctl(sockfd, FIONBIO, &flags);
872 #elif defined(HAVE_IOCTLSOCKET_FIONBIO)
875 char flags = nonblock ? 1 : 0;
878 unsigned long flags = nonblock ? 1UL : 0UL;
880 return ioctlsocket(sockfd, FIONBIO, &flags);
882 #elif defined(HAVE_IOCTLSOCKET_CAMEL_FIONBIO)
885 long flags = nonblock ? 1L : 0L;
886 return IoctlSocket(sockfd, FIONBIO, flags);
888 #elif defined(HAVE_SETSOCKOPT_SO_NONBLOCK)
891 long b = nonblock ? 1L : 0L;
892 return setsockopt(sockfd, SOL_SOCKET, SO_NONBLOCK, &b, sizeof(b));
895 # error "no non-blocking method was found/used/set"
899 static int configure_socket(ares_socket_t s, int family, ares_channel channel)
903 struct sockaddr_in sa4;
904 struct sockaddr_in6 sa6;
907 (void)setsocknonblock(s, TRUE);
909 #if defined(FD_CLOEXEC) && !defined(MSDOS)
910 /* Configure the socket fd as close-on-exec. */
911 if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1)
912 return -1; /* LCOV_EXCL_LINE */
915 /* Set the socket's send and receive buffer sizes. */
916 if ((channel->socket_send_buffer_size > 0) &&
917 setsockopt(s, SOL_SOCKET, SO_SNDBUF,
918 (void *)&channel->socket_send_buffer_size,
919 sizeof(channel->socket_send_buffer_size)) == -1)
922 if ((channel->socket_receive_buffer_size > 0) &&
923 setsockopt(s, SOL_SOCKET, SO_RCVBUF,
924 (void *)&channel->socket_receive_buffer_size,
925 sizeof(channel->socket_receive_buffer_size)) == -1)
928 #ifdef SO_BINDTODEVICE
929 if (channel->local_dev_name[0]) {
930 if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
931 channel->local_dev_name, sizeof(channel->local_dev_name))) {
932 /* Only root can do this, and usually not fatal if it doesn't work, so */
933 /* just continue on. */
938 if (family == AF_INET) {
939 if (channel->local_ip4) {
940 memset(&local.sa4, 0, sizeof(local.sa4));
941 local.sa4.sin_family = AF_INET;
942 local.sa4.sin_addr.s_addr = htonl(channel->local_ip4);
943 if (bind(s, &local.sa, sizeof(local.sa4)) < 0)
947 else if (family == AF_INET6) {
948 if (memcmp(channel->local_ip6, &ares_in6addr_any,
949 sizeof(channel->local_ip6)) != 0) {
950 memset(&local.sa6, 0, sizeof(local.sa6));
951 local.sa6.sin6_family = AF_INET6;
952 memcpy(&local.sa6.sin6_addr, channel->local_ip6,
953 sizeof(channel->local_ip6));
954 if (bind(s, &local.sa, sizeof(local.sa6)) < 0)
962 static int open_tcp_socket(ares_channel channel, struct server_state *server)
966 ares_socklen_t salen;
968 struct sockaddr_in sa4;
969 struct sockaddr_in6 sa6;
973 switch (server->addr.family)
976 sa = (void *)&saddr.sa4;
977 salen = sizeof(saddr.sa4);
978 memset(sa, 0, salen);
979 saddr.sa4.sin_family = AF_INET;
980 if (server->addr.tcp_port) {
981 saddr.sa4.sin_port = aresx_sitous(server->addr.tcp_port);
983 saddr.sa4.sin_port = aresx_sitous(channel->tcp_port);
985 memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4,
986 sizeof(server->addr.addrV4));
989 sa = (void *)&saddr.sa6;
990 salen = sizeof(saddr.sa6);
991 memset(sa, 0, salen);
992 saddr.sa6.sin6_family = AF_INET6;
993 if (server->addr.tcp_port) {
994 saddr.sa6.sin6_port = aresx_sitous(server->addr.tcp_port);
996 saddr.sa6.sin6_port = aresx_sitous(channel->tcp_port);
998 memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6,
999 sizeof(server->addr.addrV6));
1002 return -1; /* LCOV_EXCL_LINE */
1005 /* Acquire a socket. */
1006 s = socket(server->addr.family, SOCK_STREAM, 0);
1007 if (s == ARES_SOCKET_BAD)
1011 if (configure_socket(s, server->addr.family, channel) < 0)
1019 * Disable the Nagle algorithm (only relevant for TCP sockets, and thus not
1020 * in configure_socket). In general, in DNS lookups we're pretty much
1021 * interested in firing off a single request and then waiting for a reply,
1022 * so batching isn't very interesting.
1025 if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY,
1026 (void *)&opt, sizeof(opt)) == -1)
1033 if (channel->sock_config_cb)
1035 int err = channel->sock_config_cb(s, SOCK_STREAM,
1036 channel->sock_config_cb_data);
1044 /* Connect to the server. */
1045 if (connect(s, sa, salen) == -1)
1047 int err = SOCKERRNO;
1049 if (err != EINPROGRESS && err != EWOULDBLOCK)
1056 if (channel->sock_create_cb)
1058 int err = channel->sock_create_cb(s, SOCK_STREAM,
1059 channel->sock_create_cb_data);
1067 SOCK_STATE_CALLBACK(channel, s, 1, 0);
1068 server->tcp_buffer_pos = 0;
1069 server->tcp_socket = s;
1070 server->tcp_connection_generation = ++channel->tcp_connection_generation;
1074 static int open_udp_socket(ares_channel channel, struct server_state *server)
1077 ares_socklen_t salen;
1079 struct sockaddr_in sa4;
1080 struct sockaddr_in6 sa6;
1082 struct sockaddr *sa;
1084 switch (server->addr.family)
1087 sa = (void *)&saddr.sa4;
1088 salen = sizeof(saddr.sa4);
1089 memset(sa, 0, salen);
1090 saddr.sa4.sin_family = AF_INET;
1091 if (server->addr.udp_port) {
1092 saddr.sa4.sin_port = aresx_sitous(server->addr.udp_port);
1094 saddr.sa4.sin_port = aresx_sitous(channel->udp_port);
1096 memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4,
1097 sizeof(server->addr.addrV4));
1100 sa = (void *)&saddr.sa6;
1101 salen = sizeof(saddr.sa6);
1102 memset(sa, 0, salen);
1103 saddr.sa6.sin6_family = AF_INET6;
1104 if (server->addr.udp_port) {
1105 saddr.sa6.sin6_port = aresx_sitous(server->addr.udp_port);
1107 saddr.sa6.sin6_port = aresx_sitous(channel->udp_port);
1109 memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6,
1110 sizeof(server->addr.addrV6));
1113 return -1; /* LCOV_EXCL_LINE */
1116 /* Acquire a socket. */
1117 s = socket(server->addr.family, SOCK_DGRAM, 0);
1118 if (s == ARES_SOCKET_BAD)
1121 /* Set the socket non-blocking. */
1122 if (configure_socket(s, server->addr.family, channel) < 0)
1128 if (channel->sock_config_cb)
1130 int err = channel->sock_config_cb(s, SOCK_DGRAM,
1131 channel->sock_config_cb_data);
1139 /* Connect to the server. */
1140 if (connect(s, sa, salen) == -1)
1142 int err = SOCKERRNO;
1144 if (err != EINPROGRESS && err != EWOULDBLOCK)
1151 if (channel->sock_create_cb)
1153 int err = channel->sock_create_cb(s, SOCK_DGRAM,
1154 channel->sock_create_cb_data);
1162 SOCK_STATE_CALLBACK(channel, s, 1, 0);
1164 server->udp_socket = s;
1168 static int same_questions(const unsigned char *qbuf, int qlen,
1169 const unsigned char *abuf, int alen)
1172 const unsigned char *p;
1181 if (qlen < HFIXEDSZ || alen < HFIXEDSZ)
1184 /* Extract qdcount from the request and reply buffers and compare them. */
1185 q.qdcount = DNS_HEADER_QDCOUNT(qbuf);
1186 a.qdcount = DNS_HEADER_QDCOUNT(abuf);
1187 if (q.qdcount != a.qdcount)
1190 /* For each question in qbuf, find it in abuf. */
1191 q.p = qbuf + HFIXEDSZ;
1192 for (i = 0; i < q.qdcount; i++)
1194 /* Decode the question in the query. */
1195 if (ares_expand_name(q.p, qbuf, qlen, &q.name, &q.namelen)
1199 if (q.p + QFIXEDSZ > qbuf + qlen)
1204 q.type = DNS_QUESTION_TYPE(q.p);
1205 q.dnsclass = DNS_QUESTION_CLASS(q.p);
1208 /* Search for this question in the answer. */
1209 a.p = abuf + HFIXEDSZ;
1210 for (j = 0; j < a.qdcount; j++)
1212 /* Decode the question in the answer. */
1213 if (ares_expand_name(a.p, abuf, alen, &a.name, &a.namelen)
1220 if (a.p + QFIXEDSZ > abuf + alen)
1226 a.type = DNS_QUESTION_TYPE(a.p);
1227 a.dnsclass = DNS_QUESTION_CLASS(a.p);
1230 /* Compare the decoded questions. */
1231 if (strcasecmp(q.name, a.name) == 0 && q.type == a.type
1232 && q.dnsclass == a.dnsclass)
1247 static int same_address(struct sockaddr *sa, struct ares_addr *aa)
1252 if (sa->sa_family == aa->family)
1257 addr1 = &aa->addrV4;
1258 addr2 = &((struct sockaddr_in *)sa)->sin_addr;
1259 if (memcmp(addr1, addr2, sizeof(aa->addrV4)) == 0)
1260 return 1; /* match */
1263 addr1 = &aa->addrV6;
1264 addr2 = &((struct sockaddr_in6 *)sa)->sin6_addr;
1265 if (memcmp(addr1, addr2, sizeof(aa->addrV6)) == 0)
1266 return 1; /* match */
1269 break; /* LCOV_EXCL_LINE */
1272 return 0; /* different */
1275 static void end_query (ares_channel channel, struct query *query, int status,
1276 unsigned char *abuf, int alen)
1280 /* First we check to see if this query ended while one of our send
1281 * queues still has pointers to it.
1283 for (i = 0; i < channel->nservers; i++)
1285 struct server_state *server = &channel->servers[i];
1286 struct send_request *sendreq;
1287 for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
1288 if (sendreq->owner_query == query)
1290 sendreq->owner_query = NULL;
1291 assert(sendreq->data_storage == NULL);
1292 if (status == ARES_SUCCESS)
1294 /* We got a reply for this query, but this queued sendreq
1295 * points into this soon-to-be-gone query's tcpbuf. Probably
1296 * this means we timed out and queued the query for
1297 * retransmission, then received a response before actually
1298 * retransmitting. This is perfectly fine, so we want to keep
1299 * the connection running smoothly if we can. But in the worst
1300 * case we may have sent only some prefix of the query, with
1301 * some suffix of the query left to send. Also, the buffer may
1302 * be queued on multiple queues. To prevent dangling pointers
1303 * to the query's tcpbuf and handle these cases, we just give
1304 * such sendreqs their own copy of the query packet.
1306 sendreq->data_storage = ares_malloc(sendreq->len);
1307 if (sendreq->data_storage != NULL)
1309 memcpy(sendreq->data_storage, sendreq->data, sendreq->len);
1310 sendreq->data = sendreq->data_storage;
1313 if ((status != ARES_SUCCESS) || (sendreq->data_storage == NULL))
1315 /* We encountered an error (probably a timeout, suggesting the
1316 * DNS server we're talking to is probably unreachable,
1317 * wedged, or severely overloaded) or we couldn't copy the
1318 * request, so mark the connection as broken. When we get to
1319 * process_broken_connections() we'll close the connection and
1320 * try to re-send requests to another server.
1322 server->is_broken = 1;
1323 /* Just to be paranoid, zero out this sendreq... */
1324 sendreq->data = NULL;
1330 /* Invoke the callback */
1331 query->callback(query->arg, status, query->timeouts, abuf, alen);
1332 ares__free_query(query);
1334 /* Simple cleanup policy: if no queries are remaining, close all network
1335 * sockets unless STAYOPEN is set.
1337 if (!(channel->flags & ARES_FLAG_STAYOPEN) &&
1338 ares__is_list_empty(&(channel->all_queries)))
1340 for (i = 0; i < channel->nservers; i++)
1341 ares__close_sockets(channel, &channel->servers[i]);
1345 void ares__free_query(struct query *query)
1347 /* Remove the query from all the lists in which it is linked */
1348 ares__remove_from_list(&(query->queries_by_qid));
1349 ares__remove_from_list(&(query->queries_by_timeout));
1350 ares__remove_from_list(&(query->queries_to_server));
1351 ares__remove_from_list(&(query->all_queries));
1352 /* Zero out some important stuff, to help catch bugs */
1353 query->callback = NULL;
1355 /* Deallocate the memory associated with the query */
1356 ares_free(query->tcpbuf);
1357 ares_free(query->server_info);