1 /* dnsmasq is Copyright (c) 2000-2022 Simon Kelley
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 dated June, 1991, or
6 (at your option) version 3 dated 29 June, 2007.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Prototypes for the file-local (static) helpers that the functions below call. */
19 static struct frec *get_new_frec(time_t now, struct server *serv, int force);
20 static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp);
21 static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask);
23 static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header);
26 static unsigned short get_id(void);
27 static void free_frec(struct frec *f);
28 static void query_full(time_t now, char *domain);
30 static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status);
32 /* Send a UDP packet with its source address set as "source"
33 unless nowild is true, when we just send it with the kernel default.

   The packet is placed in iov[0] of a msghdr addressed to "to" and handed to
   sendmsg() on fd; the call is repeated while retry_send() returns non-zero.
   When the source address is being set, one control message is attached:
     - to is AF_INET, Linux: IP_PKTINFO carrying source->addr4 in ipi_spec_dst;
     - to is AF_INET, IP_SENDSRCADDR available: IP_SENDSRCADDR carrying source->addr4;
     - otherwise: an in6_pktinfo with source->addr6 and iface, sent with
       cmsg_type daemon->v6pktinfo at level IPPROTO_IPV6.
   msg_controllen starts as sizeof(control_u) so CMSG_FIRSTHDR() works, then is
   narrowed to the CMSG_SPACE() of the structure actually used.
   A send failure is logged via my_syslog(LOG_ERR, ...).
   NOTE(review): the end of the parameter list (iface), the nowild test, the
   error test and the return statements are not visible in this listing, so
   the return values cannot be confirmed from here. */
34 int send_from(int fd, int nowild, char *packet, size_t len,
35 union mysockaddr *to, union all_addr *source,
41 struct cmsghdr align; /* this ensures alignment */
42 #if defined(HAVE_LINUX_NETWORK)
43 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
44 #elif defined(IP_SENDSRCADDR)
45 char control[CMSG_SPACE(sizeof(struct in_addr))];
47 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
50 iov[0].iov_base = packet;
53 msg.msg_control = NULL;
54 msg.msg_controllen = 0;
57 msg.msg_namelen = sa_len(to);
63 struct cmsghdr *cmptr;
64 msg.msg_control = &control_u;
65 msg.msg_controllen = sizeof(control_u);
66 cmptr = CMSG_FIRSTHDR(&msg);
68 if (to->sa.sa_family == AF_INET)
70 #if defined(HAVE_LINUX_NETWORK)
73 p.ipi_spec_dst = source->addr4;
74 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
75 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
76 cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
77 cmptr->cmsg_level = IPPROTO_IP;
78 cmptr->cmsg_type = IP_PKTINFO;
79 #elif defined(IP_SENDSRCADDR)
80 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
81 memcpy(CMSG_DATA(cmptr), &(source->addr4), sizeof(source->addr4));
82 cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
83 cmptr->cmsg_level = IPPROTO_IP;
84 cmptr->cmsg_type = IP_SENDSRCADDR;
90 p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
91 p.ipi6_addr = source->addr6;
92 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
93 memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
94 cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
95 cmptr->cmsg_type = daemon->v6pktinfo;
96 cmptr->cmsg_level = IPPROTO_IPV6;
100 while (retry_send(sendmsg(fd, &msg, 0)));
104 #ifdef HAVE_LINUX_NETWORK
105 /* If interface is still in DAD, EINVAL results - ignore that. */
107 my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
115 #ifdef HAVE_CONNTRACK
/* Ask get_incoming_mark() for the mark belonging to the incoming query
   (identified by forward->frec_src.source and forward->frec_src.dest); when it
   returns non-zero, set that mark on fd with SO_MARK.
   The result of setsockopt() is not checked. */
116 static void set_outgoing_mark(struct frec *forward, int fd)
118 /* Copy connection mark of incoming query to outgoing connection. */
120 if (get_incoming_mark(&forward->frec_src.source, &forward->frec_src.dest, 0, &mark))
121 setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
/* Wrapper around log_query() that takes a union mysockaddr.
   The address part of addr is passed on with F_IPV4 or F_IPV6 added to flags.
   When F_SERVER is set in flags, the "type" argument is overwritten with the
   port of addr converted to host byte order before being passed to log_query().
   NOTE(review): only the AF_INET test is visible; the in6 statements are
   presumably its else branch. */
125 static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg, unsigned short type)
127 if (addr->sa.sa_family == AF_INET)
129 if (flags & F_SERVER)
130 type = ntohs(addr->in.sin_port);
131 log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg, type);
135 if (flags & F_SERVER)
136 type = ntohs(addr->in6.sin6_port);
137 log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg, type);
/* Send plen bytes starting at header to "server" on fd using sendto() with the
   given flags, repeating the call for as long as retry_send() returns non-zero.
   Nothing is returned to the caller.
   NOTE(review): the line carrying the destination address argument is missing
   from this listing; the length argument is sa_len(&server->addr). */
141 static void server_send(struct server *server, int fd,
142 const void *header, size_t plen, int flags)
144 while (retry_send(sendto(fd, header, plen, flags,
146 sa_len(&server->addr))));
/* Test "domain" against every entry of the daemon->no_rebind list.
   An entry matches when its name equals a trailing part of "domain" (compared
   with hostname_isequal()) and that part starts on a label boundary, i.e. it is
   the whole of "domain" or is preceded by a '.'. An entry with an empty name
   is additionally tested against names containing no '.' at all.
   NOTE(review): the return statements are not visible in this listing -
   presumably non-zero on a match and zero otherwise; confirm. */
149 static int domain_no_rebind(char *domain)
151 struct rebind_domain *rbd;
152 size_t tlen, dlen = strlen(domain);
153 char *dots = strchr(domain, '.');
155 /* Match whole labels only. Empty domain matches no dots (any single label) */
156 for (rbd = daemon->no_rebind; rbd; rbd = rbd->next)
158 if (dlen >= (tlen = strlen(rbd->domain)) &&
159 hostname_isequal(rbd->domain, &domain[dlen - tlen]) &&
160 (dlen == tlen || domain[dlen - tlen - 1] == '.'))
163 if (tlen == 0 && !dots)
/* forward_query(): forward a client query to upstream server(s), or answer it
   locally when it cannot be forwarded.

   udpfd, udpaddr, dst_addr, dst_iface
                 where the query arrived; stored in the frec's frec_src and used
                 with send_from() for a locally generated reply. fast_retry()
                 calls this with udpfd == -1 and NULL addresses.
   header, plen  the query packet and its length.
   limit         end-of-buffer pointer handed to make_local_answer() and
                 add_pseudoheader().
   now           current time, used for ageing and server rotation.
   forward       an existing frec when re-sending; otherwise set here from
                 lookup_frec_by_query() or get_new_frec().
   ad_reqd, do_bit, fast_retry
                 request properties supplied by the caller.

   Visible flow:
   1. Hash the question, extract the name into daemon->namebuff and derive
      fwd_flags from the query.
   2. A pending frec with the same hash and flags is either a retry (same
      orig_id and source address) or gets this client added to its source list.
   3. Otherwise servers are selected with lookup_domain(), a new frec is
      allocated and filled in, and the starting server is chosen.
   4. The packet is given the frec's new_id, optional EDNS0/DO additions, and is
      sent with sendto() to the selected server(s); the last server used is
      remembered in forward->sentto.
   5. If nothing could be sent, the frec is freed, a local answer is built with
      make_local_answer() and returned to the client with send_from().

   NOTE(review): many short lines (braces, else, return statements and some
   conditions) are missing from this listing, so the exact branch structure and
   the return values must be confirmed against the complete source. */
170 static int forward_query(int udpfd, union mysockaddr *udpaddr,
171 union all_addr *dst_addr, unsigned int dst_iface,
172 struct dns_header *header, size_t plen, char *limit, time_t now,
173 struct frec *forward, int ad_reqd, int do_bit, int fast_retry)
175 unsigned int flags = 0;
176 unsigned int fwd_flags = 0;
177 int is_dnssec = forward && (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY));
178 struct server *master;
179 void *hash = hash_questions(header, plen, daemon->namebuff);
180 unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
181 unsigned char *oph = find_pseudoheader(header, plen, NULL, NULL, NULL, NULL);
182 int old_src = 0, old_reply = 0;
183 int first, last, start = 0;
184 int cacheable, forwarded = 0;
186 unsigned char *pheader;
190 if (header->hb4 & HB4_CD)
191 fwd_flags |= FREC_CHECKING_DISABLED;
193 fwd_flags |= FREC_AD_QUESTION;
195 fwd_flags |= FREC_HAS_PHEADER;
198 fwd_flags |= FREC_DO_QUESTION;
201 /* Check for retry on existing query.
202 FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below
203 ensures that no frec created for internal DNSSEC query can be returned here.
205 Similarly FREC_NO_CACHE is never set in flags, so a query which is
206 contingent on a particular source address EDNS0 option will never be matched. */
212 else if ((forward = lookup_frec_by_query(hash, fwd_flags,
213 FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION |
214 FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)))
216 struct frec_src *src;
218 for (src = &forward->frec_src; src; src = src->next)
219 if (src->orig_id == ntohs(header->id) &&
220 sockaddr_isequal(&src->source, udpaddr))
226 /* If a query is retried, use the log_id for the retry when logging the answer. */
227 src->log_id = daemon->log_id;
231 /* Existing query, but from new source, just add this
232 client to the list that will get the reply.*/
234 /* Note whine_malloc() zeros memory. */
235 if (!daemon->free_frec_src &&
236 daemon->frec_src_count < daemon->ftabsize &&
237 (daemon->free_frec_src = whine_malloc(sizeof(struct frec_src))))
239 daemon->frec_src_count++;
240 daemon->free_frec_src->next = NULL;
243 /* If we've been spammed with many duplicates, return REFUSED. */
244 if (!daemon->free_frec_src)
246 query_full(now, NULL);
247 /* This is tricky; if we're blasted with the same query
248 over and over, we'll end up taking this path each time
249 and never resetting until the frec gets deleted by
250 aging followed by the receipt of a different query. This
251 is a bit of a DoS vuln. Avoid by explicitly deleting the
252 frec once it expires. */
253 if (difftime(now, forward->time) >= TIMEOUT)
258 src = daemon->free_frec_src;
259 daemon->free_frec_src = src->next;
260 src->next = forward->frec_src.next;
261 forward->frec_src.next = src;
262 src->orig_id = ntohs(header->id);
263 src->source = *udpaddr;
264 src->dest = *dst_addr;
265 src->log_id = daemon->log_id;
266 src->iface = dst_iface;
269 /* closely spaced identical queries cannot be a try and a retry, so
270 it's safe to wait for the reply from the first without
271 forwarding the second. */
272 if (difftime(now, forward->time) < 2)
280 /* If the query is malformed, we can't forward it because
281 we can't get a reliable hash to recognise the answer. */
285 ede = EDE_INVALID_DATA;
289 if (lookup_domain(daemon->namebuff, gotname, &first, &last))
290 flags = is_local_answer(now, first, daemon->namebuff);
293 /* no available server. */
298 /* don't forward A or AAAA queries for simple names, except the empty name */
300 option_bool(OPT_NODOTS_LOCAL) &&
301 (gotname & (F_IPV4 | F_IPV6)) &&
302 !strchr(daemon->namebuff, '.') &&
303 strlen(daemon->namebuff) != 0)
304 flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
306 /* Configured answer. */
307 if (flags || ede == EDE_NOT_READY)
310 master = daemon->serverarray[first];
312 if (!(forward = get_new_frec(now, master, 0)))
314 /* table full - flags == 0, return REFUSED */
316 /* Keep copy of query if we're doing fast retry. */
317 if (daemon->fast_retry_time != 0)
319 forward->stash = blockdata_alloc((char *)header, plen);
320 forward->stash_len = plen;
323 forward->frec_src.log_id = daemon->log_id;
324 forward->frec_src.source = *udpaddr;
325 forward->frec_src.orig_id = ntohs(header->id);
326 forward->frec_src.dest = *dst_addr;
327 forward->frec_src.iface = dst_iface;
328 forward->frec_src.next = NULL;
329 forward->frec_src.fd = udpfd;
330 forward->new_id = get_id();
331 memcpy(forward->hash, hash, HASH_SIZE);
332 forward->forwardall = 0;
333 forward->flags = fwd_flags;
334 if (domain_no_rebind(daemon->namebuff))
335 forward->flags |= FREC_NOREBIND;
336 if (header->hb4 & HB4_CD)
337 forward->flags |= FREC_CHECKING_DISABLED;
339 forward->flags |= FREC_AD_QUESTION;
341 forward->work_counter = DNSSEC_WORK;
343 forward->flags |= FREC_DO_QUESTION;
348 if (option_bool(OPT_ALL_SERVERS))
349 forward->forwardall = 1;
351 if (!option_bool(OPT_ORDER))
353 if (master->forwardcount++ > FORWARD_TEST ||
354 difftime(now, master->forwardtime) > FORWARD_TIME ||
355 master->last_server == -1)
357 master->forwardtime = now;
358 master->forwardcount = 0;
359 forward->forwardall = 1;
362 start = master->last_server;
368 /* If we've already got an answer to this query, but we're awaiting keys for validation,
369 there's no point retrying the query, retry the key query instead...... */
370 while (forward->blocking_query)
371 forward = forward->blocking_query;
373 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
376 unsigned char *pheader;
378 /* log_id should match previous DNSSEC query. */
379 daemon->log_display_id = forward->frec_src.log_id;
381 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
382 plen = forward->stash_len;
383 /* get query for logging. */
384 extract_request(header, plen, daemon->namebuff, NULL);
386 if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
387 PUTSHORT(SAFE_PKTSZ, pheader);
389 /* Find suitable servers: should never fail. */
390 if (!filter_servers(forward->sentto->arrayposn, F_DNSSECOK, &first, &last))
394 forward->forwardall = 1;
399 /* retry on existing query, from original source. Send to all available servers */
400 if (udpfd == -1 && !fast_retry)
401 forward->sentto->failed_queries++;
403 forward->sentto->retrys++;
405 if (!filter_servers(forward->sentto->arrayposn, F_SERVER, &first, &last))
408 master = daemon->serverarray[first];
410 /* Forward to all available servers on retry of query from same host. */
411 if (!option_bool(OPT_ORDER) && old_src && !fast_retry)
412 forward->forwardall = 1;
415 start = forward->sentto->arrayposn;
417 if (option_bool(OPT_ORDER) && !fast_retry)
419 /* In strict order mode, there must be a server later in the list
420 left to send to, otherwise without the forwardall mechanism,
421 code further on will cycle around the list forever if they
422 all return REFUSED. If at the last, give up.
423 Note that we can get here EITHER because a client retried,
424 or an upstream server returned REFUSED. The above only
425 applied in the latter case. For client retries,
426 keep trying the last server.. */
438 /* If we didn't get an answer advertising a maximal packet in EDNS,
439 fall back to 1280, which should work everywhere on IPv6.
440 If that generates an answer, it will become the new default
442 forward->flags |= FREC_TEST_PKTSZ;
445 /* We may be resending a DNSSEC query here, for which the below processing is not necessary. */
448 header->id = htons(forward->new_id);
450 plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable);
453 forward->flags |= FREC_NO_CACHE;
456 if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
458 plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
460 /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
461 this allows it to select auth servers when one is returning bad data. */
462 if (option_bool(OPT_DNSSEC_DEBUG))
463 header->hb4 |= HB4_CD;
468 if (find_pseudoheader(header, plen, &edns0_len, &pheader, NULL, NULL))
470 /* If there wasn't a PH before, and there is now, we added it. */
472 forward->flags |= FREC_ADDED_PHEADER;
474 /* If we're sending an EDNS0 with any options, we can't recreate the query from a reply. */
476 forward->flags |= FREC_HAS_EXTRADATA;
478 /* Reduce udp size on retransmits. */
479 if (forward->flags & FREC_TEST_PKTSZ)
480 PUTSHORT(SAFE_PKTSZ, pheader);
484 if (forward->forwardall)
489 /* check for send errors here (no route to host)
490 if we fail to send to all nameservers, send back an error
491 packet straight away (helps modem users when offline) */
496 struct server *srv = daemon->serverarray[start];
498 if ((fd = allocate_rfd(&forward->rfds, srv)) != -1)
501 #ifdef HAVE_CONNTRACK
502 /* Copy connection mark of incoming query to outgoing connection. */
503 if (option_bool(OPT_CONNTRACK))
504 set_outgoing_mark(forward, fd);
508 if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
510 /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
511 packet size to 512. But that won't provide space for the RRSIGS in many cases.
512 The RRSIGS will be stripped out before the answer goes back, so the packet should
513 shrink again. So, if we added a do-bit, bump the udp packet size to the value
514 known to be OK for this server. We check returned size after stripping and set
515 the truncated bit if it's still too big. */
516 unsigned char *pheader;
518 if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
519 PUTSHORT(srv->edns_pktsz, pheader);
523 if (retry_send(sendto(fd, (char *)header, plen, 0,
525 sa_len(&srv->addr))))
531 dump_packet_udp(DUMP_UP_QUERY, (void *)header, plen, NULL, &srv->addr, fd);
534 /* Keep info in case we want to re-send this packet */
535 daemon->srv_save = srv;
536 daemon->packet_len = plen;
537 daemon->fd_save = fd;
539 if (!(forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)))
542 strcpy(daemon->namebuff, "query");
543 log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
544 &srv->addr, NULL, 0);
548 log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->namebuff, &srv->addr,
549 (forward->flags & FREC_DNSKEY_QUERY) ? "dnssec-retry[DNSKEY]" : "dnssec-retry[DS]", 0);
554 forward->sentto = srv;
555 if (!forward->forwardall)
557 forward->forwardall++;
565 if (forwarded || is_dnssec)
567 forward->forward_timestamp = dnsmasq_milliseconds();
571 /* could not send on, prepare to return */
572 header->id = htons(forward->frec_src.orig_id);
573 free_frec(forward); /* cancel */
579 if (!(plen = make_local_answer(flags, gotname, plen, header, daemon->namebuff, limit, first, last, ede)))
584 u16 swap = htons((u16)ede);
586 if (ede != EDE_UNSET)
587 plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
589 plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
592 #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
593 if (option_bool(OPT_CMARK_ALST_EN))
596 int have_mark = get_incoming_mark(udpaddr, dst_addr, /* istcp: */ 0, &mark);
597 if (have_mark && ((u32)mark & daemon->allowlist_mask))
598 report_addresses(header, plen, mark);
602 send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
/* fast_retry(): does nothing unless daemon->fast_retry_time is non-zero.
   Walks daemon->frec_list and considers each frec that has a sentto server, a
   stashed packet and an age below daemon->fast_retry_timeout. For such a frec,
   t is the number of milliseconds since forward_timestamp:
     - if t is still below forward_delay, the remaining time is noted;
     - otherwise the stashed query is copied into daemon->packet, its EDNS0 UDP
       size (PACKETSZ when there is no pseudoheader) bounds the buffer limit
       passed to forward_query(), the query is re-sent with udpfd == -1 and
       fast_retry == 1, and forward_delay is doubled.
   ret is compared against each noted time, with -1 meaning "nothing recorded yet".
   NOTE(review): the original header comment below has lost its final line and
   its terminator in this listing, as have the local declarations, the handling
   of f->blocking_query and the return statement. */
608 /* Check if any frecs need to do a retry, and action that if so.
609 Return time in milliseconds until the next retry will be required,
611 int fast_retry(time_t now)
616 if (daemon->fast_retry_time != 0)
618 u32 millis = dnsmasq_milliseconds();
620 for (f = daemon->frec_list; f; f = f->next)
621 if (f->sentto && f->stash && difftime(now, f->time) < daemon->fast_retry_timeout)
624 if (f->blocking_query)
627 /* t is milliseconds since last query sent. */
628 int to_run, t = (int)(millis - f->forward_timestamp);
630 if (t < f->forward_delay)
631 to_run = f->forward_delay - t;
634 unsigned char *udpsz;
635 unsigned short udp_size = PACKETSZ; /* default if no EDNS0 */
636 struct dns_header *header = (struct dns_header *)daemon->packet;
638 /* packet buffer overwritten */
639 daemon->srv_save = NULL;
641 blockdata_retrieve(f->stash, f->stash_len, (void *)header);
643 /* UDP size already set in saved query. */
644 if (find_pseudoheader(header, f->stash_len, NULL, &udpsz, NULL, NULL))
645 GETSHORT(udp_size, udpsz);
647 daemon->log_display_id = f->frec_src.log_id;
649 forward_query(-1, NULL, NULL, 0, header, f->stash_len, ((char *) header) + udp_size, now, f,
650 f->flags & FREC_AD_QUESTION, f->flags & FREC_DO_QUESTION, 1);
652 to_run = f->forward_delay = 2 * f->forward_delay;
655 if (ret == -1 || ret > to_run)
/* Scan setlist for entries whose domain is a whole-label suffix of "domain".
   An entry qualifies when its domain is no longer than the name, equals the
   tail of the name (hostname_isequal()), starts on a label boundary (or is
   empty, or is the whole name), and is at least as long as the best match so
   far; matchlen then records its length. Since the test is domainlen >=
   matchlen, a later entry of equal length also qualifies.
   NOTE(review): matchstart is computed before namelen >= domainlen is tested,
   so it may point before the start of "domain"; it is only dereferenced after
   the length test has passed. The lines that store the match in ret and return
   it are not visible in this listing. */
663 static struct ipsets *domain_find_sets(struct ipsets *setlist, const char *domain) {
664 /* Similar algorithm to search_servers. */
665 struct ipsets *ipset_pos, *ret = NULL;
666 unsigned int namelen = strlen(domain);
667 unsigned int matchlen = 0;
668 for (ipset_pos = setlist; ipset_pos; ipset_pos = ipset_pos->next)
670 unsigned int domainlen = strlen(ipset_pos->domain);
671 const char *matchstart = domain + namelen - domainlen;
672 if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
673 (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
674 domainlen >= matchlen)
676 matchlen = domainlen;
/* process_reply(): post-process an upstream reply held in header (n bytes) and
   return its new length.

   server        the upstream the reply came from; used for the "refused to do
                 a recursive query" warning and the nxdomain_replies counter.
   check_rebind, no_cache, cache_secure
                 passed through to extract_addresses().
   bogusanswer   when set (and CD is clear and OPT_DNSSEC_DEBUG is off) the
                 reply is turned into SERVFAIL.
   ad_reqd, do_bit
                 what the client asked for; they control the AD bit and the
                 removal of DNSSEC records.
   added_pheader NOTE(review): its use is not visible in this listing; it
                 presumably guards the "strip EDNS0" step - confirm.
   query_source  passed to check_source() when OPT_CLIENT_SUBNET is set.
   limit, ede    passed to add_pseudoheader() when an EDE option is added.

   Visible steps, in order: ipset/nftset lookup for the query name; EDNS0
   pseudoheader handling (extended RCODE, subnet check, stripping, UDP size
   clamp to daemon->edns_pktsz, DO bit reset); AD cleared and RA set; early
   return via resize_packet() for non-QUERY opcodes and for rcodes other than
   NOERROR/NXDOMAIN; non-recursive upstream warning; bogus-nxdomain and
   NXDOMAIN->NODATA rewriting; A/AAAA filtering; extract_addresses(); SERVFAIL
   for bogus answers; AD/DNSSEC record handling; final resize_packet() and
   optional EDE option.
   NOTE(review): braces, else branches, case labels and the final return are
   missing from this listing. */
684 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
685 int no_cache, int cache_secure, int bogusanswer, int ad_reqd, int do_bit, int added_pheader,
686 union mysockaddr *query_source, unsigned char *limit, int ede)
688 unsigned char *pheader, *sizep;
689 struct ipsets *ipsets = NULL, *nftsets = NULL;
690 int munged = 0, is_sign;
691 unsigned int rcode = RCODE(header);
699 if (daemon->ipsets && extract_request(header, n, daemon->namebuff, NULL))
700 ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
704 if (daemon->nftsets && extract_request(header, n, daemon->namebuff, NULL))
705 nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
708 if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
710 /* Get extended RCODE. */
711 rcode |= sizep[2] << 4;
713 if (option_bool(OPT_CLIENT_SUBNET) && !check_source(header, plen, pheader, query_source))
715 my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
723 /* client didn't send EDNS0, we added one, strip it off before returning answer. */
724 n = rrfilter(header, n, RRFILTER_EDNS0);
729 /* If upstream is advertising a larger UDP packet size
730 than we allow, trim it so that we don't get overlarge
731 requests for the client. We can't do this for signed packets. */
732 unsigned short udpsz;
733 GETSHORT(udpsz, sizep);
734 if (udpsz > daemon->edns_pktsz)
737 PUTSHORT(daemon->edns_pktsz, sizep);
741 /* If the client didn't set the do bit, but we did, reset it. */
742 if (option_bool(OPT_DNSSEC_VALID) && !do_bit)
744 unsigned short flags;
745 sizep += 2; /* skip RCODE */
746 GETSHORT(flags, sizep);
749 PUTSHORT(flags, sizep);
756 /* RFC 4035 sect 4.6 para 3 */
757 if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
758 header->hb4 &= ~HB4_AD;
760 header->hb4 |= HB4_RA; /* recursion if available */
762 if (OPCODE(header) != QUERY)
763 return resize_packet(header, n, pheader, plen);
765 if (rcode != NOERROR && rcode != NXDOMAIN)
770 log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL, 0);
772 return resize_packet(header, n, pheader, plen);
775 /* Complain loudly if the upstream server is non-recursive. */
776 if (!(header->hb4 & HB4_RA) && rcode == NOERROR &&
777 server && !(server->flags & SERV_WARNED_RECURSIVE))
779 (void)prettyprint_addr(&server->addr, daemon->namebuff);
780 my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
781 if (!option_bool(OPT_LOG))
782 server->flags |= SERV_WARNED_RECURSIVE;
785 if (daemon->bogus_addr && rcode != NXDOMAIN &&
786 check_for_bogus_wildcard(header, n, daemon->namebuff, now))
789 SET_RCODE(header, NXDOMAIN);
790 header->hb3 &= ~HB3_AA;
798 if (rcode == NXDOMAIN &&
799 extract_request(header, n, daemon->namebuff, NULL))
801 if (check_for_local_domain(daemon->namebuff, now) ||
802 lookup_domain(daemon->namebuff, F_CONFIG, NULL, NULL))
804 /* if we forwarded a query for a locally known name (because it was for
805 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
806 since we know that the domain exists, even if upstream doesn't */
808 header->hb3 |= HB3_AA;
809 SET_RCODE(header, NOERROR);
814 /* Before extract_addresses() */
815 if (rcode == NOERROR)
817 if (option_bool(OPT_FILTER_A))
818 n = rrfilter(header, n, RRFILTER_A);
820 if (option_bool(OPT_FILTER_AAAA))
821 n = rrfilter(header, n, RRFILTER_AAAA);
824 switch (extract_addresses(header, n, daemon->namebuff, now, ipsets, nftsets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
827 my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
833 /* extract_addresses() found a malformed answer. */
836 SET_RCODE(header, SERVFAIL);
847 if (bogusanswer && !(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
849 /* Bogus reply, turn into SERVFAIL */
850 SET_RCODE(header, SERVFAIL);
854 if (option_bool(OPT_DNSSEC_VALID))
856 header->hb4 &= ~HB4_AD;
858 if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
859 header->hb4 |= HB4_AD;
861 /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
863 n = rrfilter(header, n, RRFILTER_DNSSEC);
867 /* do this after extract_addresses. Ensure NODATA reply and remove
871 header->ancount = htons(0);
872 header->nscount = htons(0);
873 header->arcount = htons(0);
874 header->hb3 &= ~HB3_TC;
877 /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
878 sections of the packet. Find the new length here and put back pseudoheader
879 if it was removed. */
880 n = resize_packet(header, n, pheader, plen);
882 if (pheader && ede != EDE_UNSET)
884 u16 swap = htons((u16)ede);
885 n = add_pseudoheader(header, n, limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 1);
/* NOTE(review): server is dereferenced below without the NULL test used for
   the recursion warning earlier in this function - confirm callers never pass NULL. */
888 if (RCODE(header) == NXDOMAIN)
889 server->nxdomain_replies++;
/* dnssec_validate(): run DNSSEC validation on the reply in header (plen bytes)
   that answers the query tracked by "forward".

   status  incoming validation status; it is overridden with STAT_TRUNCATED
           when the TC bit is set and with STAT_ABANDONED when the RCODE is
           REFUSED.
   now     current time, passed to the validation routines and get_new_frec().

   Visible flow:
   - A frec that already has a blocking_query is a duplicate reply and is ignored.
   - Unless status is BOGUS, TRUNCATED or ABANDONED, the reply is validated with
     dnssec_validate_by_ds(), dnssec_validate_ds() or dnssec_validate_reply(),
     chosen by the FREC_DNSKEY_QUERY / FREC_DS_QUERY flags.
   - On STAT_NEED_DS / STAT_NEED_KEY the reply is stashed and "forward" is made
     dependent either on an existing frec for the same key query (found with
     lookup_frec_dnssec(), after a walk of its blocking_query chain to detect
     loops) or on a newly allocated frec whose DNSKEY/DS query is generated and
     sent with server_send(). If that cannot be done, the stash is freed and
     status becomes STAT_ABANDONED.
   - When "forward" has no dependent, the result goes to return_reply();
     otherwise each dependent frec has its blocking_query cleared, its stashed
     reply restored into header, and is validated by a recursive call.
   NOTE(review): braces, else branches, several declarations and the return
   statements are missing from this listing. */
895 static void dnssec_validate(struct frec *forward, struct dns_header *header,
896 ssize_t plen, int status, time_t now)
898 daemon->log_display_id = forward->frec_src.log_id;
900 /* We've had a reply already, which we're validating. Ignore this duplicate */
901 if (forward->blocking_query)
904 /* Truncated answer can't be validated.
905 If this is an answer to a DNSSEC-generated query, we still
906 need to get the client to retry over TCP, so return
907 an answer with the TC bit set, even if the actual answer fits.
909 if (header->hb3 & HB3_TC)
910 status = STAT_TRUNCATED;
912 /* If all replies to a query are REFUSED, give up. */
913 if (RCODE(header) == REFUSED)
914 status = STAT_ABANDONED;
916 /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
917 would invite infinite loops, since the answers to DNSKEY and DS queries
918 will not be cached, so they'll be repeated. */
919 if (!STAT_ISEQUAL(status, STAT_BOGUS) && !STAT_ISEQUAL(status, STAT_TRUNCATED) && !STAT_ISEQUAL(status, STAT_ABANDONED))
921 if (forward->flags & FREC_DNSKEY_QUERY)
922 status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
923 else if (forward->flags & FREC_DS_QUERY)
924 status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
926 status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class,
927 !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
930 if (STAT_ISEQUAL(status, STAT_BOGUS))
931 dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
932 header, (size_t)plen, &forward->sentto->addr, NULL, -daemon->port);
936 /* Can't validate, as we're missing key data. Put this
937 answer aside, whilst we get that. */
938 if (STAT_ISEQUAL(status, STAT_NEED_DS) || STAT_ISEQUAL(status, STAT_NEED_KEY))
940 struct frec *new = NULL;
941 struct blockdata *stash;
943 /* Now save reply pending receipt of key data */
944 if ((stash = blockdata_alloc((char *)header, plen)))
946 /* validate routines leave name of required record in daemon->keyname */
947 unsigned int flags = STAT_ISEQUAL(status, STAT_NEED_KEY) ? FREC_DNSKEY_QUERY : FREC_DS_QUERY;
949 if ((new = lookup_frec_dnssec(daemon->keyname, forward->class, flags, header)))
951 /* This is tricky; it detects loops in the dependency
952 graph for DNSSEC validation, say validating A requires DS B
953 and validating DS B requires DNSKEY C and validating DNSKEY C requires DS B.
954 This should never happen in correctly signed records, but it's
955 likely the case that sufficiently broken ones can cause our validation
956 code requests to exhibit cycles. The result is that the ->blocking_query list
957 can form a cycle, and under certain circumstances that can lock us in
958 an infinite loop. Here we transform the situation into ABANDONED. */
960 for (f = new; f; f = f->blocking_query)
966 forward->next_dependent = new->dependent;
967 new->dependent = forward;
968 /* Make consistent, only replace query copy with unvalidated answer
969 when we set ->blocking_query. */
971 blockdata_free(forward->stash);
972 forward->blocking_query = new;
973 forward->stash_len = plen;
974 forward->stash = stash;
980 struct server *server;
985 struct randfd_list *rfds = NULL;
987 /* Find the original query that started it all.... */
988 for (orig = forward; orig->dependent; orig = orig->dependent);
990 /* Make sure we don't expire and free the orig frec during the
991 allocation of a new one: third arg of get_new_frec() does that. */
992 if ((serverind = dnssec_server(forward->sentto, daemon->keyname, NULL, NULL)) != -1 &&
993 (server = daemon->serverarray[serverind]) &&
994 (nn = dnssec_generate_query(header, ((unsigned char *) header) + server->edns_pktsz,
995 daemon->keyname, forward->class,
996 STAT_ISEQUAL(status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz)) &&
997 (hash = hash_questions(header, nn, daemon->namebuff)) &&
998 --orig->work_counter != 0 &&
999 (fd = allocate_rfd(&rfds, server)) != -1 &&
1000 (new = get_new_frec(now, server, 1)))
1002 struct frec *next = new->next;
1004 *new = *forward; /* copy everything, then overwrite */
1006 new->blocking_query = NULL;
1008 new->frec_src.log_id = daemon->log_display_id = ++daemon->log_id;
1009 new->sentto = server;
1011 new->frec_src.next = NULL;
1012 new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
1013 new->flags |= flags;
1014 new->forwardall = 0;
1016 forward->next_dependent = NULL;
1017 new->dependent = forward; /* to find query awaiting new one. */
1019 /* Make consistent, only replace query copy with unvalidated answer
1020 when we set ->blocking_query. */
1021 forward->blocking_query = new;
1023 blockdata_free(forward->stash);
1024 forward->stash_len = plen;
1025 forward->stash = stash;
1027 memcpy(new->hash, hash, HASH_SIZE);
1028 new->new_id = get_id();
1029 header->id = htons(new->new_id);
1030 /* Save query for retransmission and de-dup */
1031 new->stash = blockdata_alloc((char *)header, nn);
1032 new->stash_len = nn;
1033 if (daemon->fast_retry_time != 0)
1034 new->forward_timestamp = dnsmasq_milliseconds();
1036 /* Don't resend this. */
1037 daemon->srv_save = NULL;
1039 #ifdef HAVE_CONNTRACK
1040 if (option_bool(OPT_CONNTRACK))
1041 set_outgoing_mark(orig, fd);
1044 server_send(server, fd, header, nn, 0);
1046 #ifdef HAVE_DUMPFILE
1047 dump_packet_udp(DUMP_SEC_QUERY, (void *)header, (size_t)nn, NULL, &server->addr, fd);
1049 log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->keyname, &server->addr,
1050 STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
1054 free_rfds(&rfds); /* error unwind */
1057 blockdata_free(stash); /* don't leak this on failure. */
1060 /* sending DNSSEC query failed or loop detected. */
1061 status = STAT_ABANDONED;
1064 /* Validated original answer, all done. */
1065 if (!forward->dependent)
1066 return_reply(now, forward, header, plen, status);
1069 /* validated subsidiary query/queries, (and cached result)
1070 pop that and return to the previous query/queries we were working on. */
1071 struct frec *prev, *nxt = forward->dependent;
1075 while ((prev = nxt))
1077 /* ->next_dependent will have changed after return from recursive call below. */
1078 nxt = prev->next_dependent;
1079 prev->blocking_query = NULL; /* already gone */
1080 blockdata_retrieve(prev->stash, prev->stash_len, (void *)header);
1081 dnssec_validate(prev, header, prev->stash_len, status, now);
1087 /* sets new last_server */
1088 void reply_query(int fd, time_t now)
1090 /* packet from peer server, extract data for cache, and send to
1091 original requester */
1092 struct dns_header *header;
1093 union mysockaddr serveraddr;
1094 struct frec *forward;
1095 socklen_t addrlen = sizeof(serveraddr);
1096 ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
1097 struct server *server;
1101 /* packet buffer overwritten */
1102 daemon->srv_save = NULL;
1104 /* Determine the address of the server replying so that we can mark that as good */
1105 if (serveraddr.sa.sa_family == AF_INET6)
1106 serveraddr.in6.sin6_flowinfo = 0;
1108 header = (struct dns_header *)daemon->packet;
1110 if (n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR))
1113 hash = hash_questions(header, n, daemon->namebuff);
1115 if (!(forward = lookup_frec(ntohs(header->id), fd, hash, &first, &last)))
1118 /* spoof check: answer must come from known server, also
1119 we may have sent the same query to multiple servers from
1120 the same local socket, and would like to know which one has answered. */
1121 for (c = first; c != last; c++)
1122 if (sockaddr_isequal(&daemon->serverarray[c]->addr, &serveraddr))
1128 server = daemon->serverarray[c];
1130 if (RCODE(header) != REFUSED)
1131 daemon->serverarray[first]->last_server = c;
1132 else if (daemon->serverarray[first]->last_server == c)
1133 daemon->serverarray[first]->last_server = -1;
1135 /* If sufficient time has elapsed, try and expand UDP buffer size again. */
1136 if (difftime(now, server->pktsz_reduced) > UDP_TEST_TIME)
1137 server->edns_pktsz = daemon->edns_pktsz;
1139 /* log_query gets called indirectly all over the place, so
1140 pass these in global variables - sorry. */
1141 daemon->log_display_id = forward->frec_src.log_id;
1142 daemon->log_source_addr = &forward->frec_src.source;
1144 #ifdef HAVE_DUMPFILE
1145 dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
1146 (void *)header, n, &serveraddr, NULL, fd);
1149 if (daemon->ignore_addr && RCODE(header) == NOERROR &&
1150 check_for_ignored_address(header, n))
1153 /* Note: if we send extra options in the EDNS0 header, we can't recreate
1154 the query from the reply. */
1155 if ((RCODE(header) == REFUSED || RCODE(header) == SERVFAIL) &&
1156 forward->forwardall == 0 &&
1157 !(forward->flags & FREC_HAS_EXTRADATA))
1158 /* for broken servers, attempt to send to another one. */
1160 unsigned char *pheader, *udpsz;
1161 unsigned short udp_size = PACKETSZ; /* default if no EDNS0 */
1167 /* The query MAY have got a good answer, and be awaiting
1168 the results of further queries, in which case
1169 The Stash contains something else and we don't need to retry anyway. */
1170 if (forward->blocking_query)
1173 if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
1175 /* DNSSEC queries have a copy of the original query stashed. */
1176 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
1177 nn = forward->stash_len;
1178 udp_size = daemon->edns_pktsz;
1183 /* in fast retry mode, we have a copy of the query. */
1184 if (daemon->fast_retry_time != 0 && forward->stash)
1186 blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
1187 nn = forward->stash_len;
1188 /* UDP size already set in saved query. */
1189 if (find_pseudoheader(header, (size_t)n, NULL, &udpsz, NULL, NULL))
1190 GETSHORT(udp_size, udpsz);
1194 /* recreate query from reply */
1195 if ((pheader = find_pseudoheader(header, (size_t)n, &plen, &udpsz, &is_sign, NULL)))
1196 GETSHORT(udp_size, udpsz);
1198 /* If the client provides an EDNS0 UDP size, use that to limit our reply.
1199 (bounded by the maximum configured). If no EDNS0, then it
1201 if (udp_size > daemon->edns_pktsz)
1202 udp_size = daemon->edns_pktsz;
1203 else if (udp_size < PACKETSZ)
1204 udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
1206 header->ancount = htons(0);
1207 header->nscount = htons(0);
1208 header->arcount = htons(0);
1209 header->hb3 &= ~(HB3_QR | HB3_AA | HB3_TC);
1210 header->hb4 &= ~(HB4_RA | HB4_RCODE | HB4_CD | HB4_AD);
1211 if (forward->flags & FREC_CHECKING_DISABLED)
1212 header->hb4 |= HB4_CD;
1213 if (forward->flags & FREC_AD_QUESTION)
1214 header->hb4 |= HB4_AD;
1217 (nn = resize_packet(header, (size_t)n, pheader, plen)) &&
1218 (forward->flags & FREC_DO_QUESTION))
1219 add_do_bit(header, nn, (unsigned char *)pheader + plen);
1225 forward_query(-1, NULL, NULL, 0, header, nn, ((char *) header) + udp_size, now, forward,
1226 forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 0);
1231 /* If the answer is an error, keep the forward record in place in case
1232 we get a good reply from another server. Kill it when we've
1233 had replies from all to avoid filling the forwarding table when
1234 everything is broken */
1236 /* decrement count of replies received if we sent to more than one server. */
1237 if (forward->forwardall && (--forward->forwardall > 1) && RCODE(header) == REFUSED)
1240 /* We tried resending to this server with a smaller maximum size and got an answer.
1241 Make that permanent. To avoid reducing the packet size for a single dropped packet,
1242 only do this when we get a truncated answer, or one larger than the safe size. */
1243 if (server->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) &&
1244 ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
1246 server->edns_pktsz = SAFE_PKTSZ;
1247 server->pktsz_reduced = now;
1248 (void)prettyprint_addr(&server->addr, daemon->addrbuff);
1249 my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
1252 forward->sentto = server;
1254 /* We have a good answer, and will now validate it or return it.
1255 It may be some time before the validation completes, but we don't need
1256 any more answers, so close the socket(s) on which we were expecting
1257 answers, to conserve file descriptors, and to save work reading and
1258 discarding answers for other upstreams. */
1259 free_rfds(&forward->rfds);
1261 /* calculate modified moving average of server latency */
1262 if (server->query_latency == 0)
1263 server->mma_latency = (dnsmasq_milliseconds() - forward->forward_timestamp) * 128; /* init */
1265 server->mma_latency += dnsmasq_milliseconds() - forward->forward_timestamp - server->query_latency;
1266 /* denominator controls how many queries we average over. */
1267 server->query_latency = server->mma_latency/128;
1271 if ((forward->sentto->flags & SERV_DO_DNSSEC) &&
1272 option_bool(OPT_DNSSEC_VALID) &&
1273 !(forward->flags & FREC_CHECKING_DISABLED))
1274 dnssec_validate(forward, header, n, STAT_OK, now);
1277 return_reply(now, forward, header, n, STAT_OK);
/* Finish handling an upstream reply for a forwarded query: log the DNSSEC
   validation outcome when status is not STAT_OK, run the packet through
   process_reply(), send the result to every client recorded in the
   forward->frec_src chain, then release the record with free_frec().
     now     - current time, passed through to process_reply().
     forward - forwarding record; supplies the flags, the upstream server
               (sentto) and the list of waiting clients.
     header  - the reply packet; modified in place (id, flag bits, counts).
     n       - length of the reply as passed to process_reply().
     status  - STAT_* validation result.
   NOTE(review): the data handed to send_from() is daemon->packet, not header -
   presumably header points into daemon->packet; confirm against the callers.
   NOTE(review): some short lines of this function are not visible in this
   listing, so a few branch bodies below are described with a hedge. */
1280 static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
1282 int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
1284 int ede = EDE_UNSET;
/* These globals tag subsequent log lines with the originating query. */
1288 daemon->log_display_id = forward->frec_src.log_id;
1289 daemon->log_source_addr = &forward->frec_src.source;
1291 /* Don't cache replies where DNSSEC validation was turned off, either
1292 the upstream server told us so, or the original query specified it. */
1293 if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
1294 no_cache_dnssec = 1;
/* Any status other than STAT_OK carries a validation result to report. */
1297 if (!STAT_ISEQUAL(status, STAT_OK))
1299 /* status is STAT_OK when validation not turned on. */
1300 no_cache_dnssec = 0;
1302 if (STAT_ISEQUAL(status, STAT_TRUNCATED))
1303 header->hb3 |= HB3_TC;
1306 char *result, *domain = "result";
1309 a.log.ede = ede = errflags_to_ede(status);
/* An abandoned validation is logged as ABANDONED but handled as BOGUS from here on. */
1311 if (STAT_ISEQUAL(status, STAT_ABANDONED))
1313 result = "ABANDONED";
1314 status = STAT_BOGUS;
1317 result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
/* NOTE(review): the statement guarded by the SECURE test is not visible in
   this listing - presumably it sets cache_secure; confirm. */
1319 if (STAT_ISEQUAL(status, STAT_SECURE))
1321 else if (STAT_ISEQUAL(status, STAT_BOGUS))
1323 no_cache_dnssec = 1;
/* Prefer the question name from the packet for the log line; fall back to "result". */
1326 if (extract_request(header, n, daemon->namebuff, NULL))
1327 domain = daemon->namebuff;
1330 log_query(F_SECSTAT, domain, &a, result, 0);
/* Rebind checking applies only when enabled and not exempted for this query. */
1335 if (option_bool(OPT_NO_REBIND))
1336 check_rebind = !(forward->flags & FREC_NOREBIND);
1338 /* restore CD bit to the value in the query */
1339 if (forward->flags & FREC_CHECKING_DISABLED)
1340 header->hb4 |= HB4_CD;
1342 header->hb4 &= ~HB4_CD;
1344 /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
1345 since the cache is ignorant of such things. */
1346 if (forward->flags & FREC_NO_CACHE)
1347 no_cache_dnssec = 1;
/* A zero result from process_reply() means nothing is sent to the clients. */
1349 if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer,
1350 forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION,
1351 forward->flags & FREC_ADDED_PHEADER, &forward->frec_src.source,
1352 ((unsigned char *)header) + daemon->edns_pktsz, ede)))
1354 struct frec_src *src;
1356 header->id = htons(forward->frec_src.orig_id);
1358 /* We added an EDNS0 header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
1359 greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
1360 header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
1361 if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
1363 header->ancount = htons(0);
1364 header->nscount = htons(0);
1365 header->arcount = htons(0);
1366 header->hb3 |= HB3_TC;
1367 nn = resize_packet(header, nn, NULL, 0);
/* One reply per waiting client; each gets its own original query id. */
1371 for (src = &forward->frec_src; src; src = src->next)
1373 header->id = htons(src->orig_id);
1375 #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1376 if (option_bool(OPT_CMARK_ALST_EN))
1379 int have_mark = get_incoming_mark(&src->source, &src->dest, /* istcp: */ 0, &mark);
1380 if (have_mark && ((u32)mark & daemon->allowlist_mask))
1381 report_addresses(header, nn, mark);
1387 #ifdef HAVE_DUMPFILE
1388 dump_packet_udp(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source, src->fd);
1390 send_from(src->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
1391 &src->source, &src->dest, src->iface);
/* Clients other than the first in the chain are logged as duplicates. */
1393 if (option_bool(OPT_EXTRALOG) && src != &forward->frec_src)
1395 daemon->log_display_id = src->log_id;
1396 daemon->log_source_addr = &src->source;
1397 log_query(F_UPSTREAM, "query", NULL, "duplicate", 0);
1403 free_frec(forward); /* cancel */
1407 #ifdef HAVE_CONNTRACK
/* Decide whether a query name is permitted for a connection mark.
   Walks daemon->allowlists; an entry applies when its mark equals
   (mark & daemon->allowlist_mask & entry mask). For each applicable entry the
   pattern array is scanned until its terminating null entry.
     mark - connection mark of the incoming query.
     name - query name, or NULL when the caller has no single name to check.
   NOTE(review): the return statements are not visible in this listing -
   presumably non-zero on a match and zero otherwise; confirm. */
1408 static int is_query_allowed_for_mark(u32 mark, const char *name)
1410 int is_allowable_name, did_validate_name = 0;
1411 struct allowlist *allowlists;
1412 char **patterns_pos;
1414 for (allowlists = daemon->allowlists; allowlists; allowlists = allowlists->next)
1415 if (allowlists->mark == (mark & daemon->allowlist_mask & allowlists->mask))
1416 for (patterns_pos = allowlists->patterns; *patterns_pos; patterns_pos++)
/* The literal pattern "*" is tested before the name is validated at all. */
1418 if (!strcmp(*patterns_pos, "*"))
/* Validate the name lazily and at most once; a NULL name is never allowable. */
1420 if (!did_validate_name)
1422 is_allowable_name = name ? is_valid_dns_name(name) : 0;
1423 did_validate_name = 1;
1425 if (is_allowable_name && is_dns_name_matching_pattern(name, *patterns_pos))
/* Turn the query in header into a reply for a query refused by the connmark
   allowlist: broadcasts a ubus event, calls setup_reply() with EDE_BLOCKED and
   returns the length of the packet up to the end of the question section.
     header - query packet, rewritten in place.
     qlen   - length of the query.
     mark   - connection mark, reported in the ubus event.
     name   - query name, reported in the ubus event.
   NOTE(review): the guards around the ubus call and the value returned when
   skip_questions() fails are not visible in this listing; callers in this
   file test the result against 0, so presumably 0 is returned - confirm. */
1431 static size_t answer_disallowed(struct dns_header *header, size_t qlen, u32 mark, const char *name)
1439 ubus_event_bcast_connmark_allowlist_refused(mark, name);
1442 setup_reply(header, /* flags: */ 0, EDE_BLOCKED);
1444 if (!(p = skip_questions(header, qlen)))
1446 return p - (unsigned char *)header;
/* Read one UDP DNS query from listen->fd into daemon->packet, apply the
   source and interface acceptance checks, then either answer it directly
   (allowlist refusal, authoritative answer, or answer_request()) or pass it
   to forward_query().
     listen - listener the datagram arrived on.
     now    - current time, passed to the answer and forward routines.
   NOTE(review): many short lines (early returns, braces, some declarations
   and preprocessor lines) are not visible in this listing; comments below
   describe only what the visible statements establish. */
1450 void receive_query(struct listener *listen, time_t now)
1452 struct dns_header *header = (struct dns_header *)daemon->packet;
1453 union mysockaddr source_addr;
1454 unsigned char *pheader;
1455 unsigned short type, udp_size = PACKETSZ; /* default if no EDNS0 */
1456 union all_addr dst_addr;
1457 struct in_addr netmask, dst_addr_4;
1460 int if_index = 0, auth_dns = 0, do_bit = 0, have_pseudoheader = 0;
1461 #ifdef HAVE_CONNTRACK
1462 unsigned int mark = 0;
1464 int is_single_query = 0, allowed = 1;
1469 struct iovec iov[1];
1471 struct cmsghdr *cmptr;
/* Ancillary-data buffer, sized for the largest control message each platform delivers. */
1473 struct cmsghdr align; /* this ensures alignment */
1474 char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
1475 #if defined(HAVE_LINUX_NETWORK)
1476 char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
1477 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
1478 char control[CMSG_SPACE(sizeof(struct in_addr)) +
1479 CMSG_SPACE(sizeof(unsigned int))];
1480 #elif defined(IP_RECVDSTADDR)
1481 char control[CMSG_SPACE(sizeof(struct in_addr)) +
1482 CMSG_SPACE(sizeof(struct sockaddr_dl))];
1485 int family = listen->addr.sa.sa_family;
1486 /* Can always get recvd interface for IPv6 */
1487 int check_dst = !option_bool(OPT_NOWILD) || family == AF_INET6;
1489 /* packet buffer overwritten */
1490 daemon->srv_save = NULL;
1492 dst_addr_4.s_addr = dst_addr.addr4.s_addr = 0;
/* With OPT_NOWILD and a known interface, take auth mode, destination address
   and netmask from the listener's interface record. */
1495 if (option_bool(OPT_NOWILD) && listen->iface)
1497 auth_dns = listen->iface->dns_auth;
1499 if (family == AF_INET)
1501 dst_addr_4 = dst_addr.addr4 = listen->iface->addr.in.sin_addr;
1502 netmask = listen->iface->netmask;
/* Receive at most daemon->edns_pktsz bytes into daemon->packet. */
1506 iov[0].iov_base = daemon->packet;
1507 iov[0].iov_len = daemon->edns_pktsz;
1509 msg.msg_control = control_u.control;
1510 msg.msg_controllen = sizeof(control_u);
1512 msg.msg_name = &source_addr;
1513 msg.msg_namelen = sizeof(source_addr);
1517 if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
/* Tests for: shorter than a DNS header, truncated datagram, or QR (response)
   bit set. NOTE(review): the action taken is not visible in this listing -
   presumably an early return; confirm. */
1520 if (n < (int)sizeof(struct dns_header) ||
1521 (msg.msg_flags & MSG_TRUNC) ||
1522 (header->hb3 & HB3_QR))
1525 /* Clear buffer beyond request to avoid risk of
1526 information disclosure. */
1527 memset(daemon->packet + n, 0, daemon->edns_pktsz - n);
1529 source_addr.sa.sa_family = family;
1531 if (family == AF_INET)
1533 /* Source-port == 0 is an error, we can't send back to that.
1534 http://www.ietf.org/mail-archive/web/dnsop/current/msg11441.html */
1535 if (source_addr.in.sin_port == 0)
1540 /* Source-port == 0 is an error, we can't send back to that. */
1541 if (source_addr.in6.sin6_port == 0)
1543 source_addr.in6.sin6_flowinfo = 0;
1546 /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
1547 if (option_bool(OPT_LOCAL_SERVICE))
1549 struct addrlist *addr;
1551 if (family == AF_INET6)
1553 for (addr = daemon->interface_addrs; addr; addr = addr->next)
1554 if ((addr->flags & ADDRLIST_IPV6) &&
1555 is_same_net6(&addr->addr.addr6, &source_addr.in6.sin6_addr, addr->prefixlen))
1560 struct in_addr netmask;
1561 for (addr = daemon->interface_addrs; addr; addr = addr->next)
/* Build an IPv4 netmask from the prefix length.
   NOTE(review): if prefixlen can be 0 this shifts by the full width of the
   type, which C leaves undefined - confirm the range of prefixlen. */
1563 netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
1564 if (!(addr->flags & ADDRLIST_IPV6) &&
1565 is_same_net(addr->addr.addr4, source_addr.in.sin_addr, netmask))
/* Function-static flag: the warning below is meant to be logged only once. */
1571 static int warned = 0;
1574 prettyprint_addr(&source_addr, daemon->addrbuff);
1575 my_syslog(LOG_WARNING, _("ignoring query from non-local network %s (logged only once)"), daemon->addrbuff);
/* The ancillary data must hold at least one control message header. */
1586 if (msg.msg_controllen < sizeof(struct cmsghdr))
/* Extract destination address and arrival interface index from the
   platform-specific control messages. */
1589 #if defined(HAVE_LINUX_NETWORK)
1590 if (family == AF_INET)
1591 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1592 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
1596 struct in_pktinfo *p;
1598 p.c = CMSG_DATA(cmptr);
1599 dst_addr_4 = dst_addr.addr4 = p.p->ipi_spec_dst;
1600 if_index = p.p->ipi_ifindex;
1602 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
1603 if (family == AF_INET)
1605 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1611 #ifndef HAVE_SOLARIS_NETWORK
1612 struct sockaddr_dl *s;
1615 p.c = CMSG_DATA(cmptr);
1616 if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
1617 dst_addr_4 = dst_addr.addr4 = *(p.a);
1618 else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
1619 #ifdef HAVE_SOLARIS_NETWORK
1622 if_index = p.s->sdl_index;
1628 if (family == AF_INET6)
1630 for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1631 if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
1635 struct in6_pktinfo *p;
1637 p.c = CMSG_DATA(cmptr);
1639 dst_addr.addr6 = p.p->ipi6_addr;
1640 if_index = p.p->ipi6_ifindex;
1644 /* enforce available interface configuration */
1646 if (!indextoname(listen->fd, if_index, ifr.ifr_name))
1649 if (!iface_check(family, &dst_addr, ifr.ifr_name, &auth_dns))
/* The interface list is re-enumerated (unless OPT_CLEVERBIND) before the
   loopback and label exceptions are tried. */
1651 if (!option_bool(OPT_CLEVERBIND))
1652 enumerate_interfaces(0);
1653 if (!loopback_exception(listen->fd, family, &dst_addr, ifr.ifr_name) &&
1654 !label_exception(if_index, family, &dst_addr))
1658 if (family == AF_INET && option_bool(OPT_LOCALISE))
1662 /* get the netmask of the interface which has the address we were sent to.
1663 This is not necessarily the interface we arrived on. */
1665 for (iface = daemon->interfaces; iface; iface = iface->next)
1666 if (iface->addr.sa.sa_family == AF_INET &&
1667 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1670 /* interface may be new */
1671 if (!iface && !option_bool(OPT_CLEVERBIND))
1672 enumerate_interfaces(0);
1674 for (iface = daemon->interfaces; iface; iface = iface->next)
1675 if (iface->addr.sa.sa_family == AF_INET &&
1676 iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1679 /* If we failed, abandon localisation */
1681 netmask = iface->netmask;
1683 dst_addr_4.s_addr = 0;
1687 /* log_query gets called indirectly all over the place, so
1688 pass these in global variables - sorry. */
1689 daemon->log_display_id = ++daemon->log_id;
1690 daemon->log_source_addr = &source_addr;
1692 #ifdef HAVE_DUMPFILE
1693 dump_packet_udp(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL, listen->fd);
1696 #ifdef HAVE_CONNTRACK
1697 if (option_bool(OPT_CMARK_ALST_EN))
1698 have_mark = get_incoming_mark(&source_addr, &dst_addr, /* istcp: */ 0, &mark);
/* On success daemon->namebuff holds the query name and type its query type. */
1701 if (extract_request(header, (size_t)n, daemon->namebuff, &type))
1704 struct auth_zone *zone;
1706 log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
1707 &source_addr, auth_dns ? "auth" : "query", type);
1709 #ifdef HAVE_CONNTRACK
1710 is_single_query = 1;
1714 /* find queries for zones we're authoritative for, and answer them directly */
1715 if (!auth_dns && !option_bool(OPT_LOCALISE))
1716 for (zone = daemon->auth_zones; zone; zone = zone->next)
1717 if (in_zone(zone, daemon->namebuff, NULL))
1726 /* Check for forwarding loop */
1727 if (detect_loop(daemon->namebuff, type))
/* Parse the EDNS0 pseudoheader, if the query carries one. */
1732 if (find_pseudoheader(header, (size_t)n, NULL, &pheader, NULL, NULL))
1734 unsigned short flags;
1736 have_pseudoheader = 1;
1737 GETSHORT(udp_size, pheader);
1738 pheader += 2; /* ext_rcode */
1739 GETSHORT(flags, pheader);
1742 do_bit = 1;/* do bit */
1744 /* If the client provides an EDNS0 UDP size, use that to limit our reply.
1745 (bounded by the maximum configured). If no EDNS0, then it
1746 defaults to 512. We write this value into the query packet too, so that
1747 if it's forwarded, we don't specify a maximum size greater than we can handle. */
1748 if (udp_size > daemon->edns_pktsz)
1749 udp_size = daemon->edns_pktsz;
1750 else if (udp_size < PACKETSZ)
1751 udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
1753 pheader -= 6; /* ext_class */
1754 PUTSHORT(udp_size, pheader); /* Bounding forwarded queries to maximum configured */
/* Connmark allowlist: decide whether this query may be answered at all. */
1757 #ifdef HAVE_CONNTRACK
1759 if (!auth_dns || local_auth)
1761 if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1762 allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
/* Refusal path: reply with EDE_BLOCKED, echoing an EDNS0 option only if the
   client sent a pseudoheader. */
1766 #ifdef HAVE_CONNTRACK
1769 u16 swap = htons(EDE_BLOCKED);
1771 m = answer_disallowed(header, (size_t)n, (u32)mark, is_single_query ? daemon->namebuff : NULL);
1773 if (have_pseudoheader && m != 0)
1774 m = add_pseudoheader(header, m, ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
1775 EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
1779 #ifdef HAVE_DUMPFILE
1780 dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1782 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1783 (char *)header, m, &source_addr, &dst_addr, if_index);
1784 daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
/* Authoritative path. */
1791 m = answer_auth(header, ((char *) header) + udp_size, (size_t)n, now, &source_addr,
1792 local_auth, do_bit, have_pseudoheader);
1795 #ifdef HAVE_DUMPFILE
1796 dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1798 #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1800 if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1801 report_addresses(header, m, mark);
1803 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1804 (char *)header, m, &source_addr, &dst_addr, if_index);
1805 daemon->metrics[METRIC_DNS_AUTH_ANSWERED]++;
/* Normal path: try to answer locally, then forward if needed. hb3/hb4 save
   the query's header flag bytes before answer_request() rewrites the packet. */
1812 int ad_reqd = do_bit;
1813 u16 hb3 = header->hb3, hb4 = header->hb4;
1814 int fd = listen->fd;
1817 if (header->hb4 & HB4_AD)
1820 m = answer_request(header, ((char *) header) + udp_size, (size_t)n,
1821 dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
/* A stale answer is flagged to EDNS0-capable clients with EDE_STALE. */
1825 if (stale && have_pseudoheader)
1827 u16 swap = htons(EDE_STALE);
1829 m = add_pseudoheader(header, m, ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
1830 EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
1832 #ifdef HAVE_DUMPFILE
1833 dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1835 #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1836 if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1837 report_addresses(header, m, mark);
1839 send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1840 (char *)header, m, &source_addr, &dst_addr, if_index);
1841 daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
1843 daemon->metrics[METRIC_DNS_STALE_ANSWERED]++;
/* Forward when nothing was answered locally, or to refresh stale data. */
1846 if (m == 0 || stale)
1852 /* We answered with stale cache data, so forward the query anyway to
1853 refresh that. Restore the query from the answer packet. */
1854 pheader = find_pseudoheader(header, (size_t)m, &plen, NULL, NULL, NULL);
1858 header->ancount = htons(0);
1859 header->nscount = htons(0);
1860 header->arcount = htons(0);
1862 m = resize_packet(header, m, pheader, plen);
1864 /* We've already answered the client, so don't send it the answer
1865 when it comes back. */
/* NOTE(review): the call below passes the original length n even when the
   stale path above recomputed m from the rebuilt packet - confirm intended. */
1869 if (forward_query(fd, &source_addr, &dst_addr, if_index,
1870 header, (size_t)n, ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
1871 daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
1873 daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
1878 /* Send query in packet, qsize to a server determined by first,last,start and
1879 get the reply. return reply size. */
/* Framing: packet starts with a 2-byte length field, written here in network
   byte order from qsize, followed by the DNS message itself. The reply is
   read back into the same buffer after the length field.
     first, last - bounds of the candidate range in daemon->serverarray.
     start       - index of the first server to try.
     packet      - buffer holding the length field plus the query.
     qsize       - length of the DNS message, excluding the length field.
     have_mark, mark - connection mark copied to the outgoing socket.
     servp       - NOTE(review): the assignment is not visible in this listing;
                   presumably set to the server that answered - confirm.
   Callers in this file treat a return value of 0 as failure. */
1880 static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet, size_t qsize,
1881 int have_mark, unsigned int mark, struct server **servp)
1883 int firstsendto = -1;
1884 u16 *length = (u16 *)packet;
1885 unsigned char *payload = &packet[2];
1886 struct dns_header *header = (struct dns_header *)payload;
1887 unsigned char c1, c2;
1888 unsigned char hash[HASH_SIZE], *hashp;
/* Keep a private copy of the question hash to compare against the reply's. */
1894 if (!(hashp = hash_questions(header, (unsigned int)qsize, daemon->namebuff)))
1897 memcpy(hash, hashp, HASH_SIZE);
1902 struct server *serv;
/* firstsendto remembers where the scan began so that coming back round to it
   can be detected. */
1904 if (firstsendto == -1)
1905 firstsendto = start;
1913 if (start == firstsendto)
1917 serv = daemon->serverarray[start];
1920 *length = htons(qsize);
/* Open a TCP connection to this server if none is open yet. */
1922 if (serv->tcpfd == -1)
1924 if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
1927 #ifdef HAVE_CONNTRACK
1928 /* Copy connection mark of incoming query to outgoing connection. */
1930 setsockopt(serv->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
1933 if ((!local_bind(serv->tcpfd, &serv->source_addr, serv->interface, 0, 1)))
/* Attempt to send the query with MSG_FASTOPEN; when data_sent is not set the
   code below falls back to connect() followed by an ordinary write. */
1941 server_send(serv, serv->tcpfd, packet, qsize + sizeof(u16), MSG_FASTOPEN);
1947 if (!data_sent && connect(serv->tcpfd, &serv->addr.sa, sa_len(&serv->addr)) == -1)
1954 daemon->serverarray[first]->last_server = start;
1955 serv->flags &= ~SERV_GOT_TCP;
/* Write the query if still unsent, then read the two length bytes (high byte
   first) and that many bytes of reply. */
1958 if ((!data_sent && !read_write(serv->tcpfd, packet, qsize + sizeof(u16), 0)) ||
1959 !read_write(serv->tcpfd, &c1, 1, 1) ||
1960 !read_write(serv->tcpfd, &c2, 1, 1) ||
1961 !read_write(serv->tcpfd, payload, (rsize = (c1 << 8) | c2), 1))
1965 /* We get data then EOF, reopen connection to same server,
1966 else try next. This avoids DoS from a server which accepts
1967 connections and then closes them. */
1968 if (serv->flags & SERV_GOT_TCP)
1974 /* If the hash of the question section doesn't match the crc we sent, then
1975 someone might be attempting to insert bogus values into the cache by
1976 sending replies containing questions and bogus answers.
1977 Try another server, or give up */
1978 if (!(hashp = hash_questions(header, rsize, daemon->namebuff)) || memcmp(hash, hashp, HASH_SIZE) != 0)
1981 serv->flags |= SERV_GOT_TCP;
1991 /* Recurse down the key hierarchy */
/* Validates the reply in header over TCP. When validation reports that a
   DNSKEY or DS record is needed, a query for it is generated, sent with
   tcp_talk(), and the answer is validated by a recursive call.
     now       - current time, passed to the validation routines.
     status    - STAT_NEED_KEY, STAT_NEED_DS, or another value to validate a
                 normal reply.
     header, n - packet to validate and its length.
     class     - query class; may be updated by dnssec_validate_reply().
     name, keyname - work buffers; keyname names the key/DS still needed.
     server    - upstream server; may be replaced by tcp_talk().
     have_mark, mark - connection mark handed on to tcp_talk().
     keycount  - remaining work budget, decremented on every pass.
   NOTE(review): loop structure, cleanup and return statement are not visible
   in this listing. */
1992 static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
1993 int class, char *name, char *keyname, struct server *server,
1994 int have_mark, unsigned int mark, int *keycount)
1996 int first, last, start, new_status;
1997 unsigned char *packet = NULL;
1998 struct dns_header *new_header = NULL;
2005 /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
2006 if (--(*keycount) == 0)
2007 new_status = STAT_ABANDONED;
2008 else if (STAT_ISEQUAL(status, STAT_NEED_KEY))
2009 new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
2010 else if (STAT_ISEQUAL(status, STAT_NEED_DS))
2011 new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
2013 new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
2014 !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
/* Any result other than NEED_DS or NEED_KEY is final for this level. */
2017 if (!STAT_ISEQUAL(new_status, STAT_NEED_DS) && !STAT_ISEQUAL(new_status, STAT_NEED_KEY))
2020 /* Can't validate because we need a key/DS whose name now in keyname.
2021 Make query for same, and recurse to validate */
/* Buffer layout matches tcp_talk(): 2-byte length field, then the message. */
2024 packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
2025 new_header = (struct dns_header *)&packet[2];
2030 new_status = STAT_ABANDONED;
2034 m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class,
2035 STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);
/* NOTE(review): server selection uses daemon->keyname rather than the keyname
   parameter; the caller visible in this file passes daemon->keyname, so the
   two coincide there - confirm for any other callers. */
2037 if ((start = dnssec_server(server, daemon->keyname, &first, &last)) == -1 ||
2038 (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
2040 new_status = STAT_ABANDONED;
/* Give the sub-query its own log id; the saved id is restored after the recursion. */
2044 log_save = daemon->log_display_id;
2045 daemon->log_display_id = ++daemon->log_id;
/* NOTE(review): the log label is chosen from status, whereas the query type
   generated above is chosen from new_status - the label may not match the
   query actually sent; confirm which is intended. */
2047 log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
2048 STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
2050 new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, have_mark, mark, keycount);
2052 daemon->log_display_id = log_save;
2054 if (!STAT_ISEQUAL(new_status, STAT_OK))
2066 /* The daemon forks before calling this: it should deal with one connection,
2067 blocking as necessary, and then return. Note, need to be a bit careful
2068 about resources for debug mode, when the fork is suppressed: that's
2069 done by the caller. */
2070 unsigned char *tcp_request(int confd, time_t now,
2071 union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
2075 #ifdef HAVE_CONNTRACK
2076 int is_single_query = 0, allowed = 1;
2081 int checking_disabled, do_bit, added_pheader = 0, have_pseudoheader = 0;
2082 int cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
2084 unsigned short qtype;
2085 unsigned int gotname;
2086 /* Max TCP packet + slop + size */
2087 unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
2088 unsigned char *payload = &packet[2];
2089 unsigned char c1, c2;
2090 /* largest field in header is 16-bits, so this is still sufficiently aligned */
2091 struct dns_header *header = (struct dns_header *)payload;
2092 u16 *length = (u16 *)packet;
2093 struct server *serv;
2094 struct in_addr dst_addr_4;
2095 union mysockaddr peer_addr;
2096 socklen_t peer_len = sizeof(union mysockaddr);
2097 int query_count = 0;
2098 unsigned char *pheader;
2099 unsigned int mark = 0;
2101 int first, last, stale, do_stale = 0;
2102 unsigned int flags = 0;
2105 if (!packet || getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
2108 #ifdef HAVE_CONNTRACK
2109 /* Get connection mark of incoming query to set on outgoing connections. */
2110 if (option_bool(OPT_CONNTRACK) || option_bool(OPT_CMARK_ALST_EN))
2112 union all_addr local;
2114 if (local_addr->sa.sa_family == AF_INET6)
2115 local.addr6 = local_addr->in6.sin6_addr;
2117 local.addr4 = local_addr->in.sin_addr;
2119 have_mark = get_incoming_mark(&peer_addr, &local, 1, &mark);
2123 /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
2124 if (option_bool(OPT_LOCAL_SERVICE))
2126 struct addrlist *addr;
2128 if (peer_addr.sa.sa_family == AF_INET6)
2130 for (addr = daemon->interface_addrs; addr; addr = addr->next)
2131 if ((addr->flags & ADDRLIST_IPV6) &&
2132 is_same_net6(&addr->addr.addr6, &peer_addr.in6.sin6_addr, addr->prefixlen))
2137 struct in_addr netmask;
2138 for (addr = daemon->interface_addrs; addr; addr = addr->next)
2140 netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
2141 if (!(addr->flags & ADDRLIST_IPV6) &&
2142 is_same_net(addr->addr.addr4, peer_addr.in.sin_addr, netmask))
2148 prettyprint_addr(&peer_addr, daemon->addrbuff);
2149 my_syslog(LOG_WARNING, _("ignoring query from non-local network %s"), daemon->addrbuff);
2156 int ede = EDE_UNSET;
2158 if (query_count == TCP_MAX_QUERIES)
2165 /* We answered the last query with stale data. Now try and get fresh data.
2166 Restore query from answer. */
2167 pheader = find_pseudoheader(header, m, &plen, NULL, NULL, NULL);
2171 header->ancount = htons(0);
2172 header->nscount = htons(0);
2173 header->arcount = htons(0);
2175 size = resize_packet(header, m, pheader, plen);
2179 if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
2180 !(size = c1 << 8 | c2) ||
2181 !read_write(confd, payload, size, 1))
2184 /* for stale-answer processing. */
2189 if (size < (int)sizeof(struct dns_header))
2192 /* Clear buffer beyond request to avoid risk of
2193 information disclosure. */
2194 memset(payload + size, 0, 65536 - size);
2198 /* log_query gets called indirectly all over the place, so
2199 pass these in global variables - sorry. */
2200 daemon->log_display_id = ++daemon->log_id;
2201 daemon->log_source_addr = &peer_addr;
2203 /* save state of "cd" flag in query */
2204 if ((checking_disabled = header->hb4 & HB4_CD))
2205 no_cache_dnssec = 1;
2207 if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
2210 struct auth_zone *zone;
2213 #ifdef HAVE_CONNTRACK
2214 is_single_query = 1;
2219 log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
2220 &peer_addr, auth_dns ? "auth" : "query", qtype);
2223 /* find queries for zones we're authoritative for, and answer them directly */
2224 if (!auth_dns && !option_bool(OPT_LOCALISE))
2225 for (zone = daemon->auth_zones; zone; zone = zone->next)
2226 if (in_zone(zone, daemon->namebuff, NULL))
2236 norebind = domain_no_rebind(daemon->namebuff);
2238 if (local_addr->sa.sa_family == AF_INET)
2239 dst_addr_4 = local_addr->in.sin_addr;
2241 dst_addr_4.s_addr = 0;
2245 if (find_pseudoheader(header, (size_t)size, NULL, &pheader, NULL, NULL))
2247 unsigned short flags;
2249 have_pseudoheader = 1;
2250 pheader += 4; /* udp_size, ext_rcode */
2251 GETSHORT(flags, pheader);
2254 do_bit = 1; /* do bit */
2257 #ifdef HAVE_CONNTRACK
2259 if (!auth_dns || local_auth)
2261 if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
2262 allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
2266 #ifdef HAVE_CONNTRACK
2269 u16 swap = htons(EDE_BLOCKED);
2271 m = answer_disallowed(header, size, (u32)mark, is_single_query ? daemon->namebuff : NULL);
2273 if (have_pseudoheader && m != 0)
2274 m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz,
2275 EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2280 m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr,
2281 local_auth, do_bit, have_pseudoheader);
2285 int ad_reqd = do_bit;
2287 if (header->hb4 & HB4_AD)
2293 /* m > 0 if answered from cache */
2294 m = answer_request(header, ((char *) header) + 65536, (size_t)size,
2295 dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
2297 /* Do this by steam now we're not in the select() loop */
2298 check_log_writer(1);
2302 struct server *master;
2305 if (lookup_domain(daemon->namebuff, gotname, &first, &last))
2306 flags = is_local_answer(now, first, daemon->namebuff);
2309 /* No configured servers */
2310 ede = EDE_NOT_READY;
2314 /* don't forward A or AAAA queries for simple names, except the empty name */
2316 option_bool(OPT_NODOTS_LOCAL) &&
2317 (gotname & (F_IPV4 | F_IPV6)) &&
2318 !strchr(daemon->namebuff, '.') &&
2319 strlen(daemon->namebuff) != 0)
2320 flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
2322 if (!flags && ede != EDE_NOT_READY)
2324 master = daemon->serverarray[first];
2326 if (option_bool(OPT_ORDER) || master->last_server == -1)
2329 start = master->last_server;
2331 size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &cacheable);
2334 if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
2336 size = add_do_bit(header, size, ((unsigned char *) header) + 65536);
2338 /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
2339 this allows it to select auth servers when one is returning bad data. */
2340 if (option_bool(OPT_DNSSEC_DEBUG))
2341 header->hb4 |= HB4_CD;
2345 /* Check if we added a pheader on forwarding - may need to
2346 strip it from the reply. */
2347 if (!have_pseudoheader && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
2350 /* Loop round available servers until we succeed in connecting to one. */
2351 if ((m = tcp_talk(first, last, start, packet, size, have_mark, mark, &serv)) == 0)
2357 /* get query name again for logging - may have been overwritten */
2358 if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
2359 strcpy(daemon->namebuff, "query");
2360 log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff, &serv->addr, NULL, 0);
2363 if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (master->flags & SERV_DO_DNSSEC))
2365 int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
2366 int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname,
2367 serv, have_mark, mark, &keycount);
2368 char *result, *domain = "result";
2371 a.log.ede = ede = errflags_to_ede(status);
2373 if (STAT_ISEQUAL(status, STAT_ABANDONED))
2375 result = "ABANDONED";
2376 status = STAT_BOGUS;
2379 result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
2381 if (STAT_ISEQUAL(status, STAT_SECURE))
2383 else if (STAT_ISEQUAL(status, STAT_BOGUS))
2385 no_cache_dnssec = 1;
2388 if (extract_request(header, m, daemon->namebuff, NULL))
2389 domain = daemon->namebuff;
2392 log_query(F_SECSTAT, domain, &a, result, 0);
2396 /* restore CD bit to the value in the query */
2397 if (checking_disabled)
2398 header->hb4 |= HB4_CD;
2400 header->hb4 &= ~HB4_CD;
2402 /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
2403 since the cache is ignorant of such things. */
2405 no_cache_dnssec = 1;
2407 m = process_reply(header, now, serv, (unsigned int)m,
2408 option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
2409 ad_reqd, do_bit, added_pheader, &peer_addr, ((unsigned char *)header) + 65536, ede);
2417 /* In case of local answer or no connections made. */
2420 if (!(m = make_local_answer(flags, gotname, size, header, daemon->namebuff,
2421 ((char *) header) + 65536, first, last, ede)))
2424 if (have_pseudoheader)
2426 u16 swap = htons((u16)ede);
2428 if (ede != EDE_UNSET)
2429 m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2431 m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
2436 u16 swap = htons((u16)EDE_STALE);
2438 m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2441 check_log_writer(1);
2445 #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
2447 if (!auth_dns || local_auth)
2449 if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
2450 report_addresses(header, m, mark);
2452 if (!read_write(confd, packet, m + sizeof(u16), 0))
2455 /* If we answered with stale data, this process will now try and get fresh data into
2456 the cache then and cannot therefore accept new queries. Close the incoming
2457 connection to signal that to the client. Then set do_stale and loop round
2458 once more to try and get fresh data, after which we exit. */
2461 shutdown(confd, SHUT_RDWR);
2467 /* If we ran once to get fresh data, confd is already closed. */
2470 shutdown(confd, SHUT_RDWR);
2477 /* return a UDP socket bound to a random port, have to cope with straying into
2478 occupied port nos and reserved ones. */
/* Creates a SOCK_DGRAM socket in the address family of s->source_addr and
   binds it through local_bind() using the server's source address, interface
   name and interface index.
   A bind failure is logged with my_syslog(LOG_ERR, ...) unless the source
   address is null and errno is EADDRINUSE.
   Callers in this file compare the result with -1 to detect failure. */
2479 static int random_sock(struct server *s)
2483 if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
2485 /* We need to set IPV6ONLY so we can use the same ports
2486 for IPv4 and IPV6, otherwise, in restricted port situations,
2487 we can end up with all our available ports in use for
2488 one address family, and the other address family cannot be used. */
2489 if (s->source_addr.sa.sa_family == AF_INET6)
/* NOTE(review): opt is declared and set on lines not shown here; presumably
   non-zero so that IPV6_V6ONLY is enabled - confirm. */
2493 if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt)) == -1)
/* NOTE(review): the statements taken when local_bind() returns non-zero are
   not shown; given the error logging that follows, non-zero presumably means
   the bind succeeded - confirm. */
2500 if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
2503 /* don't log errors due to running out of available ports, we handle those. */
2504 if (!sockaddr_isnull(&s->source_addr) || errno != EADDRINUSE)
/* Fill daemon->addrbuff with what the log message should name: the printed
   source address when the interface name is empty; the copy of the interface
   name below is presumably the alternative branch. */
2506 if (s->interface[0] == 0)
2507 (void)prettyprint_addr(&s->source_addr, daemon->addrbuff);
2509 safe_strncpy(daemon->addrbuff, s->interface, ADDRSTRLEN);
2511 my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
2512 daemon->addrbuff, strerror(errno));
2521 /* compare source addresses and interface, serv2 can be null. */
/* The visible part of the test requires all of: equal ifindex, source
   addresses equal according to sockaddr_isequal(), and interface names that
   match in their first IF_NAMESIZE bytes (strncmp).
   NOTE(review): the first term of the expression, where serv2 would be
   checked for NULL, is not shown here - confirm. */
2522 static int server_isequal(const struct server *serv1,
2523 const struct server *serv2)
2526 serv2->ifindex == serv1->ifindex &&
2527 sockaddr_isequal(&serv2->source_addr, &serv1->source_addr) &&
2528 strncmp(serv2->interface, serv1->interface, IF_NAMESIZE) == 0);
/* Overview of allocate_rfd(): returns a file descriptor for sending to serv,
   or -1 when none can be obtained. The paths visible below are tried in
   this order:
   1. a server with a pre-allocated socket gets serv->sfd->fd directly;
   2. if the transaction's list already holds daemon->randport_limit or more
      entries matching serv, the last matching entry is promoted to the head
      of the list and its fd is returned;
   3. an unused daemon->randomsocks[] slot (refcount == 0) is given a fresh
      socket from random_sock();
   4. otherwise an in-use slot with matching server parameters, not already
      on this transaction's list, is shared;
   5. otherwise a temporary struct randfd, marked by refcount == 0xffff, is
      allocated and also linked on daemon->rfl_poll. */
2531 /* fdlp points to chain of randomfds already in use by transaction.
2532 If there's already a suitable one, return it, else allocate a
2533 new one and add it to the list.
2535 Not leaking any resources in the face of allocation failures
2536 is rather convoluted here.
2538 Note that rfd->serv may be NULL, when a server goes away.
2540 int allocate_rfd(struct randfd_list **fdlp, struct server *serv)
2542 static int finger = 0;
2545 struct randfd_list **up, *rfl, *found, **found_link;
2546 struct randfd *rfd = NULL;
2548 int ports_avail = 0;
2550 /* We can't have more randomsocks for this AF available than ports in our port range,
2551 so check that here, to avoid trying and failing to bind every port
2552 in local_bind(), called from random_sock(). The actual check is below when
2554 if (daemon->max_port != 0)
2556 ports_avail = daemon->max_port - daemon->min_port + 1;
2557 if (ports_avail >= SMALL_PORT_RANGE)
2561 /* If server has a pre-allocated fd, use that. */
2563 return serv->sfd->fd;
2565 /* existing suitable random port socket linked to this transaction?
2566 Find the last one in the list and count how many there are. */
2567 for (found = NULL, found_link = NULL, i = 0, up = fdlp, rfl = *fdlp; rfl; up = &rfl->next, rfl = rfl->next)
2568 if (server_isequal(serv, rfl->rfd->serv))
2575 /* We have the maximum number for this query already. Promote
2576 the last one on the list to the head, to circulate them,
2578 if (found && i >= daemon->randport_limit)
2580 *found_link = found->next;
2581 found->next = *fdlp;
2583 return found->rfd->fd;
2586 /* check for all available ports in use. */
2587 if (ports_avail != 0)
/* NOTE(review): server_gone() sets randomsocks[i].serv to NULL while the
   refcount is still non-zero, and the test below dereferences that pointer
   guarded only by refcount != 0. Looks like a possible NULL dereference -
   confirm and add a NULL check if so. */
2591 for (ports_inuse = 0, i = 0; i < daemon->numrrand; i++)
2592 if (daemon->randomsocks[i].refcount != 0 &&
2593 daemon->randomsocks[i].serv->source_addr.sa.sa_family == serv->source_addr.sa.sa_family &&
2594 ++ports_inuse >= ports_avail)
2601 /* limit the number of sockets we have open to avoid starvation of
2602 (eg) TFTP. Once we have a reasonable number, randomness should be OK */
2604 for (i = 0; i < daemon->numrrand; i++)
2605 if (daemon->randomsocks[i].refcount == 0)
2607 if ((fd = random_sock(serv)) != -1)
2609 rfd = &daemon->randomsocks[i];
2617 /* No good existing. Need new link. */
2618 if ((rfl = daemon->rfl_spare))
2619 daemon->rfl_spare = rfl->next;
2620 else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
2622 /* malloc failed, don't leak allocated sock */
2632 /* No free ones or cannot get new socket, grab an existing one */
2634 for (j = 0; j < daemon->numrrand; j++)
/* The scan starts at the static offset finger and wraps round the array.
   NOTE(review): refcount 0xfffe is excluded below, presumably so that a
   shared refcount can never reach the 0xffff temporary-record marker -
   confirm. */
2636 i = (j + finger) % daemon->numrrand;
2637 if (daemon->randomsocks[i].refcount != 0 &&
2638 server_isequal(serv, daemon->randomsocks[i].serv) &&
2639 daemon->randomsocks[i].refcount != 0xfffe)
2641 struct randfd_list *rl;
2642 /* Don't pick one we already have. */
2643 for (rl = *fdlp; rl; rl = rl->next)
2644 if (rl->rfd == &daemon->randomsocks[i])
2650 rfd = &daemon->randomsocks[i];
2657 if (!rfd) /* should be when j == daemon->numrrand */
2659 struct randfd_list *rfl_poll;
2661 /* there are no free slots, and none with the same parameters we can piggy-back on.
2662 We're going to have to allocate a new temporary record, distinguished by
2663 refcount == 0xffff. This will exist in the frec randfd list, never be shared,
2664 and be freed when no longer in use. It will also be held on
2665 the daemon->rfl_poll list so the poll system can find it. */
2667 if ((rfl_poll = daemon->rfl_spare))
2668 daemon->rfl_spare = rfl_poll->next;
2670 rfl_poll = whine_malloc(sizeof(struct randfd_list));
2673 !(rfd = whine_malloc(sizeof(struct randfd))) ||
2674 (fd = random_sock(serv)) == -1)
2677 /* Don't leak anything we may already have */
2678 rfl->next = daemon->rfl_spare;
2679 daemon->rfl_spare = rfl;
2683 rfl_poll->next = daemon->rfl_spare;
2684 daemon->rfl_spare = rfl_poll;
2690 return -1; /* doom */
2693 /* Note rfd->serv not set here, since it's not reused */
2695 rfd->refcount = 0xffff; /* marker for temp record */
/* Put the temporary record at the head of daemon->rfl_poll. */
2697 rfl_poll->rfd = rfd;
2698 rfl_poll->next = daemon->rfl_poll;
2699 daemon->rfl_poll = rfl_poll;
2706 return rfl->rfd->fd;
/* Release every random-socket reference held on the list *fdlp.
   For each link: a temporary record (refcount == 0xffff) always has its
   socket closed; a shared record has its refcount decremented and its socket
   closed when the count reaches zero. For a temporary record the matching
   entry on daemon->rfl_poll is found and pushed onto daemon->rfl_spare.
   Each list link itself is pushed onto daemon->rfl_spare for reuse.
   NOTE(review): several statements between the visible lines (including any
   free() of the temporary record) are not shown here. */
2709 void free_rfds(struct randfd_list **fdlp)
2711 struct randfd_list *tmp, *rfl, *poll, *next, **up;
2713 for (rfl = *fdlp; rfl; rfl = tmp)
/* The decrement is skipped for temporary records thanks to the
   short-circuit, so their 0xffff marker survives for the test below. */
2715 if (rfl->rfd->refcount == 0xffff || --(rfl->rfd->refcount) == 0)
2716 close(rfl->rfd->fd);
2718 /* temporary overflow record */
2719 if (rfl->rfd->refcount == 0xffff)
2723 /* go through the link of all these by steam to delete.
2724 This list is expected to be almost always empty. */
2725 for (poll = daemon->rfl_poll, up = &daemon->rfl_poll; poll; poll = next)
2729 if (poll->rfd == rfl->rfd)
2732 poll->next = daemon->rfl_spare;
2733 daemon->rfl_spare = poll;
/* Recycle the link itself onto the spare list. */
2741 rfl->next = daemon->rfl_spare;
2742 daemon->rfl_spare = rfl;
/* Release what a forward record holds: chained frec_src entries beyond the
   one embedded in the frec go back to daemon->free_frec_src, random sockets
   are released with free_rfds(), and the stashed block data is freed.
   If the record was waiting on a blocking query it is unlinked from that
   query's dependent list, and the blocking query is freed recursively once
   it has no dependents left. */
2748 static void free_frec(struct frec *f)
2750 struct frec_src *last;
2752 /* add back to freelist if not the record builtin to every frec. */
/* Empty-bodied loop: leaves last on the final chained entry, or NULL when
   there are no chained entries. */
2753 for (last = f->frec_src.next; last && last->next; last = last->next) ;
/* Splice the whole chain onto the front of the free list. */
2756 last->next = daemon->free_frec_src;
2757 daemon->free_frec_src = f->frec_src.next;
2760 f->frec_src.next = NULL;
2761 free_rfds(&f->rfds);
2767 blockdata_free(f->stash);
2772 /* Anything we're waiting on is pointless now, too */
2773 if (f->blocking_query)
2775 struct frec *n, **up;
2777 /* unlink ourselves from the blocking query's dependents list. */
2778 for (n = f->blocking_query->dependent, up = &f->blocking_query->dependent; n; n = n->next_dependent)
2781 *up = n->next_dependent;
2785 up = &n->next_dependent;
2787 /* If we were the only/last dependent, free the blocking query too. */
2788 if (!f->blocking_query->dependent)
2789 free_frec(f->blocking_query);
2792 f->blocking_query = NULL;
2793 f->dependent = NULL;
2794 f->next_dependent = NULL;
2800 /* Impose an absolute
2801 limit of 4*TIMEOUT before we wipe things (for random sockets).
2802 If force is set, always return a result, even if we have
2803 to allocate above the limit, and don't free any records.
2804 This is set when allocating for DNSSEC to avoid cutting off
2805 the branch we are sitting on. */
/* Scans daemon->frec_list for a reusable record while counting the records
   recently sent (younger than TIMEOUT) to master's server group. When force
   is zero and that count has reached daemon->ftabsize, query_full() is
   called to log the condition. If no free record was found, the oldest
   record is taken over once it is at least TIMEOUT old, which is counted in
   METRIC_DNS_UNANSWERED_QUERY; failing that a new frec is allocated with
   whine_malloc() and linked at the head of daemon->frec_list. forward_delay
   of the chosen record is set from daemon->fast_retry_time.
   NOTE(review): the return statements are not shown here; presumably NULL
   is returned when the limit is hit or allocation fails - confirm. */
2806 static struct frec *get_new_frec(time_t now, struct server *master, int force)
2808 struct frec *f, *oldest, *target;
2811 /* look for free records, garbage collect old records and count number in use by our server-group. */
2812 for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next)
2819 /* Don't free DNSSEC sub-queries here, as we may end up with
2820 dangling references to them. They'll go when their "real" query
2822 if (!f->dependent && !force)
2825 if (difftime(now, f->time) >= 4*TIMEOUT)
2827 daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
2831 else if (!oldest || difftime(f->time, oldest->time) <= 0)
2836 if (f->sentto && ((int)difftime(now, f->time)) < TIMEOUT && server_samegroup(f->sentto, master))
2840 if (!force && count >= daemon->ftabsize)
2842 query_full(now, master->domain);
2846 if (!target && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
2848 /* can't find empty one, use oldest if there is one and it's older than timeout */
2849 daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
2854 if (!target && (target = (struct frec *)whine_malloc(sizeof(struct frec))))
2856 target->next = daemon->frec_list;
2857 daemon->frec_list = target;
2863 target->forward_delay = daemon->fast_retry_time;
/* Log a LOG_WARNING that the limit on concurrent DNS queries
   (daemon->ftabsize) has been reached. The message names domain when it is
   non-NULL and non-empty, otherwise the generic form is used.
   The static last_log is compared with now so the body runs only when more
   than 5 seconds (difftime truncated to int) separate the two.
   NOTE(review): the statement that updates last_log is not shown here. */
2869 static void query_full(time_t now, char *domain)
2871 static time_t last_log = 0;
2873 if ((int)difftime(now, last_log) > 5)
2876 if (!domain || strlen(domain) == 0)
2877 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
2879 my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries to %s reached (max: %d)"), domain, daemon->ftabsize);
/* Find the forward record that a reply received on fd belongs to.
   A candidate has sentto set, new_id equal to id, and a hash equal to hash
   over HASH_SIZE bytes. For a candidate, filter_servers() fills *firstp and
   *lastp starting from the sentto server's array position. The candidate is
   then checked against fd in two ways: through the random sockets on
   f->rfds, and through the sfd socket of each server with index in
   [*firstp, *lastp).
   NOTE(review): the return statements are not shown here; presumably the
   matching record is returned, or NULL when there is none - confirm. */
2884 static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp)
2889 struct randfd_list *fdl;
2892 for (f = daemon->frec_list; f; f = f->next)
2893 if (f->sentto && f->new_id == id &&
2894 (memcmp(hash, f->hash, HASH_SIZE) == 0))
2896 filter_servers(f->sentto->arrayposn, F_SERVER, firstp, lastp);
2898 /* sent from random port */
2899 for (fdl = f->rfds; fdl; fdl = fdl->next)
2900 if (fdl->rfd->fd == fd)
2903 /* Sent to upstream from socket associated with a server.
2904 Note we have to iterate over all the possible servers, since they may
2905 have different bound sockets. */
2906 for (first = *firstp, last = *lastp; first != last; first++)
2908 s = daemon->serverarray[first];
2909 if (s->sfd && s->sfd->fd == fd)
/* Search daemon->frec_list for a record whose flags, masked with flagmask,
   equal flags and whose hash equals hash over HASH_SIZE bytes.
   NOTE(review): the first term of the condition and the return statements
   are not shown here. */
2917 static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask)
2922 for (f = daemon->frec_list; f; f = f->next)
2924 (f->flags & flagmask) == flags &&
2925 memcmp(hash, f->hash, HASH_SIZE) == 0)
2932 /* DNSSEC frecs have the complete query in the block stash.
2933 Search for an existing query using that. */
/* A candidate must have at least one of the bits in flags set. Its stashed
   packet is copied into header with blockdata_retrieve(), and the class
   field read from the question is compared with class.
   header is used as scratch space and is overwritten by each retrieval.
   NOTE(review): with these arguments extract_name() presumably compares the
   question name in the packet against target rather than overwriting
   target - confirm against extract_name(). */
2934 static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header)
2938 for (f = daemon->frec_list; f; f = f->next)
2940 (f->flags & flags) &&
2941 blockdata_retrieve(f->stash, f->stash_len, (void *)header))
/* The question section starts immediately after the fixed header. */
2943 unsigned char *p = (unsigned char *)(header+1);
2946 if (extract_name(header, f->stash_len, &p, target, 0, 4) != 1)
2949 p += 2; /* type, known from flags */
2950 GETSHORT(hclass, p);
2952 if (class != hclass)
2962 /* Send query packet again, if we can. */
/* Only acts when daemon->srv_save is set: daemon->packet (daemon->packet_len
   bytes) is passed to server_send() for that server on daemon->fd_save. */
2965 if (daemon->srv_save)
2966 server_send(daemon->srv_save, daemon->fd_save,
2967 daemon->packet, daemon->packet_len, 0);
2970 /* A server record is going away, remove references to it */
/* Three kinds of reference are handled: forward records whose sentto is
   this server, in-use random sockets whose serv points at it (the pointer
   is set to NULL, the socket stays in use), and daemon->srv_save.
   NOTE(review): the action taken for a matching forward record is not
   shown here. */
2971 void server_gone(struct server *server)
2976 for (f = daemon->frec_list; f; f = f->next)
2977 if (f->sentto && f->sentto == server)
2980 /* If any random socket refers to this server, NULL the reference.
2981 No more references to the socket will be created in the future. */
2982 for (i = 0; i < daemon->numrrand; i++)
2983 if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
2984 daemon->randomsocks[i].serv = NULL;
2986 if (daemon->srv_save == server)
2987 daemon->srv_save = NULL;
2990 /* return unique random ids. */
2991 static unsigned short get_id(void)
2993 unsigned short ret = 0;
3000 /* ensure id is unique. */
3001 for (f = daemon->frec_list; f; f = f->next)
3002 if (f->sentto && f->new_id == ret)