Imported Upstream version 1.24
[platform/upstream/connman.git] / src / dnsproxy.c
1 /*
2  *
3  *  Connection Manager
4  *
5  *  Copyright (C) 2007-2014  Intel Corporation. All rights reserved.
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License version 2 as
9  *  published by the Free Software Foundation.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <errno.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include <arpa/inet.h>
32 #include <netinet/in.h>
33 #include <sys/types.h>
34 #include <sys/socket.h>
35 #include <fcntl.h>
36 #include <netdb.h>
37 #include <resolv.h>
38 #include <gweb/gresolv.h>
39
40 #include <glib.h>
41
42 #include "connman.h"
43
/*
 * DNS message header as it appears at the start of every packet
 * (12 bytes). Only the order of the flag bit-fields depends on the
 * host byte order; the 16-bit counters are stored as received, so
 * callers convert them with ntohs()/htons().
 */
struct domain_hdr {
	uint16_t id;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	uint8_t rd:1;
	uint8_t tc:1;
	uint8_t aa:1;
	uint8_t opcode:4;
	uint8_t qr:1;
	uint8_t rcode:4;
	uint8_t z:3;
	uint8_t ra:1;
#elif __BYTE_ORDER == __BIG_ENDIAN
	uint8_t qr:1;
	uint8_t opcode:4;
	uint8_t aa:1;
	uint8_t tc:1;
	uint8_t rd:1;
	uint8_t ra:1;
	uint8_t z:3;
	uint8_t rcode:4;
#else
#error "Unknown byte order"
#endif
	uint16_t qdcount;
	uint16_t ancount;
	uint16_t nscount;
	uint16_t arcount;
} __attribute__ ((packed));
79
/*
 * Buffer for a reply that is being collected in pieces.
 * NOTE(review): presumably "len" is the expected total size and
 * "received" the number of bytes stored in buf so far - confirm
 * against the code that fills it.
 */
struct partial_reply {
	uint16_t len;
	uint16_t received;
	unsigned char buf[];	/* flexible array, allocated with the struct */
};
85
86 struct server_data {
87         int index;
88         GList *domains;
89         char *server;
90         struct sockaddr *server_addr;
91         socklen_t server_addr_len;
92         int protocol;
93         GIOChannel *channel;
94         guint watch;
95         guint timeout;
96         bool enabled;
97         bool connected;
98         struct partial_reply *incoming_reply;
99 };
100
101 struct request_data {
102         union {
103                 struct sockaddr_in6 __sin6; /* Only for the length */
104                 struct sockaddr sa;
105         };
106         socklen_t sa_len;
107         int client_sk;
108         int protocol;
109         int family;
110         guint16 srcid;
111         guint16 dstid;
112         guint16 altid;
113         guint timeout;
114         guint watch;
115         guint numserv;
116         guint numresp;
117         gpointer request;
118         gsize request_len;
119         gpointer name;
120         gpointer resp;
121         gsize resplen;
122         struct listener_data *ifdata;
123         bool append_domain;
124 };
125
126 struct listener_data {
127         int index;
128
129         GIOChannel *udp4_listener_channel;
130         GIOChannel *tcp4_listener_channel;
131         guint udp4_listener_watch;
132         guint tcp4_listener_watch;
133
134         GIOChannel *udp6_listener_channel;
135         GIOChannel *tcp6_listener_channel;
136         guint udp6_listener_watch;
137         guint tcp6_listener_watch;
138 };
139
140 /*
141  * The TCP client requires some extra handling as we need to
142  * be prepared to receive also partial DNS requests.
143  */
144 struct tcp_partial_client_data {
145         int family;
146         struct listener_data *ifdata;
147         GIOChannel *channel;
148         guint watch;
149         unsigned char *buf;
150         unsigned int buf_end;
151         guint timeout;
152 };
153
/* Cached answer for one record type (A or AAAA) of a cache entry. */
struct cache_data {
	time_t inserted;
	time_t valid_until;
	time_t cache_until;	/* entry is stale once this is in the past */
	int timeout;
	uint16_t type;
	uint16_t answers;
	unsigned int data_len;
	unsigned char *data;	/* contains DNS header + body */
};
164
/*
 * One cached name. The A and AAAA answers are stored separately and
 * either pointer may be NULL.
 */
struct cache_entry {
	char *key;
	bool want_refresh;	/* set when a popular entry went stale */
	int hits;		/* aged down by refresh_dns_entry() */
	struct cache_data *ipv4;
	struct cache_data *ipv6;
};
172
/* Fixed part of a DNS question that follows the name (network order). */
struct domain_question {
	uint16_t type;
	uint16_t class;
} __attribute__ ((packed));
177
/*
 * Fixed part of a DNS resource record that follows the owner name;
 * rdlen bytes of record data come right after it (network order).
 */
struct domain_rr {
	uint16_t type;
	uint16_t class;
	uint32_t ttl;
	uint16_t rdlen;
} __attribute__ ((packed));
184
/* Maximum length of a DNS packet carried over TCP. */
#define TCP_MAX_BUF_LEN 4096

/*
 * Upper bound, in seconds, for how long a response stays cached.
 * Normally the TTL of the DNS response decides the life time of the
 * cache entry; this only caps it.
 */
#define MAX_CACHE_TTL (30 * 60)

/* Lower bound: keep a response cached for at least 30 seconds. */
#define MIN_CACHE_TTL (30)

/*
 * Maximum number of cached DNS responses, so that the cache does not
 * occupy too much memory. A cached entry takes about 100 bytes on
 * average, depending on the length of the DNS name; for example
 * caching www.connman.net uses 97 bytes.
 */
#define MAX_CACHE_SIZE 256
209
210 static int cache_size;
211 static GHashTable *cache;
212 static int cache_refcount;
213 static GSList *server_list = NULL;
214 static GSList *request_list = NULL;
215 static GHashTable *listener_table = NULL;
216 static time_t next_refresh;
217 static GHashTable *partial_tcp_req_table;
218
219 static guint16 get_id(void)
220 {
221         return random();
222 }
223
/*
 * Number of bytes that precede the DNS header for the given protocol:
 * 0 for UDP, 2 for TCP. Any other protocol yields -EINVAL.
 */
static int protocol_offset(int protocol)
{
	if (protocol == IPPROTO_UDP)
		return 0;

	if (protocol == IPPROTO_TCP)
		return 2;

	return -EINVAL;
}
238
/*
 * There is a power and efficiency benefit in having cache entries
 * expire at the same time. To that end the expiry time is rounded
 * down to a common boundary: 10 seconds for a TTL below five minutes,
 * 30 seconds otherwise. A TTL shorter than 15 seconds is left alone.
 */
static time_t round_down_ttl(time_t end_time, int ttl)
{
	time_t step;

	if (ttl < 15)
		return end_time;

	step = (ttl < 300) ? 10 : 30;

	return (end_time / step) * step;
}
259
260 static struct request_data *find_request(guint16 id)
261 {
262         GSList *list;
263
264         for (list = request_list; list; list = list->next) {
265                 struct request_data *req = list->data;
266
267                 if (req->dstid == id || req->altid == id)
268                         return req;
269         }
270
271         return NULL;
272 }
273
274 static struct server_data *find_server(int index,
275                                         const char *server,
276                                                 int protocol)
277 {
278         GSList *list;
279
280         DBG("index %d server %s proto %d", index, server, protocol);
281
282         for (list = server_list; list; list = list->next) {
283                 struct server_data *data = list->data;
284
285                 if (index < 0 && data->index < 0 &&
286                                 g_str_equal(data->server, server) &&
287                                 data->protocol == protocol)
288                         return data;
289
290                 if (index < 0 ||
291                                 data->index < 0 || !data->server)
292                         continue;
293
294                 if (data->index == index &&
295                                 g_str_equal(data->server, server) &&
296                                 data->protocol == protocol)
297                         return data;
298         }
299
300         return NULL;
301 }
302
303 /* we can keep using the same resolve's */
304 static GResolv *ipv4_resolve;
305 static GResolv *ipv6_resolve;
306
307 static void dummy_resolve_func(GResolvResultStatus status,
308                                         char **results, gpointer user_data)
309 {
310 }
311
312 /*
313  * Refresh a DNS entry, but also age the hit count a bit */
314 static void refresh_dns_entry(struct cache_entry *entry, char *name)
315 {
316         int age = 1;
317
318         if (!ipv4_resolve) {
319                 ipv4_resolve = g_resolv_new(0);
320                 g_resolv_set_address_family(ipv4_resolve, AF_INET);
321                 g_resolv_add_nameserver(ipv4_resolve, "127.0.0.1", 53, 0);
322         }
323
324         if (!ipv6_resolve) {
325                 ipv6_resolve = g_resolv_new(0);
326                 g_resolv_set_address_family(ipv6_resolve, AF_INET6);
327                 g_resolv_add_nameserver(ipv6_resolve, "::1", 53, 0);
328         }
329
330         if (!entry->ipv4) {
331                 DBG("Refresing A record for %s", name);
332                 g_resolv_lookup_hostname(ipv4_resolve, name,
333                                         dummy_resolve_func, NULL);
334                 age = 4;
335         }
336
337         if (!entry->ipv6) {
338                 DBG("Refresing AAAA record for %s", name);
339                 g_resolv_lookup_hostname(ipv6_resolve, name,
340                                         dummy_resolve_func, NULL);
341                 age = 4;
342         }
343
344         entry->hits -= age;
345         if (entry->hits < 0)
346                 entry->hits = 0;
347 }
348
/*
 * Number of bytes the DNS name at buf occupies in the packet: 2 for a
 * compression pointer, otherwise the label bytes plus the terminating
 * zero byte.
 *
 * The terminating byte used to be left out, which made every caller
 * continue one byte too early. The name is expected to be either a
 * single compression pointer or a fully spelled out sequence of
 * labels, and to be terminated inside the buffer.
 */
static int dns_name_length(unsigned char *buf)
{
	if ((buf[0] & NS_CMPRSFLGS) == NS_CMPRSFLGS) /* compressed name */
		return 2;

	return strlen((char *)buf) + 1;
}

/*
 * Step *pos over the name it points at, provided the whole name lies
 * within the *remaining bytes. Returns false, leaving both values
 * untouched, when the name does not fit.
 */
static bool skip_dns_name(unsigned char **pos, int *remaining)
{
	int l;

	if (*remaining < 1)
		return false;

	/* an uncompressed name has to end before the buffer does */
	if (((*pos)[0] & NS_CMPRSFLGS) != NS_CMPRSFLGS &&
			!memchr(*pos, 0, *remaining))
		return false;

	l = dns_name_length(*pos);
	if (l > *remaining)
		return false;

	*pos += l;
	*remaining -= l;

	return true;
}

/*
 * Overwrite the TTL of every answer record in a cached DNS message.
 *
 * buf points at the DNS header and len is the number of bytes that
 * follow from there. The message must consist of the header, one
 * question and the answer records. Parsing stops quietly at the first
 * record that does not fit into len, so nothing is ever written
 * outside of the buffer.
 */
static void update_cached_ttl(unsigned char *buf, int len, int new_ttl)
{
	const uint32_t ttl = htonl(new_ttl);
	unsigned char *c;
	unsigned int rdlen;

	/* skip the header */
	if (!buf || len < 12)
		return;

	c = buf + 12;
	len -= 12;

	/* skip the query, which is a name and 2 16 bit words */
	if (!skip_dns_name(&c, &len) || len < 4)
		return;

	c += 4;
	len -= 4;

	/* now we get the answer records */
	while (len > 0) {
		/* first a name */
		if (!skip_dns_name(&c, &len))
			break;

		/* type + class (2 bytes each), TTL (4) and rdlen (2) */
		if (len < 10)
			break;

		c += 4;

		/* memcpy since the field is not necessarily aligned */
		memcpy(c, &ttl, sizeof(ttl));
		c += 4;

		rdlen = (c[0] << 8) | c[1];
		c += 2;
		len -= 10;

		/* finally the record data */
		if (rdlen > (unsigned int) len)
			break;

		c += rdlen;
		len -= rdlen;
	}
}
403
404 static void send_cached_response(int sk, unsigned char *buf, int len,
405                                 const struct sockaddr *to, socklen_t tolen,
406                                 int protocol, int id, uint16_t answers, int ttl)
407 {
408         struct domain_hdr *hdr;
409         unsigned char *ptr = buf;
410         int err, offset, dns_len, adj_len = len - 2;
411
412         /*
413          * The cached packet contains always the TCP offset (two bytes)
414          * so skip them for UDP.
415          */
416         switch (protocol) {
417         case IPPROTO_UDP:
418                 ptr += 2;
419                 len -= 2;
420                 dns_len = len;
421                 offset = 0;
422                 break;
423         case IPPROTO_TCP:
424                 offset = 2;
425                 dns_len = ptr[0] * 256 + ptr[1];
426                 break;
427         default:
428                 return;
429         }
430
431         if (len < 12)
432                 return;
433
434         hdr = (void *) (ptr + offset);
435
436         hdr->id = id;
437         hdr->qr = 1;
438         hdr->rcode = 0;
439         hdr->ancount = htons(answers);
440         hdr->nscount = 0;
441         hdr->arcount = 0;
442
443         /* if this is a negative reply, we are authorative */
444         if (answers == 0)
445                 hdr->aa = 1;
446         else
447                 update_cached_ttl((unsigned char *)hdr, adj_len, ttl);
448
449         DBG("sk %d id 0x%04x answers %d ptr %p length %d dns %d",
450                 sk, hdr->id, answers, ptr, len, dns_len);
451
452         err = sendto(sk, ptr, len, MSG_NOSIGNAL, to, tolen);
453         if (err < 0) {
454                 connman_error("Cannot send cached DNS response: %s",
455                                 strerror(errno));
456                 return;
457         }
458
459         if (err != len || (dns_len != (len - 2) && protocol == IPPROTO_TCP) ||
460                                 (dns_len != len && protocol == IPPROTO_UDP))
461                 DBG("Packet length mismatch, sent %d wanted %d dns %d",
462                         err, len, dns_len);
463 }
464
465 static void send_response(int sk, unsigned char *buf, int len,
466                                 const struct sockaddr *to, socklen_t tolen,
467                                 int protocol)
468 {
469         struct domain_hdr *hdr;
470         int err, offset = protocol_offset(protocol);
471
472         DBG("sk %d", sk);
473
474         if (offset < 0)
475                 return;
476
477         if (len < 12)
478                 return;
479
480         hdr = (void *) (buf + offset);
481
482         DBG("id 0x%04x qr %d opcode %d", hdr->id, hdr->qr, hdr->opcode);
483
484         hdr->qr = 1;
485         hdr->rcode = 2;
486
487         hdr->ancount = 0;
488         hdr->nscount = 0;
489         hdr->arcount = 0;
490
491         err = sendto(sk, buf, len, MSG_NOSIGNAL, to, tolen);
492         if (err < 0) {
493                 connman_error("Failed to send DNS response to %d: %s",
494                                 sk, strerror(errno));
495                 return;
496         }
497 }
498
499 static int get_req_udp_socket(struct request_data *req)
500 {
501         GIOChannel *channel;
502
503         if (req->family == AF_INET)
504                 channel = req->ifdata->udp4_listener_channel;
505         else
506                 channel = req->ifdata->udp6_listener_channel;
507
508         if (!channel)
509                 return -1;
510
511         return g_io_channel_unix_get_fd(channel);
512 }
513
514 static void destroy_request_data(struct request_data *req)
515 {
516         if (req->timeout > 0)
517                 g_source_remove(req->timeout);
518
519         g_free(req->resp);
520         g_free(req->request);
521         g_free(req->name);
522         g_free(req);
523 }
524
525 static gboolean request_timeout(gpointer user_data)
526 {
527         struct request_data *req = user_data;
528
529         if (!req)
530                 return FALSE;
531
532         DBG("id 0x%04x", req->srcid);
533
534         request_list = g_slist_remove(request_list, req);
535         req->numserv--;
536
537         if (req->resplen > 0 && req->resp) {
538                 int sk, err;
539
540                 if (req->protocol == IPPROTO_UDP) {
541                         sk = get_req_udp_socket(req);
542                         if (sk < 0)
543                                 return FALSE;
544
545                         err = sendto(sk, req->resp, req->resplen, MSG_NOSIGNAL,
546                                 &req->sa, req->sa_len);
547                 } else {
548                         sk = req->client_sk;
549                         err = send(sk, req->resp, req->resplen, MSG_NOSIGNAL);
550                         if (err < 0)
551                                 close(sk);
552                 }
553                 if (err < 0)
554                         return FALSE;
555         } else if (req->request && req->numserv == 0) {
556                 struct domain_hdr *hdr;
557
558                 if (req->protocol == IPPROTO_TCP) {
559                         hdr = (void *) (req->request + 2);
560                         hdr->id = req->srcid;
561                         send_response(req->client_sk, req->request,
562                                 req->request_len, NULL, 0, IPPROTO_TCP);
563
564                 } else if (req->protocol == IPPROTO_UDP) {
565                         int sk;
566
567                         hdr = (void *) (req->request);
568                         hdr->id = req->srcid;
569
570                         sk = get_req_udp_socket(req);
571                         if (sk >= 0)
572                                 send_response(sk, req->request,
573                                         req->request_len, &req->sa,
574                                         req->sa_len, IPPROTO_UDP);
575                 }
576         }
577
578         /*
579          * We cannot leave TCP client hanging so just kick it out
580          * if we get a request timeout from server.
581          */
582         if (req->protocol == IPPROTO_TCP) {
583                 DBG("client %d removed", req->client_sk);
584                 g_hash_table_remove(partial_tcp_req_table,
585                                 GINT_TO_POINTER(req->client_sk));
586         }
587
588         req->timeout = 0;
589         destroy_request_data(req);
590
591         return FALSE;
592 }
593
/*
 * Append the dot separated labels of str at *pos in DNS wire format
 * (length byte followed by the label). A trailing dot or an empty
 * string adds nothing; a NULL string is ignored.
 *
 * One byte is always kept free before end so that the caller can
 * terminate the name. Returns 0 on success, -EINVAL for a label longer
 * than NS_MAXLABEL bytes (its length byte would look like a
 * compression pointer) and -ENOBUFS when the output is full.
 */
static int append_labels(unsigned char **pos, const unsigned char *end,
				const char *str)
{
	while (str) {
		const char *dot = strchr(str, '.');
		size_t label_len = dot ? (size_t) (dot - str) : strlen(str);

		/* nothing left after the last dot */
		if (!dot && label_len == 0)
			break;

		if (label_len > NS_MAXLABEL)
			return -EINVAL;

		/* length byte + label + the final terminator */
		if ((size_t) (end - *pos) < label_len + 2)
			return -ENOBUFS;

		*(*pos)++ = label_len;
		memcpy(*pos, str, label_len);
		*pos += label_len;

		str = dot ? dot + 1 : NULL;
	}

	return 0;
}

/*
 * Write "query" followed by "domain" into buf as one DNS name in wire
 * format, terminated by a zero byte. Either string may be NULL.
 *
 * size is the capacity of buf and is enforced: the function used to
 * ignore it and could write past the end of the buffer.
 *
 * Returns the number of bytes written, or a negative errno value
 * (-ENOBUFS when the name does not fit, -EINVAL for a label longer
 * than 63 bytes).
 * NOTE(review): negative results are new; callers are assumed to
 * treat them as failure - confirm at the call sites.
 */
static int append_query(unsigned char *buf, unsigned int size,
				const char *query, const char *domain)
{
	unsigned char *ptr = buf;
	const unsigned char *end = buf + size;
	int err;

	DBG("query %s domain %s", query, domain);

	/* room for at least the terminating zero byte */
	if (size < 1)
		return -ENOBUFS;

	err = append_labels(&ptr, end, query);
	if (err < 0)
		return err;

	err = append_labels(&ptr, end, domain);
	if (err < 0)
		return err;

	*ptr++ = 0x00;

	return ptr - buf;
}
648
649 static bool cache_check_is_valid(struct cache_data *data,
650                                 time_t current_time)
651 {
652         if (!data)
653                 return false;
654
655         if (data->cache_until < current_time)
656                 return false;
657
658         return true;
659 }
660
661 /*
662  * remove stale cached entries so that they can be refreshed
663  */
664 static void cache_enforce_validity(struct cache_entry *entry)
665 {
666         time_t current_time = time(NULL);
667
668         if (!cache_check_is_valid(entry->ipv4, current_time)
669                                                         && entry->ipv4) {
670                 DBG("cache timeout \"%s\" type A", entry->key);
671                 g_free(entry->ipv4->data);
672                 g_free(entry->ipv4);
673                 entry->ipv4 = NULL;
674
675         }
676
677         if (!cache_check_is_valid(entry->ipv6, current_time)
678                                                         && entry->ipv6) {
679                 DBG("cache timeout \"%s\" type AAAA", entry->key);
680                 g_free(entry->ipv6->data);
681                 g_free(entry->ipv6);
682                 entry->ipv6 = NULL;
683         }
684 }
685
686 static uint16_t cache_check_validity(char *question, uint16_t type,
687                                 struct cache_entry *entry)
688 {
689         time_t current_time = time(NULL);
690         bool want_refresh = false;
691
692         /*
693          * if we have a popular entry, we want a refresh instead of
694          * total destruction of the entry.
695          */
696         if (entry->hits > 2)
697                 want_refresh = true;
698
699         cache_enforce_validity(entry);
700
701         switch (type) {
702         case 1:         /* IPv4 */
703                 if (!cache_check_is_valid(entry->ipv4, current_time)) {
704                         DBG("cache %s \"%s\" type A", entry->ipv4 ?
705                                         "timeout" : "entry missing", question);
706
707                         if (want_refresh)
708                                 entry->want_refresh = true;
709
710                         /*
711                          * We do not remove cache entry if there is still
712                          * valid IPv6 entry found in the cache.
713                          */
714                         if (!cache_check_is_valid(entry->ipv6, current_time) && !want_refresh) {
715                                 g_hash_table_remove(cache, question);
716                                 type = 0;
717                         }
718                 }
719                 break;
720
721         case 28:        /* IPv6 */
722                 if (!cache_check_is_valid(entry->ipv6, current_time)) {
723                         DBG("cache %s \"%s\" type AAAA", entry->ipv6 ?
724                                         "timeout" : "entry missing", question);
725
726                         if (want_refresh)
727                                 entry->want_refresh = true;
728
729                         if (!cache_check_is_valid(entry->ipv4, current_time) && !want_refresh) {
730                                 g_hash_table_remove(cache, question);
731                                 type = 0;
732                         }
733                 }
734                 break;
735         }
736
737         return type;
738 }
739
740 static void cache_element_destroy(gpointer value)
741 {
742         struct cache_entry *entry = value;
743
744         if (!entry)
745                 return;
746
747         if (entry->ipv4) {
748                 g_free(entry->ipv4->data);
749                 g_free(entry->ipv4);
750         }
751
752         if (entry->ipv6) {
753                 g_free(entry->ipv6->data);
754                 g_free(entry->ipv6);
755         }
756
757         g_free(entry->key);
758         g_free(entry);
759
760         if (--cache_size < 0)
761                 cache_size = 0;
762 }
763
764 static gboolean try_remove_cache(gpointer user_data)
765 {
766         if (__sync_fetch_and_sub(&cache_refcount, 1) == 1) {
767                 DBG("No cache users, removing it.");
768
769                 g_hash_table_destroy(cache);
770                 cache = NULL;
771         }
772
773         return FALSE;
774 }
775
776 static void create_cache(void)
777 {
778         if (__sync_fetch_and_add(&cache_refcount, 1) == 0)
779                 cache = g_hash_table_new_full(g_str_hash,
780                                         g_str_equal,
781                                         NULL,
782                                         cache_element_destroy);
783 }
784
785 static struct cache_entry *cache_check(gpointer request, int *qtype, int proto)
786 {
787         char *question;
788         struct cache_entry *entry;
789         struct domain_question *q;
790         uint16_t type;
791         int offset, proto_offset;
792
793         if (!request)
794                 return NULL;
795
796         proto_offset = protocol_offset(proto);
797         if (proto_offset < 0)
798                 return NULL;
799
800         question = request + proto_offset + 12;
801
802         offset = strlen(question) + 1;
803         q = (void *) (question + offset);
804         type = ntohs(q->type);
805
806         /* We only cache either A (1) or AAAA (28) requests */
807         if (type != 1 && type != 28)
808                 return NULL;
809
810         if (!cache) {
811                 create_cache();
812                 return NULL;
813         }
814
815         entry = g_hash_table_lookup(cache, question);
816         if (!entry)
817                 return NULL;
818
819         type = cache_check_validity(question, type, entry);
820         if (type == 0)
821                 return NULL;
822
823         *qtype = type;
824         return entry;
825 }
826
/*
 * Get a label/name from DNS resource record. The function decompresses the
 * label if necessary. The function does not convert the name to presentation
 * form. This means that the result string will contain label lengths instead
 * of dots between labels. We intentionally do not want to convert to dotted
 * format so that we can cache the wire format string directly.
 *
 * counter    recursion depth, the caller passes 0
 * pkt, max   start of the DNS message and one past its last byte
 * start      where the name begins inside the message
 * output     receives the compressed form of the name, a pointer to
 *            offset 12; *output_len is set to 2 once a label was seen
 * end        if *end is NULL on entry it is set to the first byte
 *            after the name as it is stored at "start", i.e. after the
 *            terminating zero byte or after the first compression
 *            pointer
 * name       receives the uncompressed wire format name, terminated by
 *            a zero byte; the buffer must hold NS_MAXDNAME + 1 bytes
 * name_len   running length of "name"
 *
 * Returns 0 on success, -EINVAL when the compression pointers nest too
 * deeply or a label is longer than NS_MAXLABEL, and -ENOBUFS when the
 * name leaves the packet or does not fit into one of the buffers.
 */
static int get_name(int counter,
		unsigned char *pkt, unsigned char *start, unsigned char *max,
		unsigned char *output, int output_max, int *output_len,
		unsigned char **end, char *name, int *name_len)
{
	unsigned char *p = start;

	/* Limit recursion to 10 (this bounds chained compression pointers) */
	if (counter > 10)
		return -EINVAL;

	for (;;) {
		unsigned int label_len;

		/* never look at bytes outside of the packet */
		if (p < pkt || p >= max)
			return -ENOBUFS;

		if (*p == 0)
			break;

		if ((*p & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
			uint16_t offset;

			/* a pointer is two bytes long */
			if (max - p < 2)
				return -ENOBUFS;

			offset = (*p & 0x3F) * 256 + *(p + 1);

			if (offset >= max - pkt)
				return -ENOBUFS;

			if (!*end)
				*end = p + 2;

			return get_name(counter + 1, pkt, pkt + offset, max,
					output, output_max, output_len, end,
					name, name_len);
		}

		label_len = *p;

		/* length bytes with the top bits set are not plain labels */
		if (label_len > NS_MAXLABEL)
			return -EINVAL;

		/* the label and the byte after it must be in the packet */
		if ((unsigned int) (max - p) < label_len + 2)
			return -ENOBUFS;

		if (output_max < 2 || *output_len > output_max)
			return -ENOBUFS;

		/* length byte + label + terminator must fit into name */
		if (*name_len < 0 ||
				(unsigned int) *name_len + label_len + 1 >
								NS_MAXDNAME)
			return -ENOBUFS;

		/*
		 * We need the original name in order to check
		 * if this answer is the correct one.
		 */
		name[(*name_len)++] = label_len;
		memcpy(name + *name_len, p + 1, label_len);
		*name_len += label_len;
		name[*name_len] = '\0';

		/* We compress the result */
		output[0] = NS_CMPRSFLGS;
		output[1] = 0x0C;
		*output_len = 2;

		p += label_len + 1;
	}

	/* name ended without a pointer: continue after the terminator */
	if (!*end)
		*end = p + 1;

	return 0;
}
893
894 static int parse_rr(unsigned char *buf, unsigned char *start,
895                         unsigned char *max,
896                         unsigned char *response, unsigned int *response_size,
897                         uint16_t *type, uint16_t *class, int *ttl, int *rdlen,
898                         unsigned char **end,
899                         char *name)
900 {
901         struct domain_rr *rr;
902         int err, offset;
903         int name_len = 0, output_len = 0, max_rsp = *response_size;
904
905         err = get_name(0, buf, start, max, response, max_rsp,
906                 &output_len, end, name, &name_len);
907         if (err < 0)
908                 return err;
909
910         offset = output_len;
911
912         if ((unsigned int) offset > *response_size)
913                 return -ENOBUFS;
914
915         rr = (void *) (*end);
916
917         if (!rr)
918                 return -EINVAL;
919
920         *type = ntohs(rr->type);
921         *class = ntohs(rr->class);
922         *ttl = ntohl(rr->ttl);
923         *rdlen = ntohs(rr->rdlen);
924
925         if (*ttl < 0)
926                 return -EINVAL;
927
928         memcpy(response + offset, *end, sizeof(struct domain_rr));
929
930         offset += sizeof(struct domain_rr);
931         *end += sizeof(struct domain_rr);
932
933         if ((unsigned int) (offset + *rdlen) > *response_size)
934                 return -ENOBUFS;
935
936         memcpy(response + offset, *end, *rdlen);
937
938         *end += *rdlen;
939
940         *response_size = offset + *rdlen;
941
942         return 0;
943 }
944
945 static bool check_alias(GSList *aliases, char *name)
946 {
947         GSList *list;
948
949         if (aliases) {
950                 for (list = aliases; list; list = list->next) {
951                         int len = strlen((char *)list->data);
952                         if (strncmp((char *)list->data, name, len) == 0)
953                                 return true;
954                 }
955         }
956
957         return false;
958 }
959
960 static int parse_response(unsigned char *buf, int buflen,
961                         char *question, int qlen,
962                         uint16_t *type, uint16_t *class, int *ttl,
963                         unsigned char *response, unsigned int *response_len,
964                         uint16_t *answers)
965 {
966         struct domain_hdr *hdr = (void *) buf;
967         struct domain_question *q;
968         unsigned char *ptr;
969         uint16_t qdcount = ntohs(hdr->qdcount);
970         uint16_t ancount = ntohs(hdr->ancount);
971         int err, i;
972         uint16_t qtype, qclass;
973         unsigned char *next = NULL;
974         unsigned int maxlen = *response_len;
975         GSList *aliases = NULL, *list;
976         char name[NS_MAXDNAME + 1];
977
978         if (buflen < 12)
979                 return -EINVAL;
980
981         DBG("qr %d qdcount %d", hdr->qr, qdcount);
982
983         /* We currently only cache responses where question count is 1 */
984         if (hdr->qr != 1 || qdcount != 1)
985                 return -EINVAL;
986
987         ptr = buf + sizeof(struct domain_hdr);
988
989         strncpy(question, (char *) ptr, qlen);
990         qlen = strlen(question);
991         ptr += qlen + 1; /* skip \0 */
992
993         q = (void *) ptr;
994         qtype = ntohs(q->type);
995
996         /* We cache only A and AAAA records */
997         if (qtype != 1 && qtype != 28)
998                 return -ENOMSG;
999
1000         qclass = ntohs(q->class);
1001
1002         ptr += 2 + 2; /* ptr points now to answers */
1003
1004         err = -ENOMSG;
1005         *response_len = 0;
1006         *answers = 0;
1007
1008         memset(name, 0, sizeof(name));
1009
1010         /*
1011          * We have a bunch of answers (like A, AAAA, CNAME etc) to
1012          * A or AAAA question. We traverse the answers and parse the
1013          * resource records. Only A and AAAA records are cached, all
1014          * the other records in answers are skipped.
1015          */
1016         for (i = 0; i < ancount; i++) {
1017                 /*
1018                  * Get one address at a time to this buffer.
1019                  * The max size of the answer is
1020                  *   2 (pointer) + 2 (type) + 2 (class) +
1021                  *   4 (ttl) + 2 (rdlen) + addr (16 or 4) = 28
1022                  * for A or AAAA record.
1023                  * For CNAME the size can be bigger.
1024                  */
1025                 unsigned char rsp[NS_MAXCDNAME];
1026                 unsigned int rsp_len = sizeof(rsp) - 1;
1027                 int ret, rdlen;
1028
1029                 memset(rsp, 0, sizeof(rsp));
1030
1031                 ret = parse_rr(buf, ptr, buf + buflen, rsp, &rsp_len,
1032                         type, class, ttl, &rdlen, &next, name);
1033                 if (ret != 0) {
1034                         err = ret;
1035                         goto out;
1036                 }
1037
1038                 /*
1039                  * Now rsp contains compressed or uncompressed resource
1040                  * record. Next we check if this record answers the question.
1041                  * The name var contains the uncompressed label.
1042                  * One tricky bit is the CNAME records as they alias
1043                  * the name we might be interested in.
1044                  */
1045
1046                 /*
1047                  * Go to next answer if the class is not the one we are
1048                  * looking for.
1049                  */
1050                 if (*class != qclass) {
1051                         ptr = next;
1052                         next = NULL;
1053                         continue;
1054                 }
1055
1056                 /*
1057                  * Try to resolve aliases also, type is CNAME(5).
1058                  * This is important as otherwise the aliased names would not
1059                  * be cached at all as the cache would not contain the aliased
1060                  * question.
1061                  *
1062                  * If any CNAME is found in DNS packet, then we cache the alias
1063                  * IP address instead of the question (as the server
1064                  * said that question has only an alias).
1065                  * This means in practice that if e.g., ipv6.google.com is
1066                  * queried, DNS server returns CNAME of that name which is
1067                  * ipv6.l.google.com. We then cache the address of the CNAME
1068                  * but return the question name to client. So the alias
1069                  * status of the name is not saved in cache and thus not
1070                  * returned to the client. We do not return DNS packets from
1071                  * cache to client saying that ipv6.google.com is an alias to
1072                  * ipv6.l.google.com but we return instead a DNS packet that
1073                  * says ipv6.google.com has address xxx which is in fact the
1074                  * address of ipv6.l.google.com. For caching purposes this
1075                  * should not cause any issues.
1076                  */
1077                 if (*type == 5 && strncmp(question, name, qlen) == 0) {
1078                         /*
1079                          * So now the alias answered the question. This is
1080                          * not very useful from caching point of view as
1081                          * the following A or AAAA records will not match the
1082                          * question. We need to find the real A/AAAA record
1083                          * of the alias and cache that.
1084                          */
1085                         unsigned char *end = NULL;
1086                         int name_len = 0, output_len = 0;
1087
1088                         memset(rsp, 0, sizeof(rsp));
1089                         rsp_len = sizeof(rsp) - 1;
1090
1091                         /*
1092                          * Alias is in rdata part of the message,
1093                          * and next-rdlen points to it. So we need to get
1094                          * the real name of the alias.
1095                          */
1096                         ret = get_name(0, buf, next - rdlen, buf + buflen,
1097                                         rsp, rsp_len, &output_len, &end,
1098                                         name, &name_len);
1099                         if (ret != 0) {
1100                                 /* just ignore the error at this point */
1101                                 ptr = next;
1102                                 next = NULL;
1103                                 continue;
1104                         }
1105
1106                         /*
1107                          * We should now have the alias of the entry we might
1108                          * want to cache. Just remember it for a while.
1109                          * We check the alias list when we have parsed the
1110                          * A or AAAA record.
1111                          */
1112                         aliases = g_slist_prepend(aliases, g_strdup(name));
1113
1114                         ptr = next;
1115                         next = NULL;
1116                         continue;
1117                 }
1118
1119                 if (*type == qtype) {
1120                         /*
1121                          * We found correct type (A or AAAA)
1122                          */
1123                         if (check_alias(aliases, name) ||
1124                                 (!aliases && strncmp(question, name,
1125                                                         qlen) == 0)) {
1126                                 /*
1127                                  * We found an alias or the name of the rr
1128                                  * matches the question. If so, we append
1129                                  * the compressed label to the cache.
1130                                  * The end result is a response buffer that
1131                                  * will contain one or more cached and
1132                                  * compressed resource records.
1133                                  */
1134                                 if (*response_len + rsp_len > maxlen) {
1135                                         err = -ENOBUFS;
1136                                         goto out;
1137                                 }
1138                                 memcpy(response + *response_len, rsp, rsp_len);
1139                                 *response_len += rsp_len;
1140                                 (*answers)++;
1141                                 err = 0;
1142                         }
1143                 }
1144
1145                 ptr = next;
1146                 next = NULL;
1147         }
1148
1149 out:
1150         for (list = aliases; list; list = list->next)
1151                 g_free(list->data);
1152         g_slist_free(aliases);
1153
1154         return err;
1155 }
1156
/*
 * Sweep state handed to cache_check_entry() as user data while
 * cache_cleanup() walks the cache.
 */
struct cache_timeout {
        time_t current_time;    /* "now"; entries whose cache_until is earlier are removed */
        int max_timeout;        /* largest cache_until seen among entries with timeout > 0 */
        int try_harder;         /* non-zero: also remove entries with fewer than 4 hits */
};
1162
1163 static gboolean cache_check_entry(gpointer key, gpointer value,
1164                                         gpointer user_data)
1165 {
1166         struct cache_timeout *data = user_data;
1167         struct cache_entry *entry = value;
1168         int max_timeout;
1169
1170         /* Scale the number of hits by half as part of cache aging */
1171
1172         entry->hits /= 2;
1173
1174         /*
1175          * If either IPv4 or IPv6 cached entry has expired, we
1176          * remove both from the cache.
1177          */
1178
1179         if (entry->ipv4 && entry->ipv4->timeout > 0) {
1180                 max_timeout = entry->ipv4->cache_until;
1181                 if (max_timeout > data->max_timeout)
1182                         data->max_timeout = max_timeout;
1183
1184                 if (entry->ipv4->cache_until < data->current_time)
1185                         return TRUE;
1186         }
1187
1188         if (entry->ipv6 && entry->ipv6->timeout > 0) {
1189                 max_timeout = entry->ipv6->cache_until;
1190                 if (max_timeout > data->max_timeout)
1191                         data->max_timeout = max_timeout;
1192
1193                 if (entry->ipv6->cache_until < data->current_time)
1194                         return TRUE;
1195         }
1196
1197         /*
1198          * if we're asked to try harder, also remove entries that have
1199          * few hits
1200          */
1201         if (data->try_harder && entry->hits < 4)
1202                 return TRUE;
1203
1204         return FALSE;
1205 }
1206
1207 static void cache_cleanup(void)
1208 {
1209         static int max_timeout;
1210         struct cache_timeout data;
1211         int count = 0;
1212
1213         data.current_time = time(NULL);
1214         data.max_timeout = 0;
1215         data.try_harder = 0;
1216
1217         /*
1218          * In the first pass, we only remove entries that have timed out.
1219          * We use a cache of the first time to expire to do this only
1220          * when it makes sense.
1221          */
1222         if (max_timeout <= data.current_time) {
1223                 count = g_hash_table_foreach_remove(cache, cache_check_entry,
1224                                                 &data);
1225         }
1226         DBG("removed %d in the first pass", count);
1227
1228         /*
1229          * In the second pass, if the first pass turned up blank,
1230          * we also expire entries with a low hit count,
1231          * while aging the hit count at the same time.
1232          */
1233         data.try_harder = 1;
1234         if (count == 0)
1235                 count = g_hash_table_foreach_remove(cache, cache_check_entry,
1236                                                 &data);
1237
1238         if (count == 0)
1239                 /*
1240                  * If we could not remove anything, then remember
1241                  * what is the max timeout and do nothing if we
1242                  * have not yet reached it. This will prevent
1243                  * constant traversal of the cache if it is full.
1244                  */
1245                 max_timeout = data.max_timeout;
1246         else
1247                 max_timeout = 0;
1248 }
1249
1250 static gboolean cache_invalidate_entry(gpointer key, gpointer value,
1251                                         gpointer user_data)
1252 {
1253         struct cache_entry *entry = value;
1254
1255         /* first, delete any expired elements */
1256         cache_enforce_validity(entry);
1257
1258         /* if anything is not expired, mark the entry for refresh */
1259         if (entry->hits > 0 && (entry->ipv4 || entry->ipv6))
1260                 entry->want_refresh = true;
1261
1262         /* delete the cached data */
1263         if (entry->ipv4) {
1264                 g_free(entry->ipv4->data);
1265                 g_free(entry->ipv4);
1266                 entry->ipv4 = NULL;
1267         }
1268
1269         if (entry->ipv6) {
1270                 g_free(entry->ipv6->data);
1271                 g_free(entry->ipv6);
1272                 entry->ipv6 = NULL;
1273         }
1274
1275         /* keep the entry if we want it refreshed, delete it otherwise */
1276         if (entry->want_refresh)
1277                 return FALSE;
1278         else
1279                 return TRUE;
1280 }
1281
1282 /*
1283  * cache_invalidate is called from places where the DNS landscape
1284  * has changed, say because connections are added or we entered a VPN.
1285  * The logic is to wipe all cache data, but mark all non-expired
1286  * parts of the cache for refresh rather than deleting the whole cache.
1287  */
1288 static void cache_invalidate(void)
1289 {
1290         DBG("Invalidating the DNS cache %p", cache);
1291
1292         if (!cache)
1293                 return;
1294
1295         g_hash_table_foreach_remove(cache, cache_invalidate_entry, NULL);
1296 }
1297
1298 static void cache_refresh_entry(struct cache_entry *entry)
1299 {
1300
1301         cache_enforce_validity(entry);
1302
1303         if (entry->hits > 2 && !entry->ipv4)
1304                 entry->want_refresh = true;
1305         if (entry->hits > 2 && !entry->ipv6)
1306                 entry->want_refresh = true;
1307
1308         if (entry->want_refresh) {
1309                 char *c;
1310                 char dns_name[NS_MAXDNAME + 1];
1311                 entry->want_refresh = false;
1312
1313                 /* turn a DNS name into a hostname with dots */
1314                 strncpy(dns_name, entry->key, NS_MAXDNAME);
1315                 c = dns_name;
1316                 while (c && *c) {
1317                         int jump;
1318                         jump = *c;
1319                         *c = '.';
1320                         c += jump + 1;
1321                 }
1322                 DBG("Refreshing %s\n", dns_name);
1323                 /* then refresh the hostname */
1324                 refresh_dns_entry(entry, &dns_name[1]);
1325         }
1326 }
1327
1328 static void cache_refresh_iterator(gpointer key, gpointer value,
1329                                         gpointer user_data)
1330 {
1331         struct cache_entry *entry = value;
1332
1333         cache_refresh_entry(entry);
1334 }
1335
1336 static void cache_refresh(void)
1337 {
1338         if (!cache)
1339                 return;
1340
1341         g_hash_table_foreach(cache, cache_refresh_iterator, NULL);
1342 }
1343
1344 static int reply_query_type(unsigned char *msg, int len)
1345 {
1346         unsigned char *c;
1347         uint16_t *w;
1348         int l;
1349         int type;
1350
1351         /* skip the header */
1352         c = msg + sizeof(struct domain_hdr);
1353         len -= sizeof(struct domain_hdr);
1354
1355         if (len < 0)
1356                 return 0;
1357
1358         /* now the query, which is a name and 2 16 bit words */
1359         l = dns_name_length(c) + 1;
1360         c += l;
1361         w = (uint16_t *) c;
1362         type = ntohs(*w);
1363
1364         return type;
1365 }
1366
1367 static int cache_update(struct server_data *srv, unsigned char *msg,
1368                         unsigned int msg_len)
1369 {
1370         int offset = protocol_offset(srv->protocol);
1371         int err, qlen, ttl = 0;
1372         uint16_t answers = 0, type = 0, class = 0;
1373         struct domain_hdr *hdr = (void *)(msg + offset);
1374         struct domain_question *q;
1375         struct cache_entry *entry;
1376         struct cache_data *data;
1377         char question[NS_MAXDNAME + 1];
1378         unsigned char response[NS_MAXDNAME + 1];
1379         unsigned char *ptr;
1380         unsigned int rsplen;
1381         bool new_entry = true;
1382         time_t current_time;
1383
1384         if (cache_size >= MAX_CACHE_SIZE) {
1385                 cache_cleanup();
1386                 if (cache_size >= MAX_CACHE_SIZE)
1387                         return 0;
1388         }
1389
1390         current_time = time(NULL);
1391
1392         /* don't do a cache refresh more than twice a minute */
1393         if (next_refresh < current_time) {
1394                 cache_refresh();
1395                 next_refresh = current_time + 30;
1396         }
1397
1398         if (offset < 0)
1399                 return 0;
1400
1401         DBG("offset %d hdr %p msg %p rcode %d", offset, hdr, msg, hdr->rcode);
1402
1403         /* Continue only if response code is 0 (=ok) */
1404         if (hdr->rcode != 0)
1405                 return 0;
1406
1407         if (!cache)
1408                 create_cache();
1409
1410         rsplen = sizeof(response) - 1;
1411         question[sizeof(question) - 1] = '\0';
1412
1413         err = parse_response(msg + offset, msg_len - offset,
1414                                 question, sizeof(question) - 1,
1415                                 &type, &class, &ttl,
1416                                 response, &rsplen, &answers);
1417
1418         /*
1419          * special case: if we do a ipv6 lookup and get no result
1420          * for a record that's already in our ipv4 cache.. we want
1421          * to cache the negative response.
1422          */
1423         if ((err == -ENOMSG || err == -ENOBUFS) &&
1424                         reply_query_type(msg + offset,
1425                                         msg_len - offset) == 28) {
1426                 entry = g_hash_table_lookup(cache, question);
1427                 if (entry && entry->ipv4 && !entry->ipv6) {
1428                         int cache_offset = 0;
1429
1430                         data = g_try_new(struct cache_data, 1);
1431                         if (!data)
1432                                 return -ENOMEM;
1433                         data->inserted = entry->ipv4->inserted;
1434                         data->type = type;
1435                         data->answers = ntohs(hdr->ancount);
1436                         data->timeout = entry->ipv4->timeout;
1437                         if (srv->protocol == IPPROTO_UDP)
1438                                 cache_offset = 2;
1439                         data->data_len = msg_len + cache_offset;
1440                         data->data = ptr = g_malloc(data->data_len);
1441                         ptr[0] = (data->data_len - 2) / 256;
1442                         ptr[1] = (data->data_len - 2) - ptr[0] * 256;
1443                         if (srv->protocol == IPPROTO_UDP)
1444                                 ptr += 2;
1445                         data->valid_until = entry->ipv4->valid_until;
1446                         data->cache_until = entry->ipv4->cache_until;
1447                         memcpy(ptr, msg, msg_len);
1448                         entry->ipv6 = data;
1449                         /*
1450                          * we will get a "hit" when we serve the response
1451                          * out of the cache
1452                          */
1453                         entry->hits--;
1454                         if (entry->hits < 0)
1455                                 entry->hits = 0;
1456                         return 0;
1457                 }
1458         }
1459
1460         if (err < 0 || ttl == 0)
1461                 return 0;
1462
1463         qlen = strlen(question);
1464
1465         /*
1466          * If the cache contains already data, check if the
1467          * type of the cached data is the same and do not add
1468          * to cache if data is already there.
1469          * This is needed so that we can cache both A and AAAA
1470          * records for the same name.
1471          */
1472         entry = g_hash_table_lookup(cache, question);
1473         if (!entry) {
1474                 entry = g_try_new(struct cache_entry, 1);
1475                 if (!entry)
1476                         return -ENOMEM;
1477
1478                 data = g_try_new(struct cache_data, 1);
1479                 if (!data) {
1480                         g_free(entry);
1481                         return -ENOMEM;
1482                 }
1483
1484                 entry->key = g_strdup(question);
1485                 entry->ipv4 = entry->ipv6 = NULL;
1486                 entry->want_refresh = false;
1487                 entry->hits = 0;
1488
1489                 if (type == 1)
1490                         entry->ipv4 = data;
1491                 else
1492                         entry->ipv6 = data;
1493         } else {
1494                 if (type == 1 && entry->ipv4)
1495                         return 0;
1496
1497                 if (type == 28 && entry->ipv6)
1498                         return 0;
1499
1500                 data = g_try_new(struct cache_data, 1);
1501                 if (!data)
1502                         return -ENOMEM;
1503
1504                 if (type == 1)
1505                         entry->ipv4 = data;
1506                 else
1507                         entry->ipv6 = data;
1508
1509                 /*
1510                  * compensate for the hit we'll get for serving
1511                  * the response out of the cache
1512                  */
1513                 entry->hits--;
1514                 if (entry->hits < 0)
1515                         entry->hits = 0;
1516
1517                 new_entry = false;
1518         }
1519
1520         if (ttl < MIN_CACHE_TTL)
1521                 ttl = MIN_CACHE_TTL;
1522
1523         data->inserted = current_time;
1524         data->type = type;
1525         data->answers = answers;
1526         data->timeout = ttl;
1527         /*
1528          * The "2" in start of the length is the TCP offset. We allocate it
1529          * here even for UDP packet because it simplifies the sending
1530          * of cached packet.
1531          */
1532         data->data_len = 2 + 12 + qlen + 1 + 2 + 2 + rsplen;
1533         data->data = ptr = g_malloc(data->data_len);
1534         data->valid_until = current_time + ttl;
1535
1536         /*
1537          * Restrict the cached DNS record TTL to some sane value
1538          * in order to prevent data staying in the cache too long.
1539          */
1540         if (ttl > MAX_CACHE_TTL)
1541                 ttl = MAX_CACHE_TTL;
1542
1543         data->cache_until = round_down_ttl(current_time + ttl, ttl);
1544
1545         if (!data->data) {
1546                 g_free(entry->key);
1547                 g_free(data);
1548                 g_free(entry);
1549                 return -ENOMEM;
1550         }
1551
1552         /*
1553          * We cache the two extra bytes at the start of the message
1554          * in a TCP packet. When sending UDP packet, we skip the first
1555          * two bytes. This way we do not need to know the format
1556          * (UDP/TCP) of the cached message.
1557          */
1558         if (srv->protocol == IPPROTO_UDP)
1559                 memcpy(ptr + 2, msg, offset + 12);
1560         else
1561                 memcpy(ptr, msg, offset + 12);
1562
1563         ptr[0] = (data->data_len - 2) / 256;
1564         ptr[1] = (data->data_len - 2) - ptr[0] * 256;
1565         if (srv->protocol == IPPROTO_UDP)
1566                 ptr += 2;
1567
1568         memcpy(ptr + offset + 12, question, qlen + 1); /* copy also the \0 */
1569
1570         q = (void *) (ptr + offset + 12 + qlen + 1);
1571         q->type = htons(type);
1572         q->class = htons(class);
1573         memcpy(ptr + offset + 12 + qlen + 1 + sizeof(struct domain_question),
1574                 response, rsplen);
1575
1576         if (new_entry) {
1577                 g_hash_table_replace(cache, entry->key, entry);
1578                 cache_size++;
1579         }
1580
1581         DBG("cache %d %squestion \"%s\" type %d ttl %d size %zd packet %u "
1582                                                                 "dns len %u",
1583                 cache_size, new_entry ? "new " : "old ",
1584                 question, type, ttl,
1585                 sizeof(*entry) + sizeof(*data) + data->data_len + qlen,
1586                 data->data_len,
1587                 srv->protocol == IPPROTO_TCP ?
1588                         (unsigned int)(data->data[0] * 256 + data->data[1]) :
1589                         data->data_len);
1590
1591         return 0;
1592 }
1593
1594 static int ns_resolv(struct server_data *server, struct request_data *req,
1595                                 gpointer request, gpointer name)
1596 {
1597         GList *list;
1598         int sk, err, type = 0;
1599         char *dot, *lookup = (char *) name;
1600         struct cache_entry *entry;
1601
1602         entry = cache_check(request, &type, req->protocol);
1603         if (entry) {
1604                 int ttl_left = 0;
1605                 struct cache_data *data;
1606
1607                 DBG("cache hit %s type %s", lookup, type == 1 ? "A" : "AAAA");
1608                 if (type == 1)
1609                         data = entry->ipv4;
1610                 else
1611                         data = entry->ipv6;
1612
1613                 if (data) {
1614                         ttl_left = data->valid_until - time(NULL);
1615                         entry->hits++;
1616                 }
1617
1618                 if (data && req->protocol == IPPROTO_TCP) {
1619                         send_cached_response(req->client_sk, data->data,
1620                                         data->data_len, NULL, 0, IPPROTO_TCP,
1621                                         req->srcid, data->answers, ttl_left);
1622                         return 1;
1623                 }
1624
1625                 if (data && req->protocol == IPPROTO_UDP) {
1626                         int udp_sk = get_req_udp_socket(req);
1627
1628                         if (udp_sk < 0)
1629                                 return -EIO;
1630
1631                         send_cached_response(udp_sk, data->data,
1632                                 data->data_len, &req->sa, req->sa_len,
1633                                 IPPROTO_UDP, req->srcid, data->answers,
1634                                 ttl_left);
1635                         return 1;
1636                 }
1637         }
1638
1639         sk = g_io_channel_unix_get_fd(server->channel);
1640
1641         err = sendto(sk, request, req->request_len, MSG_NOSIGNAL,
1642                         server->server_addr, server->server_addr_len);
1643         if (err < 0) {
1644                 DBG("Cannot send message to server %s sock %d "
1645                         "protocol %d (%s/%d)",
1646                         server->server, sk, server->protocol,
1647                         strerror(errno), errno);
1648                 return -EIO;
1649         }
1650
1651         req->numserv++;
1652
1653         /* If we have more than one dot, we don't add domains */
1654         dot = strchr(lookup, '.');
1655         if (dot && dot != lookup + strlen(lookup) - 1)
1656                 return 0;
1657
1658         if (server->domains && server->domains->data)
1659                 req->append_domain = true;
1660
1661         for (list = server->domains; list; list = list->next) {
1662                 char *domain;
1663                 unsigned char alt[1024];
1664                 struct domain_hdr *hdr = (void *) &alt;
1665                 int altlen, domlen, offset;
1666
1667                 domain = list->data;
1668
1669                 if (!domain)
1670                         continue;
1671
1672                 offset = protocol_offset(server->protocol);
1673                 if (offset < 0)
1674                         return offset;
1675
1676                 domlen = strlen(domain) + 1;
1677                 if (domlen < 5)
1678                         return -EINVAL;
1679
1680                 alt[offset] = req->altid & 0xff;
1681                 alt[offset + 1] = req->altid >> 8;
1682
1683                 memcpy(alt + offset + 2, request + offset + 2, 10);
1684                 hdr->qdcount = htons(1);
1685
1686                 altlen = append_query(alt + offset + 12, sizeof(alt) - 12,
1687                                         name, domain);
1688                 if (altlen < 0)
1689                         return -EINVAL;
1690
1691                 altlen += 12;
1692
1693                 memcpy(alt + offset + altlen,
1694                         request + offset + altlen - domlen,
1695                                 req->request_len - altlen - offset + domlen);
1696
1697                 if (server->protocol == IPPROTO_TCP) {
1698                         int req_len = req->request_len + domlen - 2;
1699
1700                         alt[0] = (req_len >> 8) & 0xff;
1701                         alt[1] = req_len & 0xff;
1702                 }
1703
1704                 DBG("req %p dstid 0x%04x altid 0x%04x", req, req->dstid,
1705                                 req->altid);
1706
1707                 err = send(sk, alt, req->request_len + domlen, MSG_NOSIGNAL);
1708                 if (err < 0)
1709                         return -EIO;
1710
1711                 req->numserv++;
1712         }
1713
1714         return 0;
1715 }
1716
1717 static char *convert_label(char *start, char *end, char *ptr, char *uptr,
1718                         int remaining_len, int *used_comp, int *used_uncomp)
1719 {
1720         int pos, comp_pos;
1721         char name[NS_MAXLABEL];
1722
1723         pos = dn_expand((u_char *)start, (u_char *)end, (u_char *)ptr,
1724                         name, NS_MAXLABEL);
1725         if (pos < 0) {
1726                 DBG("uncompress error [%d/%s]", errno, strerror(errno));
1727                 goto out;
1728         }
1729
1730         /*
1731          * We need to compress back the name so that we get back to internal
1732          * label presentation.
1733          */
1734         comp_pos = dn_comp(name, (u_char *)uptr, remaining_len, NULL, NULL);
1735         if (comp_pos < 0) {
1736                 DBG("compress error [%d/%s]", errno, strerror(errno));
1737                 goto out;
1738         }
1739
1740         *used_comp = pos;
1741         *used_uncomp = comp_pos;
1742
1743         return ptr;
1744
1745 out:
1746         return NULL;
1747 }
1748
1749 static char *uncompress(int16_t field_count, char *start, char *end,
1750                         char *ptr, char *uncompressed, int uncomp_len,
1751                         char **uncompressed_ptr)
1752 {
1753         char *uptr = *uncompressed_ptr; /* position in result buffer */
1754
1755         DBG("count %d ptr %p end %p uptr %p", field_count, ptr, end, uptr);
1756
1757         while (field_count-- > 0 && ptr < end) {
1758                 int dlen;               /* data field length */
1759                 int ulen;               /* uncompress length */
1760                 int pos;                /* position in compressed string */
1761                 char name[NS_MAXLABEL]; /* tmp label */
1762                 uint16_t dns_type, dns_class;
1763
1764                 pos = dn_expand((const u_char *)start, (u_char *)end,
1765                                 (u_char *)ptr, name, NS_MAXLABEL);
1766                 if (pos < 0) {
1767                         DBG("uncompress error [%d/%s]", errno,
1768                                 strerror(errno));
1769                         goto out;
1770                 }
1771
1772                 /*
1773                  * Copy the uncompressed resource record, type, class and \0 to
1774                  * tmp buffer.
1775                  */
1776
1777                 ulen = strlen(name);
1778                 *uptr++ = ulen;
1779                 strncpy(uptr, name, uncomp_len - (uptr - uncompressed));
1780
1781                 DBG("pos %d ulen %d left %d name %s", pos, ulen,
1782                         (int)(uncomp_len - (uptr - uncompressed)), uptr);
1783
1784                 uptr += ulen;
1785                 *uptr++ = '\0';
1786
1787                 ptr += pos;
1788
1789                 /*
1790                  * We copy also the fixed portion of the result (type, class,
1791                  * ttl, address length and the address)
1792                  */
1793                 memcpy(uptr, ptr, NS_RRFIXEDSZ);
1794
1795                 dns_type = uptr[0] << 8 | uptr[1];
1796                 dns_class = uptr[2] << 8 | uptr[3];
1797
1798                 if (dns_class != ns_c_in)
1799                         goto out;
1800
1801                 ptr += NS_RRFIXEDSZ;
1802                 uptr += NS_RRFIXEDSZ;
1803
1804                 /*
1805                  * Then the variable portion of the result (data length).
1806                  * Typically this portion is also compressed
1807                  * so we need to uncompress it also when necessary.
1808                  */
1809                 if (dns_type == ns_t_cname) {
1810                         int comp_pos;
1811
1812                         if (!convert_label(start, end, ptr, uptr,
1813                                         uncomp_len - (uptr - uncompressed),
1814                                                 &pos, &comp_pos))
1815                                 goto out;
1816
1817                         uptr[-2] = comp_pos << 8;
1818                         uptr[-1] = comp_pos & 0xff;
1819
1820                         uptr += comp_pos;
1821                         ptr += pos;
1822
1823                 } else if (dns_type == ns_t_a || dns_type == ns_t_aaaa) {
1824                         dlen = uptr[-2] << 8 | uptr[-1];
1825
1826                         if (ptr + dlen > end) {
1827                                 DBG("data len %d too long", dlen);
1828                                 goto out;
1829                         }
1830
1831                         memcpy(uptr, ptr, dlen);
1832                         uptr += dlen;
1833                         ptr += dlen;
1834
1835                 } else if (dns_type == ns_t_soa) {
1836                         int comp_pos;
1837                         int total_len = 0;
1838                         char *len_ptr;
1839
1840                         /* Primary name server expansion */
1841                         if (!convert_label(start, end, ptr, uptr,
1842                                         uncomp_len - (uptr - uncompressed),
1843                                                 &pos, &comp_pos))
1844                                 goto out;
1845
1846                         total_len += comp_pos;
1847                         len_ptr = &uptr[-2];
1848                         ptr += pos;
1849                         uptr += comp_pos;
1850
1851                         /* Responsible authority's mailbox */
1852                         if (!convert_label(start, end, ptr, uptr,
1853                                         uncomp_len - (uptr - uncompressed),
1854                                                 &pos, &comp_pos))
1855                                 goto out;
1856
1857                         total_len += comp_pos;
1858                         ptr += pos;
1859                         uptr += comp_pos;
1860
1861                         /*
1862                          * Copy rest of the soa fields (serial number,
1863                          * refresh interval, retry interval, expiration
1864                          * limit and minimum ttl). They are 20 bytes long.
1865                          */
1866                         memcpy(uptr, ptr, 20);
1867                         uptr += 20;
1868                         ptr += 20;
1869                         total_len += 20;
1870
1871                         /*
1872                          * Finally fix the length of the data part
1873                          */
1874                         len_ptr[0] = total_len << 8;
1875                         len_ptr[1] = total_len & 0xff;
1876                 }
1877
1878                 *uncompressed_ptr = uptr;
1879         }
1880
1881         return ptr;
1882
1883 out:
1884         return NULL;
1885 }
1886
1887 static int forward_dns_reply(unsigned char *reply, int reply_len, int protocol,
1888                                 struct server_data *data)
1889 {
1890         struct domain_hdr *hdr;
1891         struct request_data *req;
1892         int dns_id, sk, err, offset = protocol_offset(protocol);
1893
1894         if (offset < 0)
1895                 return offset;
1896
1897         hdr = (void *)(reply + offset);
1898         dns_id = reply[offset] | reply[offset + 1] << 8;
1899
1900         DBG("Received %d bytes (id 0x%04x)", reply_len, dns_id);
1901
1902         req = find_request(dns_id);
1903         if (!req)
1904                 return -EINVAL;
1905
1906         DBG("req %p dstid 0x%04x altid 0x%04x rcode %d",
1907                         req, req->dstid, req->altid, hdr->rcode);
1908
1909         reply[offset] = req->srcid & 0xff;
1910         reply[offset + 1] = req->srcid >> 8;
1911
1912         req->numresp++;
1913
1914         if (hdr->rcode == 0 || !req->resp) {
1915                 unsigned char *new_reply = NULL;
1916
1917                 /*
1918                  * If the domain name was append
1919                  * remove it before forwarding the reply.
1920                  * If there were more than one question, then this
1921                  * domain name ripping can be hairy so avoid that
1922                  * and bail out in that that case.
1923                  *
1924                  * The reason we are doing this magic is that if the
1925                  * user's DNS client tries to resolv hostname without
1926                  * domain part, it also expects to get the result without
1927                  * a domain name part.
1928                  */
1929                 if (req->append_domain && ntohs(hdr->qdcount) == 1) {
1930                         uint16_t domain_len = 0;
1931                         uint16_t header_len;
1932                         uint16_t dns_type, dns_class;
1933                         uint8_t host_len, dns_type_pos;
1934                         char uncompressed[NS_MAXDNAME], *uptr;
1935                         char *ptr, *eom = (char *)reply + reply_len;
1936
1937                         /*
1938                          * ptr points to the first char of the hostname.
1939                          * ->hostname.domain.net
1940                          */
1941                         header_len = offset + sizeof(struct domain_hdr);
1942                         ptr = (char *)reply + header_len;
1943
1944                         host_len = *ptr;
1945                         if (host_len > 0)
1946                                 domain_len = strnlen(ptr + 1 + host_len,
1947                                                 reply_len - header_len);
1948
1949                         /*
1950                          * If the query type is anything other than A or AAAA,
1951                          * then bail out and pass the message as is.
1952                          * We only want to deal with IPv4 or IPv6 addresses.
1953                          */
1954                         dns_type_pos = host_len + 1 + domain_len + 1;
1955
1956                         dns_type = ptr[dns_type_pos] << 8 |
1957                                                         ptr[dns_type_pos + 1];
1958                         dns_class = ptr[dns_type_pos + 2] << 8 |
1959                                                         ptr[dns_type_pos + 3];
1960                         if (dns_type != ns_t_a && dns_type != ns_t_aaaa &&
1961                                         dns_class != ns_c_in) {
1962                                 DBG("Pass msg dns type %d class %d",
1963                                         dns_type, dns_class);
1964                                 goto pass;
1965                         }
1966
1967                         /*
1968                          * Remove the domain name and replace it by the end
1969                          * of reply. Check if the domain is really there
1970                          * before trying to copy the data. We also need to
1971                          * uncompress the answers if necessary.
1972                          * The domain_len can be 0 because if the original
1973                          * query did not contain a domain name, then we are
1974                          * sending two packets, first without the domain name
1975                          * and the second packet with domain name.
1976                          * The append_domain is set to true even if we sent
1977                          * the first packet without domain name. In this
1978                          * case we end up in this branch.
1979                          */
1980                         if (domain_len > 0) {
1981                                 int len = host_len + 1;
1982
1983                                 /*
1984                                  * First copy host (without domain name) into
1985                                  * tmp buffer.
1986                                  */
1987                                 uptr = &uncompressed[0];
1988                                 memcpy(uptr, ptr, len);
1989
1990                                 uptr[len] = '\0'; /* host termination */
1991                                 uptr += len + 1;
1992
1993                                 /*
1994                                  * Copy type and class fields of the question.
1995                                  */
1996                                 ptr += len + domain_len + 1;
1997                                 memcpy(uptr, ptr, NS_QFIXEDSZ);
1998
1999                                 /*
2000                                  * ptr points to answers after this
2001                                  */
2002                                 ptr += NS_QFIXEDSZ;
2003                                 uptr += NS_QFIXEDSZ;
2004
2005                                 /*
2006                                  * We then uncompress the result to buffer
2007                                  * so that we can rip off the domain name
2008                                  * part from the question. First answers,
2009                                  * then name server (authority) information,
2010                                  * and finally additional record info.
2011                                  */
2012
2013                                 ptr = uncompress(ntohs(hdr->ancount),
2014                                                 (char *)reply + offset, eom,
2015                                                 ptr, uncompressed, NS_MAXDNAME,
2016                                                 &uptr);
2017                                 if (ptr == NULL)
2018                                         goto out;
2019
2020                                 ptr = uncompress(ntohs(hdr->nscount),
2021                                                 (char *)reply + offset, eom,
2022                                                 ptr, uncompressed, NS_MAXDNAME,
2023                                                 &uptr);
2024                                 if (ptr == NULL)
2025                                         goto out;
2026
2027                                 ptr = uncompress(ntohs(hdr->arcount),
2028                                                 (char *)reply + offset, eom,
2029                                                 ptr, uncompressed, NS_MAXDNAME,
2030                                                 &uptr);
2031                                 if (ptr == NULL)
2032                                         goto out;
2033
2034                                 /*
2035                                  * Because we have now uncompressed the answers
2036                                  * we must create a bigger buffer to hold all
2037                                  * that data.
2038                                  */
2039
2040                                 new_reply = g_try_malloc(header_len +
2041                                                         uptr - uncompressed);
2042                                 if (!new_reply)
2043                                         return -ENOMEM;
2044
2045                                 memcpy(new_reply, reply, header_len);
2046                                 memcpy(new_reply + header_len, uncompressed,
2047                                         uptr - uncompressed);
2048
2049                                 reply = new_reply;
2050                                 reply_len = header_len + uptr - uncompressed;
2051                         }
2052                 }
2053
2054         pass:
2055                 g_free(req->resp);
2056                 req->resplen = 0;
2057
2058                 req->resp = g_try_malloc(reply_len);
2059                 if (!req->resp)
2060                         return -ENOMEM;
2061
2062                 memcpy(req->resp, reply, reply_len);
2063                 req->resplen = reply_len;
2064
2065                 cache_update(data, reply, reply_len);
2066
2067                 g_free(new_reply);
2068         }
2069
2070 out:
2071         if (hdr->rcode > 0 && req->numresp < req->numserv)
2072                 return -EINVAL;
2073
2074         request_list = g_slist_remove(request_list, req);
2075
2076         if (protocol == IPPROTO_UDP) {
2077                 sk = get_req_udp_socket(req);
2078                 if (sk < 0) {
2079                         errno = -EIO;
2080                         err = -EIO;
2081                 } else
2082                         err = sendto(sk, req->resp, req->resplen, 0,
2083                                 &req->sa, req->sa_len);
2084         } else {
2085                 sk = req->client_sk;
2086                 err = send(sk, req->resp, req->resplen, MSG_NOSIGNAL);
2087         }
2088
2089         if (err < 0)
2090                 DBG("Cannot send msg, sk %d proto %d errno %d/%s", sk,
2091                         protocol, errno, strerror(errno));
2092         else
2093                 DBG("proto %d sent %d bytes to %d", protocol, err, sk);
2094
2095         destroy_request_data(req);
2096
2097         return err;
2098 }
2099
2100 static void server_destroy_socket(struct server_data *data)
2101 {
2102         DBG("index %d server %s proto %d", data->index,
2103                                         data->server, data->protocol);
2104
2105         if (data->watch > 0) {
2106                 g_source_remove(data->watch);
2107                 data->watch = 0;
2108         }
2109
2110         if (data->timeout > 0) {
2111                 g_source_remove(data->timeout);
2112                 data->timeout = 0;
2113         }
2114
2115         if (data->channel) {
2116                 g_io_channel_shutdown(data->channel, TRUE, NULL);
2117                 g_io_channel_unref(data->channel);
2118                 data->channel = NULL;
2119         }
2120
2121         g_free(data->incoming_reply);
2122         data->incoming_reply = NULL;
2123 }
2124
2125 static void destroy_server(struct server_data *server)
2126 {
2127         DBG("index %d server %s sock %d", server->index, server->server,
2128                         server->channel ?
2129                         g_io_channel_unix_get_fd(server->channel): -1);
2130
2131         server_list = g_slist_remove(server_list, server);
2132         server_destroy_socket(server);
2133
2134         if (server->protocol == IPPROTO_UDP && server->enabled)
2135                 DBG("Removing DNS server %s", server->server);
2136
2137         g_free(server->server);
2138         g_list_free_full(server->domains, g_free);
2139         g_free(server->server_addr);
2140
2141         /*
2142          * We do not remove cache right away but delay it few seconds.
2143          * The idea is that when IPv6 DNS server is added via RDNSS, it has a
2144          * lifetime. When the lifetime expires we decrease the refcount so it
2145          * is possible that the cache is then removed. Because a new DNS server
2146          * is usually created almost immediately we would then loose the cache
2147          * without any good reason. The small delay allows the new RDNSS to
2148          * create a new DNS server instance and the refcount does not go to 0.
2149          */
2150         g_timeout_add_seconds(3, try_remove_cache, NULL);
2151
2152         g_free(server);
2153 }
2154
2155 static gboolean udp_server_event(GIOChannel *channel, GIOCondition condition,
2156                                                         gpointer user_data)
2157 {
2158         unsigned char buf[4096];
2159         int sk, err, len;
2160         struct server_data *data = user_data;
2161
2162         if (condition & (G_IO_NVAL | G_IO_ERR | G_IO_HUP)) {
2163                 connman_error("Error with UDP server %s", data->server);
2164                 server_destroy_socket(data);
2165                 return FALSE;
2166         }
2167
2168         sk = g_io_channel_unix_get_fd(channel);
2169
2170         len = recv(sk, buf, sizeof(buf), 0);
2171         if (len < 12)
2172                 return TRUE;
2173
2174         err = forward_dns_reply(buf, len, IPPROTO_UDP, data);
2175         if (err < 0)
2176                 return TRUE;
2177
2178         return TRUE;
2179 }
2180
2181 static gboolean tcp_server_event(GIOChannel *channel, GIOCondition condition,
2182                                                         gpointer user_data)
2183 {
2184         int sk;
2185         struct server_data *server = user_data;
2186
2187         sk = g_io_channel_unix_get_fd(channel);
2188         if (sk == 0)
2189                 return FALSE;
2190
2191         if (condition & (G_IO_NVAL | G_IO_ERR | G_IO_HUP)) {
2192                 GSList *list;
2193 hangup:
2194                 DBG("TCP server channel closed, sk %d", sk);
2195
2196                 /*
2197                  * Discard any partial response which is buffered; better
2198                  * to get a proper response from a working server.
2199                  */
2200                 g_free(server->incoming_reply);
2201                 server->incoming_reply = NULL;
2202
2203                 for (list = request_list; list; list = list->next) {
2204                         struct request_data *req = list->data;
2205                         struct domain_hdr *hdr;
2206
2207                         if (req->protocol == IPPROTO_UDP)
2208                                 continue;
2209
2210                         if (!req->request)
2211                                 continue;
2212
2213                         /*
2214                          * If we're not waiting for any further response
2215                          * from another name server, then we send an error
2216                          * response to the client.
2217                          */
2218                         if (req->numserv && --(req->numserv))
2219                                 continue;
2220
2221                         hdr = (void *) (req->request + 2);
2222                         hdr->id = req->srcid;
2223                         send_response(req->client_sk, req->request,
2224                                 req->request_len, NULL, 0, IPPROTO_TCP);
2225
2226                         request_list = g_slist_remove(request_list, req);
2227                 }
2228
2229                 destroy_server(server);
2230
2231                 return FALSE;
2232         }
2233
2234         if ((condition & G_IO_OUT) && !server->connected) {
2235                 GSList *list;
2236                 GList *domains;
2237                 bool no_request_sent = true;
2238                 struct server_data *udp_server;
2239
2240                 udp_server = find_server(server->index, server->server,
2241                                                                 IPPROTO_UDP);
2242                 if (udp_server) {
2243                         for (domains = udp_server->domains; domains;
2244                                                 domains = domains->next) {
2245                                 char *dom = domains->data;
2246
2247                                 DBG("Adding domain %s to %s",
2248                                                 dom, server->server);
2249
2250                                 server->domains = g_list_append(server->domains,
2251                                                                 g_strdup(dom));
2252                         }
2253                 }
2254
2255                 server->connected = true;
2256                 server_list = g_slist_append(server_list, server);
2257
2258                 if (server->timeout > 0) {
2259                         g_source_remove(server->timeout);
2260                         server->timeout = 0;
2261                 }
2262
2263                 for (list = request_list; list; ) {
2264                         struct request_data *req = list->data;
2265                         int status;
2266
2267                         if (req->protocol == IPPROTO_UDP) {
2268                                 list = list->next;
2269                                 continue;
2270                         }
2271
2272                         DBG("Sending req %s over TCP", (char *)req->name);
2273
2274                         status = ns_resolv(server, req,
2275                                                 req->request, req->name);
2276                         if (status > 0) {
2277                                 /*
2278                                  * A cached result was sent,
2279                                  * so the request can be released
2280                                  */
2281                                 list = list->next;
2282                                 request_list = g_slist_remove(request_list, req);
2283                                 destroy_request_data(req);
2284                                 continue;
2285                         }
2286
2287                         if (status < 0) {
2288                                 list = list->next;
2289                                 continue;
2290                         }
2291
2292                         no_request_sent = false;
2293
2294                         if (req->timeout > 0)
2295                                 g_source_remove(req->timeout);
2296
2297                         req->timeout = g_timeout_add_seconds(30,
2298                                                 request_timeout, req);
2299                         list = list->next;
2300                 }
2301
2302                 if (no_request_sent) {
2303                         destroy_server(server);
2304                         return FALSE;
2305                 }
2306
2307         } else if (condition & G_IO_IN) {
2308                 struct partial_reply *reply = server->incoming_reply;
2309                 int bytes_recv;
2310
2311                 if (!reply) {
2312                         unsigned char reply_len_buf[2];
2313                         uint16_t reply_len;
2314
2315                         bytes_recv = recv(sk, reply_len_buf, 2, MSG_PEEK);
2316                         if (!bytes_recv) {
2317                                 goto hangup;
2318                         } else if (bytes_recv < 0) {
2319                                 if (errno == EAGAIN || errno == EWOULDBLOCK)
2320                                         return TRUE;
2321
2322                                 connman_error("DNS proxy error %s",
2323                                                 strerror(errno));
2324                                 goto hangup;
2325                         } else if (bytes_recv < 2)
2326                                 return TRUE;
2327
2328                         reply_len = reply_len_buf[1] | reply_len_buf[0] << 8;
2329                         reply_len += 2;
2330
2331                         DBG("TCP reply %d bytes from %d", reply_len, sk);
2332
2333                         reply = g_try_malloc(sizeof(*reply) + reply_len + 2);
2334                         if (!reply)
2335                                 return TRUE;
2336
2337                         reply->len = reply_len;
2338                         reply->received = 0;
2339
2340                         server->incoming_reply = reply;
2341                 }
2342
2343                 while (reply->received < reply->len) {
2344                         bytes_recv = recv(sk, reply->buf + reply->received,
2345                                         reply->len - reply->received, 0);
2346                         if (!bytes_recv) {
2347                                 connman_error("DNS proxy TCP disconnect");
2348                                 break;
2349                         } else if (bytes_recv < 0) {
2350                                 if (errno == EAGAIN || errno == EWOULDBLOCK)
2351                                         return TRUE;
2352
2353                                 connman_error("DNS proxy error %s",
2354                                                 strerror(errno));
2355                                 break;
2356                         }
2357                         reply->received += bytes_recv;
2358                 }
2359
2360                 forward_dns_reply(reply->buf, reply->received, IPPROTO_TCP,
2361                                         server);
2362
2363                 g_free(reply);
2364                 server->incoming_reply = NULL;
2365
2366                 destroy_server(server);
2367
2368                 return FALSE;
2369         }
2370
2371         return TRUE;
2372 }
2373
2374 static gboolean tcp_idle_timeout(gpointer user_data)
2375 {
2376         struct server_data *server = user_data;
2377
2378         DBG("");
2379
2380         if (!server)
2381                 return FALSE;
2382
2383         destroy_server(server);
2384
2385         return FALSE;
2386 }
2387
2388 static int server_create_socket(struct server_data *data)
2389 {
2390         int sk, err;
2391         char *interface;
2392
2393         DBG("index %d server %s proto %d", data->index,
2394                                         data->server, data->protocol);
2395
2396         sk = socket(data->server_addr->sa_family,
2397                 data->protocol == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM,
2398                 data->protocol);
2399         if (sk < 0) {
2400                 err = errno;
2401                 connman_error("Failed to create server %s socket",
2402                                                         data->server);
2403                 server_destroy_socket(data);
2404                 return -err;
2405         }
2406
2407         DBG("sk %d", sk);
2408
2409         interface = connman_inet_ifname(data->index);
2410         if (interface) {
2411                 if (setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE,
2412                                         interface,
2413                                         strlen(interface) + 1) < 0) {
2414                         err = errno;
2415                         connman_error("Failed to bind server %s "
2416                                                 "to interface %s",
2417                                                 data->server, interface);
2418                         close(sk);
2419                         server_destroy_socket(data);
2420                         g_free(interface);
2421                         return -err;
2422                 }
2423                 g_free(interface);
2424         }
2425
2426         data->channel = g_io_channel_unix_new(sk);
2427         if (!data->channel) {
2428                 connman_error("Failed to create server %s channel",
2429                                                         data->server);
2430                 close(sk);
2431                 server_destroy_socket(data);
2432                 return -ENOMEM;
2433         }
2434
2435         g_io_channel_set_close_on_unref(data->channel, TRUE);
2436
2437         if (data->protocol == IPPROTO_TCP) {
2438                 g_io_channel_set_flags(data->channel, G_IO_FLAG_NONBLOCK, NULL);
2439                 data->watch = g_io_add_watch(data->channel,
2440                         G_IO_OUT | G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR,
2441                                                 tcp_server_event, data);
2442                 data->timeout = g_timeout_add_seconds(30, tcp_idle_timeout,
2443                                                                 data);
2444         } else
2445                 data->watch = g_io_add_watch(data->channel,
2446                         G_IO_IN | G_IO_NVAL | G_IO_ERR | G_IO_HUP,
2447                                                 udp_server_event, data);
2448
2449         if (connect(sk, data->server_addr, data->server_addr_len) < 0) {
2450                 err = errno;
2451
2452                 if ((data->protocol == IPPROTO_TCP && errno != EINPROGRESS) ||
2453                                 data->protocol == IPPROTO_UDP) {
2454
2455                         connman_error("Failed to connect to server %s",
2456                                                                 data->server);
2457                         server_destroy_socket(data);
2458                         return -err;
2459                 }
2460         }
2461
2462         create_cache();
2463
2464         return 0;
2465 }
2466
/*
 * Allocate and set up a DNS server entry for the numeric address given
 * in server, reachable through interface index using protocol
 * (IPPROTO_UDP or IPPROTO_TCP). domain, when not NULL, becomes the
 * first entry of the server's domain list.
 *
 * A UDP server is appended to server_list here and is enabled when its
 * index is the default one or uses split routing.
 *
 * Returns the new server, or NULL on any failure.
 */
static struct server_data *create_server(int index,
                                        const char *domain, const char *server,
                                        int protocol)
{
        struct addrinfo hints, *rp;
        struct server_data *data;
        int ret;

        DBG("index %d server %s", index, server);

        data = g_try_new0(struct server_data, 1);
        if (!data) {
                connman_error("Failed to allocate server %s data", server);
                return NULL;
        }

        data->index = index;
        data->protocol = protocol;
        if (domain)
                data->domains = g_list_append(data->domains, g_strdup(domain));
        data->server = g_strdup(server);

        /* Only numeric addresses are accepted, nothing gets resolved. */
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;
        hints.ai_flags = AI_NUMERICSERV | AI_NUMERICHOST;

        if (protocol == IPPROTO_UDP)
                hints.ai_socktype = SOCK_DGRAM;
        else if (protocol == IPPROTO_TCP)
                hints.ai_socktype = SOCK_STREAM;
        else
                goto fail;

        ret = getaddrinfo(data->server, "53", &hints, &rp);
        if (ret) {
                connman_error("Failed to parse server %s address: %s\n",
                              data->server, gai_strerror(ret));
                goto fail;
        }

        /*
         * Only the first result is used and ->ai_next is never followed.
         * That is fine for this numeric lookup, which yields a single
         * entry, but it is not a pattern to copy elsewhere.
         */
        data->server_addr_len = rp->ai_addrlen;

        if (rp->ai_family == AF_INET)
                data->server_addr = (struct sockaddr *)
                                        g_try_new0(struct sockaddr_in, 1);
        else if (rp->ai_family == AF_INET6)
                data->server_addr = (struct sockaddr *)
                                        g_try_new0(struct sockaddr_in6, 1);
        else
                connman_error("Wrong address family %d", rp->ai_family);

        /* Covers both a failed allocation and an unknown family. */
        if (!data->server_addr) {
                freeaddrinfo(rp);
                goto fail;
        }

        memcpy(data->server_addr, rp->ai_addr, rp->ai_addrlen);
        freeaddrinfo(rp);

        if (server_create_socket(data) != 0)
                goto fail;

        if (protocol != IPPROTO_UDP)
                return data;

        if (__connman_service_index_is_default(data->index) ||
                        __connman_service_index_is_split_routing(
                                                        data->index)) {
                data->enabled = true;
                DBG("Adding DNS server %s", data->server);
        }

        server_list = g_slist_append(server_list, data);

        return data;

fail:
        destroy_server(data);
        return NULL;
}
2560
2561 static bool resolv(struct request_data *req,
2562                                 gpointer request, gpointer name)
2563 {
2564         GSList *list;
2565
2566         for (list = server_list; list; list = list->next) {
2567                 struct server_data *data = list->data;
2568
2569                 if (data->protocol == IPPROTO_TCP) {
2570                         DBG("server %s ignored proto TCP", data->server);
2571                         continue;
2572                 }
2573
2574                 DBG("server %s enabled %d", data->server, data->enabled);
2575
2576                 if (!data->enabled)
2577                         continue;
2578
2579                 if (!data->channel && data->protocol == IPPROTO_UDP) {
2580                         if (server_create_socket(data) < 0) {
2581                                 DBG("socket creation failed while resolving");
2582                                 continue;
2583                         }
2584                 }
2585
2586                 if (ns_resolv(data, req, request, name) > 0)
2587                         return true;
2588         }
2589
2590         return false;
2591 }
2592
2593 static void append_domain(int index, const char *domain)
2594 {
2595         GSList *list;
2596
2597         DBG("index %d domain %s", index, domain);
2598
2599         if (!domain)
2600                 return;
2601
2602         for (list = server_list; list; list = list->next) {
2603                 struct server_data *data = list->data;
2604                 GList *dom_list;
2605                 char *dom;
2606                 bool dom_found = false;
2607
2608                 if (data->index < 0)
2609                         continue;
2610
2611                 if (data->index != index)
2612                         continue;
2613
2614                 for (dom_list = data->domains; dom_list;
2615                                 dom_list = dom_list->next) {
2616                         dom = dom_list->data;
2617
2618                         if (g_str_equal(dom, domain)) {
2619                                 dom_found = true;
2620                                 break;
2621                         }
2622                 }
2623
2624                 if (!dom_found) {
2625                         data->domains =
2626                                 g_list_append(data->domains, g_strdup(domain));
2627                 }
2628         }
2629 }
2630
/*
 * Register a nameserver and/or search domain for an interface index.
 *
 * - Neither server nor domain given: -EINVAL.
 * - Only a domain given: it is appended to the servers of that index.
 * - Server is "127.0.0.1" or "::1": -ENODEV.
 * - Server already known for UDP: only the domain is appended.
 * - Otherwise a new UDP server entry is created; -EIO if that fails.
 *
 * Returns 0 on success.
 */
int __connman_dnsproxy_append(int index, const char *domain,
							const char *server)
{
	DBG("index %d server %s", index, server);

	if (!server) {
		if (!domain)
			return -EINVAL;

		append_domain(index, domain);
		return 0;
	}

	if (g_str_equal(server, "127.0.0.1") || g_str_equal(server, "::1"))
		return -ENODEV;

	if (find_server(index, server, IPPROTO_UDP)) {
		append_domain(index, domain);
		return 0;
	}

	if (!create_server(index, domain, server, IPPROTO_UDP))
		return -EIO;

	return 0;
}
2665
/*
 * Destroy the server entry matching (index, server, protocol), if one
 * exists. The domain argument is accepted but not used.
 */
static void remove_server(int index, const char *domain,
			const char *server, int protocol)
{
	struct server_data *match = find_server(index, server, protocol);

	if (match)
		destroy_server(match);
}
2677
/*
 * Remove both the UDP and the TCP entry of a nameserver for an index.
 *
 * Returns -EINVAL when server is NULL, -ENODEV when server is
 * "127.0.0.1" or "::1", and 0 otherwise (whether or not an entry
 * was actually found).
 */
int __connman_dnsproxy_remove(int index, const char *domain,
							const char *server)
{
	DBG("index %d server %s", index, server);

	if (server == NULL)
		return -EINVAL;

	if (g_str_equal(server, "127.0.0.1") || g_str_equal(server, "::1"))
		return -ENODEV;

	remove_server(index, domain, server, IPPROTO_UDP);
	remove_server(index, domain, server, IPPROTO_TCP);

	return 0;
}
2697
2698 void __connman_dnsproxy_flush(void)
2699 {
2700         GSList *list;
2701
2702         list = request_list;
2703         while (list) {
2704                 struct request_data *req = list->data;
2705
2706                 list = list->next;
2707
2708                 if (resolv(req, req->request, req->name)) {
2709                         /*
2710                          * A cached result was sent,
2711                          * so the request can be released
2712                          */
2713                         request_list =
2714                                 g_slist_remove(request_list, req);
2715                         destroy_request_data(req);
2716                         continue;
2717                 }
2718
2719                 if (req->timeout > 0)
2720                         g_source_remove(req->timeout);
2721                 req->timeout = g_timeout_add_seconds(5, request_timeout, req);
2722         }
2723 }
2724
2725 static void dnsproxy_offline_mode(bool enabled)
2726 {
2727         GSList *list;
2728
2729         DBG("enabled %d", enabled);
2730
2731         for (list = server_list; list; list = list->next) {
2732                 struct server_data *data = list->data;
2733
2734                 if (!enabled) {
2735                         DBG("Enabling DNS server %s", data->server);
2736                         data->enabled = true;
2737                         cache_invalidate();
2738                         cache_refresh();
2739                 } else {
2740                         DBG("Disabling DNS server %s", data->server);
2741                         data->enabled = false;
2742                         cache_invalidate();
2743                 }
2744         }
2745 }
2746
2747 static void dnsproxy_default_changed(struct connman_service *service)
2748 {
2749         GSList *list;
2750         int index;
2751
2752         DBG("service %p", service);
2753
2754         /* DNS has changed, invalidate the cache */
2755         cache_invalidate();
2756
2757         if (!service) {
2758                 /* When no services are active, then disable DNS proxying */
2759                 dnsproxy_offline_mode(true);
2760                 return;
2761         }
2762
2763         index = __connman_service_get_index(service);
2764         if (index < 0)
2765                 return;
2766
2767         for (list = server_list; list; list = list->next) {
2768                 struct server_data *data = list->data;
2769
2770                 if (data->index == index) {
2771                         DBG("Enabling DNS server %s", data->server);
2772                         data->enabled = true;
2773                 } else {
2774                         DBG("Disabling DNS server %s", data->server);
2775                         data->enabled = false;
2776                 }
2777         }
2778
2779         cache_refresh();
2780 }
2781
2782 static struct connman_notifier dnsproxy_notifier = {
2783         .name                   = "dnsproxy",
2784         .default_changed        = dnsproxy_default_changed,
2785         .offline_mode           = dnsproxy_offline_mode,
2786 };
2787
2788 static unsigned char opt_edns0_type[2] = { 0x00, 0x29 };
2789
2790 static int parse_request(unsigned char *buf, int len,
2791                                         char *name, unsigned int size)
2792 {
2793         struct domain_hdr *hdr = (void *) buf;
2794         uint16_t qdcount = ntohs(hdr->qdcount);
2795         uint16_t arcount = ntohs(hdr->arcount);
2796         unsigned char *ptr;
2797         char *last_label = NULL;
2798         unsigned int remain, used = 0;
2799
2800         if (len < 12)
2801                 return -EINVAL;
2802
2803         DBG("id 0x%04x qr %d opcode %d qdcount %d arcount %d",
2804                                         hdr->id, hdr->qr, hdr->opcode,
2805                                                         qdcount, arcount);
2806
2807         if (hdr->qr != 0 || qdcount != 1)
2808                 return -EINVAL;
2809
2810         name[0] = '\0';
2811
2812         ptr = buf + sizeof(struct domain_hdr);
2813         remain = len - sizeof(struct domain_hdr);
2814
2815         while (remain > 0) {
2816                 uint8_t label_len = *ptr;
2817
2818                 if (label_len == 0x00) {
2819                         last_label = (char *) (ptr + 1);
2820                         break;
2821                 }
2822
2823                 if (used + label_len + 1 > size)
2824                         return -ENOBUFS;
2825
2826                 strncat(name, (char *) (ptr + 1), label_len);
2827                 strcat(name, ".");
2828
2829                 used += label_len + 1;
2830
2831                 ptr += label_len + 1;
2832                 remain -= label_len + 1;
2833         }
2834
2835         if (last_label && arcount && remain >= 9 && last_label[4] == 0 &&
2836                                 !memcmp(last_label + 5, opt_edns0_type, 2)) {
2837                 uint16_t edns0_bufsize;
2838
2839                 edns0_bufsize = last_label[7] << 8 | last_label[8];
2840
2841                 DBG("EDNS0 buffer size %u", edns0_bufsize);
2842
2843                 /* This is an evil hack until full TCP support has been
2844                  * implemented.
2845                  *
2846                  * Somtimes the EDNS0 request gets send with a too-small
2847                  * buffer size. Since glibc doesn't seem to crash when it
2848                  * gets a response biffer then it requested, just bump
2849                  * the buffer size up to 4KiB.
2850                  */
2851                 if (edns0_bufsize < 0x1000) {
2852                         last_label[7] = 0x10;
2853                         last_label[8] = 0x00;
2854                 }
2855         }
2856
2857         DBG("query %s", name);
2858
2859         return 0;
2860 }
2861
2862 static void client_reset(struct tcp_partial_client_data *client)
2863 {
2864         if (!client)
2865                 return;
2866
2867         if (client->channel) {
2868                 DBG("client %d closing",
2869                         g_io_channel_unix_get_fd(client->channel));
2870
2871                 g_io_channel_unref(client->channel);
2872                 client->channel = NULL;
2873         }
2874
2875         if (client->watch > 0) {
2876                 g_source_remove(client->watch);
2877                 client->watch = 0;
2878         }
2879
2880         if (client->timeout > 0) {
2881                 g_source_remove(client->timeout);
2882                 client->timeout = 0;
2883         }
2884
2885         g_free(client->buf);
2886         client->buf = NULL;
2887
2888         client->buf_end = 0;
2889 }
2890
/*
 * Decode the two-byte, big-endian length prefix found at the start of
 * buf. Only buf[0] and buf[1] are read.
 */
static unsigned int get_msg_len(unsigned char *buf)
{
	unsigned int high = buf[0];
	unsigned int low = buf[1];

	return (high << 8) | low;
}
2895
2896 static bool read_tcp_data(struct tcp_partial_client_data *client,
2897                                 void *client_addr, socklen_t client_addr_len,
2898                                 int read_len)
2899 {
2900         char query[TCP_MAX_BUF_LEN];
2901         struct request_data *req;
2902         int client_sk, err;
2903         unsigned int msg_len;
2904         GSList *list;
2905         bool waiting_for_connect = false;
2906         int qtype = 0;
2907         struct cache_entry *entry;
2908
2909         client_sk = g_io_channel_unix_get_fd(client->channel);
2910
2911         if (read_len == 0) {
2912                 DBG("client %d closed, pending %d bytes",
2913                         client_sk, client->buf_end);
2914                 g_hash_table_remove(partial_tcp_req_table,
2915                                         GINT_TO_POINTER(client_sk));
2916                 return false;
2917         }
2918
2919         DBG("client %d received %d bytes", client_sk, read_len);
2920
2921         client->buf_end += read_len;
2922
2923         if (client->buf_end < 2)
2924                 return true;
2925
2926         msg_len = get_msg_len(client->buf);
2927         if (msg_len > TCP_MAX_BUF_LEN) {
2928                 DBG("client %d sent too much data %d", client_sk, msg_len);
2929                 g_hash_table_remove(partial_tcp_req_table,
2930                                         GINT_TO_POINTER(client_sk));
2931                 return false;
2932         }
2933
2934 read_another:
2935         DBG("client %d msg len %d end %d past end %d", client_sk, msg_len,
2936                 client->buf_end, client->buf_end - (msg_len + 2));
2937
2938         if (client->buf_end < (msg_len + 2)) {
2939                 DBG("client %d still missing %d bytes",
2940                         client_sk,
2941                         msg_len + 2 - client->buf_end);
2942                 return true;
2943         }
2944
2945         DBG("client %d all data %d received", client_sk, msg_len);
2946
2947         err = parse_request(client->buf + 2, msg_len,
2948                         query, sizeof(query));
2949         if (err < 0 || (g_slist_length(server_list) == 0)) {
2950                 send_response(client_sk, client->buf, msg_len + 2,
2951                         NULL, 0, IPPROTO_TCP);
2952                 return true;
2953         }
2954
2955         req = g_try_new0(struct request_data, 1);
2956         if (!req)
2957                 return true;
2958
2959         memcpy(&req->sa, client_addr, client_addr_len);
2960         req->sa_len = client_addr_len;
2961         req->client_sk = client_sk;
2962         req->protocol = IPPROTO_TCP;
2963         req->family = client->family;
2964
2965         req->srcid = client->buf[2] | (client->buf[3] << 8);
2966         req->dstid = get_id();
2967         req->altid = get_id();
2968         req->request_len = msg_len + 2;
2969
2970         client->buf[2] = req->dstid & 0xff;
2971         client->buf[3] = req->dstid >> 8;
2972
2973         req->numserv = 0;
2974         req->ifdata = client->ifdata;
2975         req->append_domain = false;
2976
2977         /*
2978          * Check if the answer is found in the cache before
2979          * creating sockets to the server.
2980          */
2981         entry = cache_check(client->buf, &qtype, IPPROTO_TCP);
2982         if (entry) {
2983                 int ttl_left = 0;
2984                 struct cache_data *data;
2985
2986                 DBG("cache hit %s type %s", query, qtype == 1 ? "A" : "AAAA");
2987                 if (qtype == 1)
2988                         data = entry->ipv4;
2989                 else
2990                         data = entry->ipv6;
2991
2992                 if (data) {
2993                         ttl_left = data->valid_until - time(NULL);
2994                         entry->hits++;
2995
2996                         send_cached_response(client_sk, data->data,
2997                                         data->data_len, NULL, 0, IPPROTO_TCP,
2998                                         req->srcid, data->answers, ttl_left);
2999
3000                         g_free(req);
3001                         goto out;
3002                 } else
3003                         DBG("data missing, ignoring cache for this query");
3004         }
3005
3006         for (list = server_list; list; list = list->next) {
3007                 struct server_data *data = list->data;
3008
3009                 if (data->protocol != IPPROTO_UDP || !data->enabled)
3010                         continue;
3011
3012                 if (!create_server(data->index, NULL, data->server,
3013                                         IPPROTO_TCP))
3014                         continue;
3015
3016                 waiting_for_connect = true;
3017         }
3018
3019         if (!waiting_for_connect) {
3020                 /* No server is waiting for connect */
3021                 send_response(client_sk, client->buf,
3022                         req->request_len, NULL, 0, IPPROTO_TCP);
3023                 g_free(req);
3024                 return true;
3025         }
3026
3027         /*
3028          * The server is not connected yet.
3029          * Copy the relevant buffers.
3030          * The request will actually be sent once we're
3031          * properly connected over TCP to the nameserver.
3032          */
3033         req->request = g_try_malloc0(req->request_len);
3034         if (!req->request) {
3035                 send_response(client_sk, client->buf,
3036                         req->request_len, NULL, 0, IPPROTO_TCP);
3037                 g_free(req);
3038                 goto out;
3039         }
3040         memcpy(req->request, client->buf, req->request_len);
3041
3042         req->name = g_try_malloc0(sizeof(query));
3043         if (!req->name) {
3044                 send_response(client_sk, client->buf,
3045                         req->request_len, NULL, 0, IPPROTO_TCP);
3046                 g_free(req->request);
3047                 g_free(req);
3048                 goto out;
3049         }
3050         memcpy(req->name, query, sizeof(query));
3051
3052         req->timeout = g_timeout_add_seconds(30, request_timeout, req);
3053
3054         request_list = g_slist_append(request_list, req);
3055
3056 out:
3057         if (client->buf_end > (msg_len + 2)) {
3058                 DBG("client %d buf %p -> %p end %d len %d new %d",
3059                         client_sk,
3060                         client->buf + msg_len + 2,
3061                         client->buf, client->buf_end,
3062                         TCP_MAX_BUF_LEN - client->buf_end,
3063                         client->buf_end - (msg_len + 2));
3064                 memmove(client->buf, client->buf + msg_len + 2,
3065                         TCP_MAX_BUF_LEN - client->buf_end);
3066                 client->buf_end = client->buf_end - (msg_len + 2);
3067
3068                 /*
3069                  * If we have a full message waiting, just read it
3070                  * immediately.
3071                  */
3072                 msg_len = get_msg_len(client->buf);
3073                 if ((msg_len + 2) == client->buf_end) {
3074                         DBG("client %d reading another %d bytes", client_sk,
3075                                                                 msg_len + 2);
3076                         goto read_another;
3077                 }
3078         } else {
3079                 DBG("client %d clearing reading buffer", client_sk);
3080
3081                 client->buf_end = 0;
3082                 memset(client->buf, 0, TCP_MAX_BUF_LEN);
3083
3084                 /*
3085                  * We received all the packets from client so we must also
3086                  * remove the timeout handler here otherwise we might get
3087                  * timeout while waiting the results from server.
3088                  */
3089                 g_source_remove(client->timeout);
3090                 client->timeout = 0;
3091         }
3092
3093         return true;
3094 }
3095
3096 static gboolean tcp_client_event(GIOChannel *channel, GIOCondition condition,
3097                                 gpointer user_data)
3098 {
3099         struct tcp_partial_client_data *client = user_data;
3100         struct sockaddr_in6 client_addr6;
3101         socklen_t client_addr6_len = sizeof(client_addr6);
3102         struct sockaddr_in client_addr4;
3103         socklen_t client_addr4_len = sizeof(client_addr4);
3104         void *client_addr;
3105         socklen_t *client_addr_len;
3106         int len, client_sk;
3107
3108         client_sk = g_io_channel_unix_get_fd(channel);
3109
3110         if (condition & (G_IO_NVAL | G_IO_ERR | G_IO_HUP)) {
3111                 g_hash_table_remove(partial_tcp_req_table,
3112                                         GINT_TO_POINTER(client_sk));
3113
3114                 connman_error("Error with TCP client %d channel", client_sk);
3115                 return FALSE;
3116         }
3117
3118         switch (client->family) {
3119         case AF_INET:
3120                 client_addr = &client_addr4;
3121                 client_addr_len = &client_addr4_len;
3122                 break;
3123         case AF_INET6:
3124                 client_addr = &client_addr6;
3125                 client_addr_len = &client_addr6_len;
3126                 break;
3127         default:
3128                 g_hash_table_remove(partial_tcp_req_table,
3129                                         GINT_TO_POINTER(client_sk));
3130                 connman_error("client %p corrupted", client);
3131                 return FALSE;
3132         }
3133
3134         len = recvfrom(client_sk, client->buf + client->buf_end,
3135                         TCP_MAX_BUF_LEN - client->buf_end, 0,
3136                         client_addr, client_addr_len);
3137         if (len < 0) {
3138                 if (errno == EAGAIN || errno == EWOULDBLOCK)
3139                         return TRUE;
3140
3141                 DBG("client %d cannot read errno %d/%s", client_sk, -errno,
3142                         strerror(errno));
3143                 g_hash_table_remove(partial_tcp_req_table,
3144                                         GINT_TO_POINTER(client_sk));
3145                 return FALSE;
3146         }
3147
3148         return read_tcp_data(client, client_addr, *client_addr_len, len);
3149 }
3150
3151 static gboolean client_timeout(gpointer user_data)
3152 {
3153         struct tcp_partial_client_data *client = user_data;
3154         int sock;
3155
3156         sock = g_io_channel_unix_get_fd(client->channel);
3157
3158         DBG("client %d timeout pending %d bytes", sock, client->buf_end);
3159
3160         g_hash_table_remove(partial_tcp_req_table, GINT_TO_POINTER(sock));
3161
3162         return FALSE;
3163 }
3164
3165 static bool tcp_listener_event(GIOChannel *channel, GIOCondition condition,
3166                                 struct listener_data *ifdata, int family,
3167                                 guint *listener_watch)
3168 {
3169         int sk, client_sk, len;
3170         unsigned int msg_len;
3171         struct tcp_partial_client_data *client;
3172         struct sockaddr_in6 client_addr6;
3173         socklen_t client_addr6_len = sizeof(client_addr6);
3174         struct sockaddr_in client_addr4;
3175         socklen_t client_addr4_len = sizeof(client_addr4);
3176         void *client_addr;
3177         socklen_t *client_addr_len;
3178         struct timeval tv;
3179         fd_set readfds;
3180
3181         DBG("condition 0x%02x channel %p ifdata %p family %d",
3182                 condition, channel, ifdata, family);
3183
3184         if (condition & (G_IO_NVAL | G_IO_ERR | G_IO_HUP)) {
3185                 if (*listener_watch > 0)
3186                         g_source_remove(*listener_watch);
3187                 *listener_watch = 0;
3188
3189                 connman_error("Error with TCP listener channel");
3190
3191                 return false;
3192         }
3193
3194         sk = g_io_channel_unix_get_fd(channel);
3195
3196         if (family == AF_INET) {
3197                 client_addr = &client_addr4;
3198                 client_addr_len = &client_addr4_len;
3199         } else {
3200                 client_addr = &client_addr6;
3201                 client_addr_len = &client_addr6_len;
3202         }
3203
3204         tv.tv_sec = tv.tv_usec = 0;
3205         FD_ZERO(&readfds);
3206         FD_SET(sk, &readfds);
3207
3208         select(sk + 1, &readfds, NULL, NULL, &tv);
3209         if (FD_ISSET(sk, &readfds)) {
3210                 client_sk = accept(sk, client_addr, client_addr_len);
3211                 DBG("client %d accepted", client_sk);
3212         } else {
3213                 DBG("No data to read from master %d, waiting.", sk);
3214                 return true;
3215         }
3216
3217         if (client_sk < 0) {
3218                 connman_error("Accept failure on TCP listener");
3219                 *listener_watch = 0;
3220                 return false;
3221         }
3222
3223         fcntl(client_sk, F_SETFL, O_NONBLOCK);
3224
3225         client = g_hash_table_lookup(partial_tcp_req_table,
3226                                         GINT_TO_POINTER(client_sk));
3227         if (!client) {
3228                 client = g_try_new0(struct tcp_partial_client_data, 1);
3229                 if (!client) {
3230                         close(client_sk);
3231                         return false;
3232                 }
3233
3234                 g_hash_table_insert(partial_tcp_req_table,
3235                                         GINT_TO_POINTER(client_sk),
3236                                         client);
3237
3238                 client->channel = g_io_channel_unix_new(client_sk);
3239                 g_io_channel_set_close_on_unref(client->channel, TRUE);
3240
3241                 client->watch = g_io_add_watch(client->channel,
3242                                                 G_IO_IN, tcp_client_event,
3243                                                 (gpointer)client);
3244
3245                 client->ifdata = ifdata;
3246
3247                 DBG("client %d created %p", client_sk, client);
3248         } else {
3249                 DBG("client %d already exists %p", client_sk, client);
3250         }
3251
3252         if (!client->buf) {
3253                 client->buf = g_try_malloc(TCP_MAX_BUF_LEN);
3254                 if (!client->buf)
3255                         return false;
3256         }
3257         memset(client->buf, 0, TCP_MAX_BUF_LEN);
3258         client->buf_end = 0;
3259         client->family = family;
3260
3261         if (client->timeout == 0)
3262                 client->timeout = g_timeout_add_seconds(2, client_timeout,
3263                                                         client);
3264
3265         /*
3266          * Check how much data there is. If all is there, then we can
3267          * proceed normally, otherwise read the bits until everything
3268          * is received or timeout occurs.
3269          */
3270         len = recv(client_sk, client->buf, TCP_MAX_BUF_LEN, 0);
3271         if (len < 0) {
3272                 if (errno == EAGAIN || errno == EWOULDBLOCK) {
3273                         DBG("client %d no data to read, waiting", client_sk);
3274                         return true;
3275                 }
3276
3277                 DBG("client %d cannot read errno %d/%s", client_sk, -errno,
3278                         strerror(errno));
3279                 g_hash_table_remove(partial_tcp_req_table,
3280                                         GINT_TO_POINTER(client_sk));
3281                 return true;
3282         }
3283
3284         if (len < 2) {
3285                 DBG("client %d not enough data to read, waiting", client_sk);
3286                 client->buf_end += len;
3287                 return true;
3288         }
3289
3290         msg_len = get_msg_len(client->buf);
3291         if (msg_len > TCP_MAX_BUF_LEN) {
3292                 DBG("client %d invalid message length %u ignoring packet",
3293                         client_sk, msg_len);
3294                 g_hash_table_remove(partial_tcp_req_table,
3295                                         GINT_TO_POINTER(client_sk));
3296                 return true;
3297         }
3298
3299         /*
3300          * The packet length bytes do not contain the total message length,
3301          * that is the reason to -2 below.
3302          */
3303         if (msg_len != (unsigned int)(len - 2)) {
3304                 DBG("client %d sent %d bytes but expecting %u pending %d",
3305                         client_sk, len, msg_len + 2, msg_len + 2 - len);
3306
3307                 client->buf_end += len;
3308                 return true;
3309         }
3310
3311         return read_tcp_data(client, client_addr, *client_addr_len, len);
3312 }
3313
3314 static gboolean tcp4_listener_event(GIOChannel *channel, GIOCondition condition,
3315                                 gpointer user_data)
3316 {
3317         struct listener_data *ifdata = user_data;
3318
3319         return tcp_listener_event(channel, condition, ifdata, AF_INET,
3320                                 &ifdata->tcp4_listener_watch);
3321 }
3322
3323 static gboolean tcp6_listener_event(GIOChannel *channel, GIOCondition condition,
3324                                 gpointer user_data)
3325 {
3326         struct listener_data *ifdata = user_data;
3327
3328         return tcp_listener_event(channel, condition, user_data, AF_INET6,
3329                                 &ifdata->tcp6_listener_watch);
3330 }
3331
3332 static bool udp_listener_event(GIOChannel *channel, GIOCondition condition,
3333                                 struct listener_data *ifdata, int family,
3334                                 guint *listener_watch)
3335 {
3336         unsigned char buf[768];
3337         char query[512];
3338         struct request_data *req;
3339         struct sockaddr_in6 client_addr6;
3340         socklen_t client_addr6_len = sizeof(client_addr6);
3341         struct sockaddr_in client_addr4;
3342         socklen_t client_addr4_len = sizeof(client_addr4);
3343         void *client_addr;
3344         socklen_t *client_addr_len;
3345         int sk, err, len;
3346
3347         if (condition & (G_IO_NVAL | G_IO_ERR | G_IO_HUP)) {
3348                 connman_error("Error with UDP listener channel");
3349                 *listener_watch = 0;
3350                 return false;
3351         }
3352
3353         sk = g_io_channel_unix_get_fd(channel);
3354
3355         if (family == AF_INET) {
3356                 client_addr = &client_addr4;
3357                 client_addr_len = &client_addr4_len;
3358         } else {
3359                 client_addr = &client_addr6;
3360                 client_addr_len = &client_addr6_len;
3361         }
3362
3363         memset(client_addr, 0, *client_addr_len);
3364         len = recvfrom(sk, buf, sizeof(buf), 0, client_addr, client_addr_len);
3365         if (len < 2)
3366                 return true;
3367
3368         DBG("Received %d bytes (id 0x%04x)", len, buf[0] | buf[1] << 8);
3369
3370         err = parse_request(buf, len, query, sizeof(query));
3371         if (err < 0 || (g_slist_length(server_list) == 0)) {
3372                 send_response(sk, buf, len, client_addr,
3373                                 *client_addr_len, IPPROTO_UDP);
3374                 return true;
3375         }
3376
3377         req = g_try_new0(struct request_data, 1);
3378         if (!req)
3379                 return true;
3380
3381         memcpy(&req->sa, client_addr, *client_addr_len);
3382         req->sa_len = *client_addr_len;
3383         req->client_sk = 0;
3384         req->protocol = IPPROTO_UDP;
3385         req->family = family;
3386
3387         req->srcid = buf[0] | (buf[1] << 8);
3388         req->dstid = get_id();
3389         req->altid = get_id();
3390         req->request_len = len;
3391
3392         buf[0] = req->dstid & 0xff;
3393         buf[1] = req->dstid >> 8;
3394
3395         req->numserv = 0;
3396         req->ifdata = ifdata;
3397         req->append_domain = false;
3398
3399         if (resolv(req, buf, query)) {
3400                 /* a cached result was sent, so the request can be released */
3401                 g_free(req);
3402                 return true;
3403         }
3404
3405         req->timeout = g_timeout_add_seconds(5, request_timeout, req);
3406         request_list = g_slist_append(request_list, req);
3407
3408         return true;
3409 }
3410
3411 static gboolean udp4_listener_event(GIOChannel *channel, GIOCondition condition,
3412                                 gpointer user_data)
3413 {
3414         struct listener_data *ifdata = user_data;
3415
3416         return udp_listener_event(channel, condition, ifdata, AF_INET,
3417                                 &ifdata->udp4_listener_watch);
3418 }
3419
3420 static gboolean udp6_listener_event(GIOChannel *channel, GIOCondition condition,
3421                                 gpointer user_data)
3422 {
3423         struct listener_data *ifdata = user_data;
3424
3425         return udp_listener_event(channel, condition, user_data, AF_INET6,
3426                                 &ifdata->udp6_listener_watch);
3427 }
3428
3429 static GIOChannel *get_listener(int family, int protocol, int index)
3430 {
3431         GIOChannel *channel;
3432         const char *proto;
3433         union {
3434                 struct sockaddr sa;
3435                 struct sockaddr_in6 sin6;
3436                 struct sockaddr_in sin;
3437         } s;
3438         socklen_t slen;
3439         int sk, type;
3440         char *interface;
3441
3442         DBG("family %d protocol %d index %d", family, protocol, index);
3443
3444         switch (protocol) {
3445         case IPPROTO_UDP:
3446                 proto = "UDP";
3447                 type = SOCK_DGRAM | SOCK_CLOEXEC;
3448                 break;
3449
3450         case IPPROTO_TCP:
3451                 proto = "TCP";
3452                 type = SOCK_STREAM | SOCK_CLOEXEC;
3453                 break;
3454
3455         default:
3456                 return NULL;
3457         }
3458
3459         sk = socket(family, type, protocol);
3460         if (sk < 0 && family == AF_INET6 && errno == EAFNOSUPPORT) {
3461                 connman_error("No IPv6 support");
3462                 return NULL;
3463         }
3464
3465         if (sk < 0) {
3466                 connman_error("Failed to create %s listener socket", proto);
3467                 return NULL;
3468         }
3469
3470         interface = connman_inet_ifname(index);
3471         if (!interface || setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE,
3472                                         interface,
3473                                         strlen(interface) + 1) < 0) {
3474                 connman_error("Failed to bind %s listener interface "
3475                         "for %s (%d/%s)",
3476                         proto, family == AF_INET ? "IPv4" : "IPv6",
3477                         -errno, strerror(errno));
3478                 close(sk);
3479                 g_free(interface);
3480                 return NULL;
3481         }
3482         g_free(interface);
3483
3484         if (family == AF_INET6) {
3485                 memset(&s.sin6, 0, sizeof(s.sin6));
3486                 s.sin6.sin6_family = AF_INET6;
3487                 s.sin6.sin6_port = htons(53);
3488                 slen = sizeof(s.sin6);
3489
3490                 if (__connman_inet_get_interface_address(index,
3491                                                 AF_INET6,
3492                                                 &s.sin6.sin6_addr) < 0) {
3493                         /* So we could not find suitable IPv6 address for
3494                          * the interface. This could happen if we have
3495                          * disabled IPv6 for the interface.
3496                          */
3497                         close(sk);
3498                         return NULL;
3499                 }
3500
3501         } else if (family == AF_INET) {
3502                 memset(&s.sin, 0, sizeof(s.sin));
3503                 s.sin.sin_family = AF_INET;
3504                 s.sin.sin_port = htons(53);
3505                 slen = sizeof(s.sin);
3506
3507                 if (__connman_inet_get_interface_address(index,
3508                                                 AF_INET,
3509                                                 &s.sin.sin_addr) < 0) {
3510                         close(sk);
3511                         return NULL;
3512                 }
3513         } else {
3514                 close(sk);
3515                 return NULL;
3516         }
3517
3518         if (bind(sk, &s.sa, slen) < 0) {
3519                 connman_error("Failed to bind %s listener socket", proto);
3520                 close(sk);
3521                 return NULL;
3522         }
3523
3524         if (protocol == IPPROTO_TCP) {
3525
3526                 if (listen(sk, 10) < 0) {
3527                         connman_error("Failed to listen on TCP socket %d/%s",
3528                                 -errno, strerror(errno));
3529                         close(sk);
3530                         return NULL;
3531                 }
3532
3533                 fcntl(sk, F_SETFL, O_NONBLOCK);
3534         }
3535
3536         channel = g_io_channel_unix_new(sk);
3537         if (!channel) {
3538                 connman_error("Failed to create %s listener channel", proto);
3539                 close(sk);
3540                 return NULL;
3541         }
3542
3543         g_io_channel_set_close_on_unref(channel, TRUE);
3544
3545         return channel;
3546 }
3547
/*
 * Failure bits returned by create_dns_listener(): one bit per
 * protocol / address-family pair, followed by masks that group the bits
 * per protocol and per address family.
 */
#define UDP_IPv4_FAILED (1 << 0)
#define TCP_IPv4_FAILED (1 << 1)
#define UDP_IPv6_FAILED (1 << 2)
#define TCP_IPv6_FAILED (1 << 3)

#define UDP_FAILED (UDP_IPv4_FAILED | UDP_IPv6_FAILED)
#define TCP_FAILED (TCP_IPv4_FAILED | TCP_IPv6_FAILED)
#define IPv6_FAILED (UDP_IPv6_FAILED | TCP_IPv6_FAILED)
#define IPv4_FAILED (UDP_IPv4_FAILED | TCP_IPv4_FAILED)
3556
3557 static int create_dns_listener(int protocol, struct listener_data *ifdata)
3558 {
3559         int ret = 0;
3560
3561         if (protocol == IPPROTO_TCP) {
3562                 ifdata->tcp4_listener_channel = get_listener(AF_INET, protocol,
3563                                                         ifdata->index);
3564                 if (ifdata->tcp4_listener_channel)
3565                         ifdata->tcp4_listener_watch =
3566                                 g_io_add_watch(ifdata->tcp4_listener_channel,
3567                                         G_IO_IN, tcp4_listener_event,
3568                                         (gpointer)ifdata);
3569                 else
3570                         ret |= TCP_IPv4_FAILED;
3571
3572                 ifdata->tcp6_listener_channel = get_listener(AF_INET6, protocol,
3573                                                         ifdata->index);
3574                 if (ifdata->tcp6_listener_channel)
3575                         ifdata->tcp6_listener_watch =
3576                                 g_io_add_watch(ifdata->tcp6_listener_channel,
3577                                         G_IO_IN, tcp6_listener_event,
3578                                         (gpointer)ifdata);
3579                 else
3580                         ret |= TCP_IPv6_FAILED;
3581         } else {
3582                 ifdata->udp4_listener_channel = get_listener(AF_INET, protocol,
3583                                                         ifdata->index);
3584                 if (ifdata->udp4_listener_channel)
3585                         ifdata->udp4_listener_watch =
3586                                 g_io_add_watch(ifdata->udp4_listener_channel,
3587                                         G_IO_IN, udp4_listener_event,
3588                                         (gpointer)ifdata);
3589                 else
3590                         ret |= UDP_IPv4_FAILED;
3591
3592                 ifdata->udp6_listener_channel = get_listener(AF_INET6, protocol,
3593                                                         ifdata->index);
3594                 if (ifdata->udp6_listener_channel)
3595                         ifdata->udp6_listener_watch =
3596                                 g_io_add_watch(ifdata->udp6_listener_channel,
3597                                         G_IO_IN, udp6_listener_event,
3598                                         (gpointer)ifdata);
3599                 else
3600                         ret |= UDP_IPv6_FAILED;
3601         }
3602
3603         return ret;
3604 }
3605
3606 static void destroy_udp_listener(struct listener_data *ifdata)
3607 {
3608         DBG("index %d", ifdata->index);
3609
3610         if (ifdata->udp4_listener_watch > 0)
3611                 g_source_remove(ifdata->udp4_listener_watch);
3612
3613         if (ifdata->udp6_listener_watch > 0)
3614                 g_source_remove(ifdata->udp6_listener_watch);
3615
3616         if (ifdata->udp4_listener_channel)
3617                 g_io_channel_unref(ifdata->udp4_listener_channel);
3618         if (ifdata->udp6_listener_channel)
3619                 g_io_channel_unref(ifdata->udp6_listener_channel);
3620 }
3621
3622 static void destroy_tcp_listener(struct listener_data *ifdata)
3623 {
3624         DBG("index %d", ifdata->index);
3625
3626         if (ifdata->tcp4_listener_watch > 0)
3627                 g_source_remove(ifdata->tcp4_listener_watch);
3628         if (ifdata->tcp6_listener_watch > 0)
3629                 g_source_remove(ifdata->tcp6_listener_watch);
3630
3631         if (ifdata->tcp4_listener_channel)
3632                 g_io_channel_unref(ifdata->tcp4_listener_channel);
3633         if (ifdata->tcp6_listener_channel)
3634                 g_io_channel_unref(ifdata->tcp6_listener_channel);
3635 }
3636
3637 static int create_listener(struct listener_data *ifdata)
3638 {
3639         int err, index;
3640
3641         err = create_dns_listener(IPPROTO_UDP, ifdata);
3642         if ((err & UDP_FAILED) == UDP_FAILED)
3643                 return -EIO;
3644
3645         err |= create_dns_listener(IPPROTO_TCP, ifdata);
3646         if ((err & TCP_FAILED) == TCP_FAILED) {
3647                 destroy_udp_listener(ifdata);
3648                 return -EIO;
3649         }
3650
3651         index = connman_inet_ifindex("lo");
3652         if (ifdata->index == index) {
3653                 if ((err & IPv6_FAILED) != IPv6_FAILED)
3654                         __connman_resolvfile_append(index, NULL, "::1");
3655
3656                 if ((err & IPv4_FAILED) != IPv4_FAILED)
3657                         __connman_resolvfile_append(index, NULL, "127.0.0.1");
3658         }
3659
3660         return 0;
3661 }
3662
3663 static void destroy_listener(struct listener_data *ifdata)
3664 {
3665         int index;
3666         GSList *list;
3667
3668         index = connman_inet_ifindex("lo");
3669         if (ifdata->index == index) {
3670                 __connman_resolvfile_remove(index, NULL, "127.0.0.1");
3671                 __connman_resolvfile_remove(index, NULL, "::1");
3672         }
3673
3674         for (list = request_list; list; list = list->next) {
3675                 struct request_data *req = list->data;
3676
3677                 DBG("Dropping request (id 0x%04x -> 0x%04x)",
3678                                                 req->srcid, req->dstid);
3679                 destroy_request_data(req);
3680                 list->data = NULL;
3681         }
3682
3683         g_slist_free(request_list);
3684         request_list = NULL;
3685
3686         destroy_tcp_listener(ifdata);
3687         destroy_udp_listener(ifdata);
3688 }
3689
3690 int __connman_dnsproxy_add_listener(int index)
3691 {
3692         struct listener_data *ifdata;
3693         int err;
3694
3695         DBG("index %d", index);
3696
3697         if (index < 0)
3698                 return -EINVAL;
3699
3700         if (!listener_table)
3701                 return -ENOENT;
3702
3703         if (g_hash_table_lookup(listener_table, GINT_TO_POINTER(index)))
3704                 return 0;
3705
3706         ifdata = g_try_new0(struct listener_data, 1);
3707         if (!ifdata)
3708                 return -ENOMEM;
3709
3710         ifdata->index = index;
3711         ifdata->udp4_listener_channel = NULL;
3712         ifdata->udp4_listener_watch = 0;
3713         ifdata->tcp4_listener_channel = NULL;
3714         ifdata->tcp4_listener_watch = 0;
3715         ifdata->udp6_listener_channel = NULL;
3716         ifdata->udp6_listener_watch = 0;
3717         ifdata->tcp6_listener_channel = NULL;
3718         ifdata->tcp6_listener_watch = 0;
3719
3720         err = create_listener(ifdata);
3721         if (err < 0) {
3722                 connman_error("Couldn't create listener for index %d err %d",
3723                                 index, err);
3724                 g_free(ifdata);
3725                 return err;
3726         }
3727         g_hash_table_insert(listener_table, GINT_TO_POINTER(ifdata->index),
3728                         ifdata);
3729         return 0;
3730 }
3731
3732 void __connman_dnsproxy_remove_listener(int index)
3733 {
3734         struct listener_data *ifdata;
3735
3736         DBG("index %d", index);
3737
3738         if (!listener_table)
3739                 return;
3740
3741         ifdata = g_hash_table_lookup(listener_table, GINT_TO_POINTER(index));
3742         if (!ifdata)
3743                 return;
3744
3745         destroy_listener(ifdata);
3746
3747         g_hash_table_remove(listener_table, GINT_TO_POINTER(index));
3748 }
3749
3750 static void remove_listener(gpointer key, gpointer value, gpointer user_data)
3751 {
3752         int index = GPOINTER_TO_INT(key);
3753         struct listener_data *ifdata = value;
3754
3755         DBG("index %d", index);
3756
3757         destroy_listener(ifdata);
3758 }
3759
3760 static void free_partial_reqs(gpointer value)
3761 {
3762         struct tcp_partial_client_data *data = value;
3763
3764         client_reset(data);
3765         g_free(data);
3766 }
3767
3768 int __connman_dnsproxy_init(void)
3769 {
3770         int err, index;
3771
3772         DBG("");
3773
3774         srandom(time(NULL));
3775
3776         listener_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
3777                                                         NULL, g_free);
3778
3779         partial_tcp_req_table = g_hash_table_new_full(g_direct_hash,
3780                                                         g_direct_equal,
3781                                                         NULL,
3782                                                         free_partial_reqs);
3783
3784         index = connman_inet_ifindex("lo");
3785         err = __connman_dnsproxy_add_listener(index);
3786         if (err < 0)
3787                 return err;
3788
3789         err = connman_notifier_register(&dnsproxy_notifier);
3790         if (err < 0)
3791                 goto destroy;
3792
3793         return 0;
3794
3795 destroy:
3796         __connman_dnsproxy_remove_listener(index);
3797         g_hash_table_destroy(listener_table);
3798         g_hash_table_destroy(partial_tcp_req_table);
3799
3800         return err;
3801 }
3802
3803 void __connman_dnsproxy_cleanup(void)
3804 {
3805         DBG("");
3806
3807         connman_notifier_unregister(&dnsproxy_notifier);
3808
3809         g_hash_table_foreach(listener_table, remove_listener, NULL);
3810
3811         g_hash_table_destroy(listener_table);
3812
3813         g_hash_table_destroy(partial_tcp_req_table);
3814 }