1 /* Support for cookies.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GNU Wget.
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
20 Additional permission under GNU GPL version 3 section 7
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently
38 http://wp.netscape.com/newsref/std/cookie_spec.html
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for
57 #include "http.h" /* for http_atotm */
59 /* Declarations of `struct cookie' and the most basic functions. */
61 /* Cookie jar serves as cookie storage and a means of retrieving
62 cookies efficiently. All cookies with the same domain are stored
63 in a linked list called "chain". A cookie chain can be reached by
64 looking up the domain in the cookie jar's `chains' table.
66 For example, to reach all the cookies under google.com, one must
67 execute hash_table_get(jar->chains, "google.com"). Of
68 course, when sending a cookie to `www.google.com', one must search
69 for cookies that belong to either `www.google.com' or `google.com'
70 -- but the point is that the code doesn't need to go through *all*
74 /* Cookie chains indexed by domain. */
75 struct hash_table *chains;
77 int cookie_count; /* number of cookies in the jar. */
80 /* Value set by entry point functions, so that the low-level
81 routines don't need to call time() all the time. */
82 static time_t cookies_now;
87 struct cookie_jar *jar = xnew (struct cookie_jar);
88 jar->chains = make_nocase_string_hash_table (0);
89 jar->cookie_count = 0;
94 char *domain; /* domain of the cookie */
95 int port; /* port number */
96 char *path; /* path prefix of the cookie */
98 unsigned discard_requested :1; /* whether cookie was created to
99 request discarding another
102 unsigned secure :1; /* whether cookie should be
103 transmitted over non-https
105 unsigned domain_exact :1; /* whether DOMAIN must match as a
108 unsigned permanent :1; /* whether the cookie should outlive
110 time_t expiry_time; /* time when the cookie expires, 0
111 means undetermined. */
113 char *attr; /* cookie attribute name */
114 char *value; /* cookie attribute value */
116 struct cookie *next; /* used for chaining of cookies in the
120 #define PORT_ANY (-1)
122 /* Allocate and return a new, empty cookie structure. */
124 static struct cookie *
127 struct cookie *cookie = xnew0 (struct cookie);
129 /* Both cookie->permanent and cookie->expiry_time are now 0. This
130 means that the cookie doesn't expire, but is only valid for this
131 session (i.e. not written out to disk). */
133 cookie->port = PORT_ANY;
137 /* Non-zero if the cookie has expired. Assumes cookies_now has been
138 set by one of the entry point functions. */
141 cookie_expired_p (const struct cookie *c)
143 return c->expiry_time != 0 && c->expiry_time < cookies_now;
146 /* Deallocate COOKIE and its components. */
149 delete_cookie (struct cookie *cookie)
151 xfree_null (cookie->domain);
152 xfree_null (cookie->path);
153 xfree_null (cookie->attr);
154 xfree_null (cookie->value);
158 /* Functions for storing cookies.
160 All cookies can be reached beginning with jar->chains. The key in
161 that table is the domain name, and the value is a linked list of
162 all cookies from that domain. Every new cookie is placed on the
165 /* Find and return a cookie in JAR whose domain, path, and attribute
166 name correspond to COOKIE. If found, PREVPTR will point to the
167 location of the cookie previous in chain, or NULL if the found
168 cookie is the head of a chain.
170 If no matching cookie is found, return NULL. */
172 static struct cookie *
173 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
174 struct cookie **prevptr)
176 struct cookie *chain, *prev;
178 chain = hash_table_get (jar->chains, cookie->domain);
183 for (; chain; prev = chain, chain = chain->next)
184 if (0 == strcmp (cookie->path, chain->path)
185 && 0 == strcmp (cookie->attr, chain->attr)
186 && cookie->port == chain->port)
197 /* Store COOKIE to the jar.
199 This is done by placing COOKIE at the head of its chain. However,
200 if COOKIE matches a cookie already in memory, as determined by
201 find_matching_cookie, the old cookie is unlinked and destroyed.
203 The key of each chain's hash table entry is allocated only the
204 first time; next hash_table_put's reuse the same key. */
207 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
209 struct cookie *chain_head;
212 if (hash_table_get_pair (jar->chains, cookie->domain,
213 &chain_key, &chain_head))
215 /* A chain of cookies in this domain already exists. Check for
216 duplicates -- if an extant cookie exactly matches our domain,
217 port, path, and name, replace it. */
219 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
223 /* Remove VICTIM from the chain. COOKIE will be placed at
227 prev->next = victim->next;
228 cookie->next = chain_head;
232 /* prev is NULL; apparently VICTIM was at the head of
233 the chain. This place will be taken by COOKIE, so
234 all we need to do is: */
235 cookie->next = victim->next;
237 delete_cookie (victim);
239 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
242 cookie->next = chain_head;
246 /* We are now creating the chain. Use a copy of cookie->domain
247 as the key for the life-time of the chain. Using
248 cookie->domain would be unsafe because the life-time of the
249 chain may exceed the life-time of the cookie. (Cookies may
250 be deleted from the chain by this very function.) */
252 chain_key = xstrdup (cookie->domain);
255 hash_table_put (jar->chains, chain_key, cookie);
260 time_t exptime = cookie->expiry_time;
261 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
262 cookie->domain, cookie->port,
263 cookie->port == PORT_ANY ? " (ANY)" : "",
265 cookie->permanent ? "permanent" : "session",
266 cookie->secure ? "secure" : "insecure",
267 cookie->expiry_time ? datetime_str (exptime) : "none",
268 cookie->attr, cookie->value));
272 /* Discard a cookie matching COOKIE's domain, port, path, and
273 attribute name. This gets called when we encounter a cookie whose
274 expiry date is in the past, or whose max-age is set to 0. The
275 former corresponds to netscape cookie spec, while the latter is
276 specified by rfc2109. */
279 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
281 struct cookie *prev, *victim;
283 if (!hash_table_count (jar->chains))
284 /* No elements == nothing to discard. */
287 victim = find_matching_cookie (jar, cookie, &prev);
291 /* Simply unchain the victim. */
292 prev->next = victim->next;
295 /* VICTIM was head of its chain. We need to place a new
296 cookie at the head. */
297 char *chain_key = NULL;
300 res = hash_table_get_pair (jar->chains, victim->domain,
305 /* VICTIM was the only cookie in the chain. Destroy the
306 chain and deallocate the chain key. */
307 hash_table_remove (jar->chains, victim->domain);
311 hash_table_put (jar->chains, chain_key, victim->next);
313 delete_cookie (victim);
314 DEBUGP (("Discarded old cookie.\n"));
318 /* Functions for parsing the `Set-Cookie' header, and creating new
319 cookies from the wire. */
321 #define TOKEN_IS(token, string_literal) \
322 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
324 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
326 /* Parse the contents of the `Set-Cookie' header. The header looks
329 name1=value1; name2=value2; ...
331 Trailing semicolon is optional; spaces are allowed between all
332 tokens. Additionally, values may be quoted.
334 A new cookie is returned upon success, NULL otherwise.
336 The first name-value pair will be used to set the cookie's
337 attribute name and value. Subsequent parameters will be checked
338 against field names such as `domain', `path', etc. Recognized
339 fields will be parsed and the corresponding members of COOKIE
342 static struct cookie *
343 parse_set_cookie (const char *set_cookie, bool silent)
345 const char *ptr = set_cookie;
346 struct cookie *cookie = cookie_new ();
347 param_token name, value;
349 if (!extract_param (&ptr, &name, &value, ';'))
353 cookie->attr = strdupdelim (name.b, name.e);
354 cookie->value = strdupdelim (value.b, value.e);
356 while (extract_param (&ptr, &name, &value, ';'))
358 if (TOKEN_IS (name, "domain"))
360 if (!TOKEN_NON_EMPTY (value))
362 xfree_null (cookie->domain);
363 /* Strictly speaking, we should set cookie->domain_exact if the
364 domain doesn't begin with a dot. But many sites set the
365 domain to "foo.com" and expect "subhost.foo.com" to get the
366 cookie, and it apparently works in browsers. */
369 cookie->domain = strdupdelim (value.b, value.e);
371 else if (TOKEN_IS (name, "path"))
373 if (!TOKEN_NON_EMPTY (value))
375 xfree_null (cookie->path);
376 cookie->path = strdupdelim (value.b, value.e);
378 else if (TOKEN_IS (name, "expires"))
383 if (!TOKEN_NON_EMPTY (value))
385 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
387 expires = http_atotm (value_copy);
388 if (expires != (time_t) -1)
390 cookie->permanent = 1;
391 cookie->expiry_time = expires;
392 /* According to netscape's specification, expiry time in
393 the past means that discarding of a matching cookie
395 if (cookie->expiry_time < cookies_now)
396 cookie->discard_requested = 1;
399 /* Error in expiration spec. Assume default (cookie doesn't
400 expire, but valid only for this session.) */
403 else if (TOKEN_IS (name, "max-age"))
408 if (!TOKEN_NON_EMPTY (value))
410 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
412 sscanf (value_copy, "%lf", &maxage);
414 /* something went wrong. */
416 cookie->permanent = 1;
417 cookie->expiry_time = cookies_now + maxage;
419 /* According to rfc2109, a cookie with max-age of 0 means that
420 discarding of a matching cookie is requested. */
422 cookie->discard_requested = 1;
424 else if (TOKEN_IS (name, "secure"))
426 /* ignore value completely */
430 /* Ignore unrecognized attribute. */
434 /* extract_param has encountered a syntax error */
437 /* The cookie has been successfully constructed; return it. */
442 logprintf (LOG_NOTQUIET,
443 _("Syntax error in Set-Cookie: %s at position %d.\n"),
444 quotearg_style (escape_quoting_style, set_cookie),
445 (int) (ptr - set_cookie));
446 delete_cookie (cookie);
451 #undef TOKEN_NON_EMPTY
453 /* Sanity checks. These are important, otherwise it is possible for
454 malicious attackers to destroy important cookie information and/or
455 violate your privacy. */
458 #define REQUIRE_DIGITS(p) do { \
459 if (!c_isdigit (*p)) \
461 for (++p; c_isdigit (*p); p++) \
465 #define REQUIRE_DOT(p) do { \
470 /* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>.
472 We don't want to call network functions like inet_addr() because
473 all we need is a check, preferably one that is small, fast, and
477 numeric_address_p (const char *addr)
479 const char *p = addr;
481 REQUIRE_DIGITS (p); /* A */
482 REQUIRE_DOT (p); /* . */
483 REQUIRE_DIGITS (p); /* B */
484 REQUIRE_DOT (p); /* . */
485 REQUIRE_DIGITS (p); /* C */
486 REQUIRE_DOT (p); /* . */
487 REQUIRE_DIGITS (p); /* D */
494 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
495 Originally I tried to make the check compliant with rfc2109, but
496 the sites deviated too often, so I had to fall back to "tail
497 matching", as defined by the original Netscape's cookie spec. */
500 check_domain_match (const char *cookie_domain, const char *host)
504 /* Numeric address requires exact match. It also requires HOST to
506 if (numeric_address_p (cookie_domain))
507 return 0 == strcmp (cookie_domain, host);
511 /* For the sake of efficiency, check for exact match first. */
512 if (0 == strcasecmp (cookie_domain, host))
517 /* COOKIE_DOMAIN must match the tail of HOST. */
518 if (!match_tail (host, cookie_domain, true))
521 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
522 make sure that somebody is not trying to set the cookie for a
523 subdomain shared by many entities. For example, "company.co.uk"
524 must not be allowed to set a cookie for ".co.uk". On the other
525 hand, "sso.redhat.de" should be able to set a cookie for
528 The only marginally sane way to handle this I can think of is to
529 reject on the basis of the length of the second-level domain name
530 (but only when the top-level domain is unknown), with the assumption
531 that those of three or fewer characters could be reserved. For
534 .co.org -> works because the TLD is known
535 .co.uk -> doesn't work because "co" is only two chars long
536 .com.au -> doesn't work because "com" is only 3 chars long
537 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
538 .cnn.de -> doesn't work for the same reason (ugh!!)
539 .abcd.de -> works because "abcd" is 4 chars long
540 .img.cnn.de -> works because it's not trying to set the 2nd level domain
541 .cnn.co.uk -> works for the same reason
543 That should prevent misuse, while allowing reasonable usage. If
544 someone knows of a better way to handle this, please let me
547 const char *p = cookie_domain;
548 int dccount = 1; /* number of domain components */
549 int ldcl = 0; /* last domain component length */
550 int nldcl = 0; /* next to last domain component length */
553 /* Ignore leading period in this calculation. */
556 for (out = 0; !out; p++)
564 /* Empty domain component found -- the domain is invalid. */
566 if (*(p + 1) == '\0')
568 /* Tolerate trailing '.' by not treating the domain as
569 one ending with an empty domain component. */
591 int known_toplevel = false;
592 static const char *known_toplevel_domains[] = {
593 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
595 for (i = 0; i < countof (known_toplevel_domains); i++)
596 if (match_tail (cookie_domain, known_toplevel_domains[i], true))
598 known_toplevel = true;
601 if (!known_toplevel && nldcl <= 3)
608 /* Don't allow the host "foobar.com" to set a cookie for domain
610 if (*cookie_domain != '.')
612 int dlen = strlen (cookie_domain);
613 int hlen = strlen (host);
614 /* cookie host: hostname.foobar.com */
615 /* desired domain: bar.com */
616 /* '.' must be here in host-> ^ */
617 if (hlen > dlen && host[hlen - dlen - 1] != '.')
626 static int path_matches (const char *, const char *);
628 /* Check whether PATH begins with COOKIE_PATH. */
631 check_path_match (const char *cookie_path, const char *path)
633 return path_matches (path, cookie_path) != 0;
636 /* Prepend '/' to string S. S is copied to fresh stack-allocated
637 space and its value is modified to point to the new location. */
639 #define PREPEND_SLASH(s) do { \
640 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \
642 strcpy (PS_newstr + 1, s); \
647 /* Process the HTTP `Set-Cookie' header. This results in storing the
648 cookie or discarding a matching one, or ignoring it completely, all
649 depending on the contents. */
652 cookie_handle_set_cookie (struct cookie_jar *jar,
653 const char *host, int port,
654 const char *path, const char *set_cookie)
656 struct cookie *cookie;
657 cookies_now = time (NULL);
659 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
660 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
661 simply prepend slash to PATH. */
662 PREPEND_SLASH (path);
664 cookie = parse_set_cookie (set_cookie, false);
668 /* Sanitize parts of cookie. */
673 /* If the domain was not provided, we use the one we're talking
674 to, and set exact match. */
675 cookie->domain = xstrdup (host);
676 cookie->domain_exact = 1;
677 /* Set the port, but only if it's non-default. */
678 if (port != 80 && port != 443)
683 if (!check_domain_match (cookie->domain, host))
685 logprintf (LOG_NOTQUIET,
686 _("Cookie coming from %s attempted to set domain to %s\n"),
687 quotearg_style (escape_quoting_style, host),
688 quotearg_style (escape_quoting_style, cookie->domain));
689 xfree (cookie->domain);
696 /* The cookie doesn't set path: set it to the URL path, sans the
697 file part ("/dir/file" truncated to "/dir/"). */
698 char *trailing_slash = strrchr (path, '/');
700 cookie->path = strdupdelim (path, trailing_slash + 1);
702 /* no slash in the string -- can this even happen? */
703 cookie->path = xstrdup (path);
707 /* The cookie sets its own path; verify that it is legal. */
708 if (!check_path_match (cookie->path, path))
710 DEBUGP (("Attempt to fake the path: %s, %s\n",
711 cookie->path, path));
716 /* Now store the cookie, or discard an existing cookie, if
717 discarding was requested. */
719 if (cookie->discard_requested)
721 discard_matching_cookie (jar, cookie);
725 store_cookie (jar, cookie);
730 delete_cookie (cookie);
733 /* Support for sending out cookies in HTTP requests, based on
734 previously stored cookies. Entry point is
735 `cookie_header'. */
737 /* Return a count of how many times CHR occurs in STRING. */
740 count_char (const char *string, char chr)
744 for (p = string; *p; p++)
750 /* Find the cookie chains whose domains match HOST and store them to
753 A cookie chain is the head of a list of cookies that belong to a
754 host/domain. Given HOST "img.search.xemacs.org", this function
755 will return the chains for "img.search.xemacs.org",
756 "search.xemacs.org", and "xemacs.org" -- those of them that exist
759 DEST should be large enough to accept (in the worst case) as many
760 elements as there are domain components of HOST. */
763 find_chains_of_host (struct cookie_jar *jar, const char *host,
764 struct cookie *dest[])
769 /* Bail out quickly if there are no cookies in the jar. */
770 if (!hash_table_count (jar->chains))
773 if (numeric_address_p (host))
774 /* If host is an IP address, only check for the exact match. */
777 /* Otherwise, check all the subdomains except the top-level (last)
778 one. As a domain with N components has N-1 dots, the number of
779 passes equals the number of dots. */
780 passes = count_char (host, '.');
784 /* Find chains that match HOST, starting with exact match and
785 progressing to less specific domains. For instance, given HOST
786 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
787 srk.fer.hr's, then fer.hr's. */
790 struct cookie *chain = hash_table_get (jar->chains, host);
792 dest[dest_count++] = chain;
793 if (++passcnt >= passes)
795 host = strchr (host, '.') + 1;
801 /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
805 path_matches (const char *full_path, const char *prefix)
807 int len = strlen (prefix);
809 if (0 != strncmp (full_path, prefix, len))
810 /* FULL_PATH doesn't begin with PREFIX. */
813 /* Length of PREFIX determines the quality of the match. */
817 /* Return true iff COOKIE matches the provided parameters of the URL
818 being downloaded: HOST, PORT, PATH, and SECFLAG.
820 If PATH_GOODNESS is non-NULL, store the "path goodness" value
821 there. That value is a measure of how closely COOKIE matches PATH,
822 used for ordering cookies. */
825 cookie_matches_url (const struct cookie *cookie,
826 const char *host, int port, const char *path,
827 bool secflag, int *path_goodness)
831 if (cookie_expired_p (cookie))
832 /* Ignore stale cookies. Don't bother unchaining the cookie at
833 this point -- Wget is a relatively short-lived application, and
834 stale cookies will not be saved by `save_cookies'. On the
835 other hand, this function should be as efficient as
839 if (cookie->secure && !secflag)
840 /* Don't transmit secure cookies over insecure connections. */
842 if (cookie->port != PORT_ANY && cookie->port != port)
845 /* If exact domain match is required, verify that cookie's domain is
846 equal to HOST. If not, assume success on the grounds of the
847 cookie's chain having been found by find_chains_of_host. */
848 if (cookie->domain_exact
849 && 0 != strcasecmp (host, cookie->domain))
852 pg = path_matches (path, cookie->path);
857 /* If the caller requested path_goodness, we return it. This is
858 an optimization, so that the caller doesn't need to call
859 path_matches() again. */
864 /* A structure that points to a cookie, along with the additional
865 information about the cookie's "goodness". This allows us to sort
866 the cookies when returning them to the server, as required by the
869 struct weighed_cookie {
870 struct cookie *cookie;
875 /* Comparator used for uniquifying the list. */
878 equality_comparator (const void *p1, const void *p2)
880 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
881 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
883 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
884 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
886 /* We only really care whether both name and value are equal. We
887 return them in this order only for consistency... */
888 return namecmp ? namecmp : valuecmp;
891 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
892 cookies with the same attr name and value. Whenever a duplicate
893 pair is found, one of the cookies is removed. */
896 eliminate_dups (struct weighed_cookie *outgoing, int count)
898 struct weighed_cookie *h; /* hare */
899 struct weighed_cookie *t; /* tortoise */
900 struct weighed_cookie *end = outgoing + count;
902 /* We deploy a simple uniquify algorithm: first sort the array
903 according to our sort criteria, then copy it to itself, comparing
904 each cookie to its neighbor and ignoring the duplicates. */
906 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
908 /* "Hare" runs through all the entries in the array, followed by
909 "tortoise". If a duplicate is found, the hare skips it.
910 Non-duplicate entries are copied to the tortoise ptr. */
912 for (h = t = outgoing; h < end; h++)
916 struct cookie *c0 = h[0].cookie;
917 struct cookie *c1 = h[1].cookie;
918 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
919 continue; /* ignore the duplicate */
922 /* If the hare has advanced past the tortoise (because of
923 previous dups), make sure the values get copied. Otherwise,
924 no copying is necessary. */
933 /* Comparator used for sorting by quality. */
936 goodness_comparator (const void *p1, const void *p2)
938 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
939 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
941 /* Subtractions take `wc2' as the first argument because we want a
942 sort in *decreasing* order of goodness. */
943 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
944 int pgdiff = wc2->path_goodness - wc1->path_goodness;
946 /* Sort by domain goodness; if these are the same, sort by path
947 goodness. (The sorting order isn't really specified; maybe it
948 should be the other way around.) */
949 return dgdiff ? dgdiff : pgdiff;
952 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
953 requests PATH from the server. The resulting string is allocated
954 with `malloc', and the caller is responsible for freeing it. If no
955 cookies pertain to this request, i.e. no cookie header should be
956 generated, NULL is returned. */
959 cookie_header (struct cookie_jar *jar, const char *host,
960 int port, const char *path, bool secflag)
962 struct cookie **chains;
965 struct cookie *cookie;
966 struct weighed_cookie *outgoing;
969 int result_size, pos;
970 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
972 /* First, find the cookie chains whose domains match HOST. */
974 /* Allocate room for find_chains_of_host to write to. The number of
975 chains can at most equal the number of subdomains, hence
976 1+<number of dots>. */
977 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
978 chain_count = find_chains_of_host (jar, host, chains);
980 /* No cookies for this host. */
984 cookies_now = time (NULL);
986 /* Now extract from the chains those cookies that match our host
987 (for domain_exact cookies), port (for cookies with port other
988 than PORT_ANY), etc. See cookie_matches_url for details. */
990 /* Count the number of matching cookies. */
992 for (i = 0; i < chain_count; i++)
993 for (cookie = chains[i]; cookie; cookie = cookie->next)
994 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
997 return NULL; /* no cookies matched */
999 /* Allocate the array. */
1000 outgoing = alloca_array (struct weighed_cookie, count);
1002 /* Fill the array with all the matching cookies from the chains that
1005 for (i = 0; i < chain_count; i++)
1006 for (cookie = chains[i]; cookie; cookie = cookie->next)
1009 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1011 outgoing[ocnt].cookie = cookie;
1012 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1013 outgoing[ocnt].path_goodness = pg;
1016 assert (ocnt == count);
1018 /* Eliminate duplicate cookies; that is, those whose name and value
1020 count = eliminate_dups (outgoing, count);
1022 /* Sort the array so that best-matching domains come first, and
1023 that, within one domain, best-matching paths come first. */
1024 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1026 /* Count the space the name=value pairs will take. */
1028 for (i = 0; i < count; i++)
1030 struct cookie *c = outgoing[i].cookie;
1032 result_size += strlen (c->attr) + 1 + strlen (c->value);
1035 /* Allocate output buffer:
1036 name=value pairs -- result_size
1037 "; " separators -- (count - 1) * 2
1038 \0 terminator -- 1 */
1039 result_size = result_size + (count - 1) * 2 + 1;
1040 result = xmalloc (result_size);
1042 for (i = 0; i < count; i++)
1044 struct cookie *c = outgoing[i].cookie;
1045 int namlen = strlen (c->attr);
1046 int vallen = strlen (c->value);
1048 memcpy (result + pos, c->attr, namlen);
1050 result[pos++] = '=';
1051 memcpy (result + pos, c->value, vallen);
1055 result[pos++] = ';';
1056 result[pos++] = ' ';
1059 result[pos++] = '\0';
1060 assert (pos == result_size);
1064 /* Support for loading and saving cookies. The format used for
1065 loading and saving should be the format of the `cookies.txt' file
1066 used by Netscape and Mozilla, at least the Unix versions.
1067 (Apparently IE can export cookies in that format as well.) The
1068 format goes like this:
1070 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1072 DOMAIN -- cookie domain, optionally followed by :PORT
1073 DOMAIN-FLAG -- whether all hosts in the domain match
1075 SECURE-FLAG -- whether cookie requires secure connection
1076 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1077 ATTR-NAME -- name of the cookie attribute
1078 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1080 The fields are separated by TABs. All fields are mandatory, except
1081 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
1082 being "TRUE" and "FALSE". Empty lines, lines consisting of
1083 whitespace only, and comment lines (beginning with # optionally
1084 preceded by whitespace) are ignored.
1086 Example line from cookies.txt (split in two lines for readability):
1088 .google.com TRUE / FALSE 2147368447 \
1089 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1093 /* If the region [B, E) ends with :<digits>, parse the number, return
1094 it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
1095 If port is not specified, return 0. */
1098 domain_port (const char *domain_b, const char *domain_e,
1099 const char **domain_e_ptr)
1103 const char *colon = memchr (domain_b, ':', domain_e - domain_b);
1106 for (p = colon + 1; p < domain_e && c_isdigit (*p); p++)
1107 port = 10 * port + (*p - '0');
1109 /* Garbage following port number. */
1111 *domain_e_ptr = colon;
1115 #define GET_WORD(p, b, e) do { \
1117 while (*p && *p != '\t') \
1120 if (b == e || !*p) \
1125 /* Load cookies from FILE. */
1128 cookie_jar_load (struct cookie_jar *jar, const char *file)
1131 FILE *fp = fopen (file, "r");
1134 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1135 quote (file), strerror (errno));
1138 cookies_now = time (NULL);
1140 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1142 struct cookie *cookie;
1148 char *domain_b = NULL, *domain_e = NULL;
1149 char *domflag_b = NULL, *domflag_e = NULL;
1150 char *path_b = NULL, *path_e = NULL;
1151 char *secure_b = NULL, *secure_e = NULL;
1152 char *expires_b = NULL, *expires_e = NULL;
1153 char *name_b = NULL, *name_e = NULL;
1154 char *value_b = NULL, *value_e = NULL;
1156 /* Skip leading white-space. */
1157 while (*p && c_isspace (*p))
1159 /* Ignore empty lines. */
1160 if (!*p || *p == '#')
1163 GET_WORD (p, domain_b, domain_e);
1164 GET_WORD (p, domflag_b, domflag_e);
1165 GET_WORD (p, path_b, path_e);
1166 GET_WORD (p, secure_b, secure_e);
1167 GET_WORD (p, expires_b, expires_e);
1168 GET_WORD (p, name_b, name_e);
1170 /* Don't use GET_WORD for value because it ends with newline,
1173 value_e = p + strlen (p);
1174 if (value_e > value_b && value_e[-1] == '\n')
1176 if (value_e > value_b && value_e[-1] == '\r')
1178 /* Empty values are legal (I think), so don't bother checking. */
1180 cookie = cookie_new ();
1182 cookie->attr = strdupdelim (name_b, name_e);
1183 cookie->value = strdupdelim (value_b, value_e);
1184 cookie->path = strdupdelim (path_b, path_e);
1185 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1187 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1188 value indicating if all machines within a given domain can
1189 access the variable. This value is set automatically by the
1190 browser, depending on the value set for the domain." */
1191 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1193 /* DOMAIN needs special treatment because we might need to
1194 extract the port. */
1195 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1197 cookie->port = port;
1199 if (*domain_b == '.')
1200 ++domain_b; /* remove leading dot internally */
1201 cookie->domain = strdupdelim (domain_b, domain_e);
1203 /* safe default in case EXPIRES field is garbled. */
1204 expiry = (double)cookies_now - 1;
1206 /* I don't like changing the line, but it's safe here. (line is
1209 sscanf (expires_b, "%lf", &expiry);
1213 /* EXPIRY can be 0 for session cookies saved because the
1214 user specified `--keep-session-cookies' in the past.
1215 They remain session cookies, and will be saved only if
1216 the user has specified `keep-session-cookies' again. */
1220 if (expiry < cookies_now)
1221 goto abort_cookie; /* ignore stale cookie. */
1222 cookie->expiry_time = expiry;
1223 cookie->permanent = 1;
1226 store_cookie (jar, cookie);
1232 delete_cookie (cookie);
1237 /* Save cookies, in format described above, to FILE. */
1240 cookie_jar_save (struct cookie_jar *jar, const char *file)
1243 hash_table_iterator iter;
1245 DEBUGP (("Saving cookies to %s.\n", file));
1247 cookies_now = time (NULL);
1249 fp = fopen (file, "w");
1252 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1253 quote (file), strerror (errno));
1257 fputs ("# HTTP cookie file.\n", fp);
1258 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1259 fputs ("# Edit at your own risk.\n\n", fp);
1261 for (hash_table_iterate (jar->chains, &iter);
1262 hash_table_iter_next (&iter);
1265 const char *domain = iter.key;
1266 struct cookie *cookie = iter.value;
1267 for (; cookie; cookie = cookie->next)
1269 if (!cookie->permanent && !opt.keep_session_cookies)
1271 if (cookie_expired_p (cookie))
1273 if (!cookie->domain_exact)
1276 if (cookie->port != PORT_ANY)
1277 fprintf (fp, ":%d", cookie->port);
1278 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1279 cookie->domain_exact ? "FALSE" : "TRUE",
1280 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1281 (double)cookie->expiry_time,
1282 cookie->attr, cookie->value);
1289 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1290 quote (file), strerror (errno));
1291 if (fclose (fp) < 0)
1292 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1293 quote (file), strerror (errno));
1295 DEBUGP (("Done saving cookies.\n"));
1298 /* Clean up cookie-related data. */
1301 cookie_jar_delete (struct cookie_jar *jar)
1303 /* Iterate over chains (indexed by domain) and free them. */
1304 hash_table_iterator iter;
1305 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1307 struct cookie *chain = iter.value;
1309 /* Then all cookies in this chain. */
1312 struct cookie *next = chain->next;
1313 delete_cookie (chain);
1317 hash_table_destroy (jar->chains);
/* Test cases.  Currently this only tests parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and call
   test_cookies() from main.

   Every problem found is reported with printf; the function itself
   returns nothing.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed: */
  static struct {
    const char *data;
    /* Alternating name/value strings, terminated by NULL.  */
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  size_t i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }
      /* Only the non-NULL result matters here; release the cookie
         right away so the test does not leak it.  */
      delete_cookie (c);

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;

        while (extract_param (&ptr, &name, &value, ';'))
          {
            char *n = strdupdelim (name.b, name.e);
            char *v = strdupdelim (value.b, value.e);
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                /* Free the copies before bailing out of the loop.  */
                xfree (n);
                xfree (v);
                break;
              }
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          /* An unexpected cookie still has to be released.  */
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */